mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 23:12:20 +00:00
Actor: Enhance Dockerfile with additional utilities and env vars
- Add installation of `time` and `procps` packages for better resource monitoring.
- Set environment variables `PYTHONUNBUFFERED`, `MALLOC_ARENA_MAX`, and `EASYOCR_DOWNLOAD_CACHE` for improved performance.
- Create a cache directory for EasyOCR to optimize storage usage.

Signed-off-by: Václav Vančura <commit@vancura.dev>
This commit is contained in:
parent
1b6d4b5c50
commit
b745459a34
@ -26,7 +26,13 @@ RUN groupadd -r appuser && useradd -r -g appuser -s /sbin/nologin appuser && \
|
|||||||
npm cache clean --force && \
|
npm cache clean --force && \
|
||||||
\
|
\
|
||||||
mkdir -p /home/appuser && \
|
mkdir -p /home/appuser && \
|
||||||
chown -R appuser:appuser /home/appuser
|
chown -R appuser:appuser /home/appuser && \
|
||||||
|
\
|
||||||
|
apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
time \
|
||||||
|
procps \
|
||||||
|
&& apt-get clean \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
@ -39,4 +45,13 @@ COPY --chown=appuser:appuser . .
|
|||||||
|
|
||||||
USER appuser
|
USER appuser
|
||||||
|
|
||||||
|
# Set environment variables for better resource management
|
||||||
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
ENV MALLOC_ARENA_MAX=2
|
||||||
|
ENV EASYOCR_DOWNLOAD_CACHE="/tmp/easyocr"
|
||||||
|
|
||||||
|
# Create cache directory for EasyOCR
|
||||||
|
RUN mkdir -p /tmp/easyocr && \
|
||||||
|
chown -R appuser:appuser /tmp/easyocr
|
||||||
|
|
||||||
ENTRYPOINT [".actor/actor.sh"]
|
ENTRYPOINT [".actor/actor.sh"]
|
||||||
|
@ -67,6 +67,10 @@ if [ "$(echo "${INPUT}" | jq -r '.ocr')" = "true" ]; then
|
|||||||
DOC_CONVERT_CMD="${DOC_CONVERT_CMD} --ocr"
|
DOC_CONVERT_CMD="${DOC_CONVERT_CMD} --ocr"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Print the exact command that will be executed
|
||||||
|
echo "Debug: Command string: $DOC_CONVERT_CMD"
|
||||||
|
echo "Debug: Full command: /usr/bin/time -v bash -c \"$DOC_CONVERT_CMD\""
|
||||||
|
|
||||||
# --- Process document with Docling ---
|
# --- Process document with Docling ---
|
||||||
|
|
||||||
echo "Processing document with Docling CLI..."
|
echo "Processing document with Docling CLI..."
|
||||||
@ -79,15 +83,17 @@ touch "$TIMESTAMP_FILE" || {
|
|||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
# Execute the command
|
# Execute the command with timing and memory monitoring
|
||||||
set +e # Temporarily disable exit on error to handle the error ourselves
|
echo "Starting document processing with memory monitoring..."
|
||||||
eval "$DOC_CONVERT_CMD"
|
/usr/bin/time -v bash -c "$DOC_CONVERT_CMD" 2>&1 | tee -a "$LOG_FILE"
|
||||||
DOCLING_EXIT_CODE=$?
|
DOCLING_EXIT_CODE=${PIPESTATUS[0]}
|
||||||
set -e # Re-enable exit on error
|
|
||||||
|
|
||||||
# Check if the command failed and handle the error
|
# Check if the command failed and handle the error
|
||||||
if [ $DOCLING_EXIT_CODE -ne 0 ]; then
|
if [ $DOCLING_EXIT_CODE -ne 0 ]; then
|
||||||
echo "Error: Docling command failed with exit code $DOCLING_EXIT_CODE"
|
echo "Error: Docling command failed with exit code $DOCLING_EXIT_CODE"
|
||||||
|
echo "Memory usage information:"
|
||||||
|
free -h
|
||||||
|
df -h
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user