mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-30 22:14:37 +00:00
Actor: Enhance Dockerfile with additional utilities and env vars
- Add installation of `time` and `procps` packages for better resource monitoring.
- Set environment variables `PYTHONUNBUFFERED`, `MALLOC_ARENA_MAX`, and `EASYOCR_DOWNLOAD_CACHE` for improved performance.
- Create a cache directory for EasyOCR to optimize storage usage.

Signed-off-by: Václav Vančura <commit@vancura.dev>
This commit is contained in:
parent
1b6d4b5c50
commit
b745459a34
@ -26,7 +26,13 @@ RUN groupadd -r appuser && useradd -r -g appuser -s /sbin/nologin appuser && \
|
||||
npm cache clean --force && \
|
||||
\
|
||||
mkdir -p /home/appuser && \
|
||||
chown -R appuser:appuser /home/appuser
|
||||
chown -R appuser:appuser /home/appuser && \
|
||||
\
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
time \
|
||||
procps \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
@ -39,4 +45,13 @@ COPY --chown=appuser:appuser . .
|
||||
|
||||
USER appuser
|
||||
|
||||
# Set environment variables: unbuffered Python output, a cap on glibc malloc arenas, and the EasyOCR cache location
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV MALLOC_ARENA_MAX=2
|
||||
ENV EASYOCR_DOWNLOAD_CACHE="/tmp/easyocr"
|
||||
|
||||
# Create cache directory for EasyOCR
|
||||
RUN mkdir -p /tmp/easyocr && \
|
||||
chown -R appuser:appuser /tmp/easyocr
|
||||
|
||||
ENTRYPOINT [".actor/actor.sh"]
|
||||
|
@ -67,6 +67,10 @@ if [ "$(echo "${INPUT}" | jq -r '.ocr')" = "true" ]; then
|
||||
DOC_CONVERT_CMD="${DOC_CONVERT_CMD} --ocr"
|
||||
fi
|
||||
|
||||
# Print the exact command that will be executed
|
||||
echo "Debug: Command string: $DOC_CONVERT_CMD"
|
||||
echo "Debug: Full command: /usr/bin/time -v bash -c \"$DOC_CONVERT_CMD\""
|
||||
|
||||
# --- Process document with Docling ---
|
||||
|
||||
echo "Processing document with Docling CLI..."
|
||||
@ -79,15 +83,17 @@ touch "$TIMESTAMP_FILE" || {
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Execute the command
|
||||
set +e # Temporarily disable exit on error to handle the error ourselves
|
||||
eval "$DOC_CONVERT_CMD"
|
||||
DOCLING_EXIT_CODE=$?
|
||||
set -e # Re-enable exit on error
|
||||
# Execute the command under /usr/bin/time -v to record resource usage (no timeout is applied)
|
||||
echo "Starting document processing with memory monitoring..."
|
||||
/usr/bin/time -v bash -c "$DOC_CONVERT_CMD" 2>&1 | tee -a "$LOG_FILE"
|
||||
DOCLING_EXIT_CODE=${PIPESTATUS[0]}
|
||||
|
||||
# Check if the command failed and handle the error
|
||||
if [ $DOCLING_EXIT_CODE -ne 0 ]; then
|
||||
echo "Error: Docling command failed with exit code $DOCLING_EXIT_CODE"
|
||||
echo "Memory usage information:"
|
||||
free -h
|
||||
df -h
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user