Actor: Enhance Dockerfile with additional utilities and env vars

- Add installation of `time` and `procps` packages for better resource monitoring.
- Set environment variables `PYTHONUNBUFFERED`, `MALLOC_ARENA_MAX`, and `EASYOCR_DOWNLOAD_CACHE` for unbuffered log output, a smaller malloc memory footprint, and a fixed EasyOCR cache location.
- Create a cache directory for EasyOCR to optimize storage usage.

Signed-off-by: Václav Vančura <commit@vancura.dev>
This commit is contained in:
Václav Vančura 2025-02-03 16:18:25 +01:00 committed by Adam Kliment
parent 1b6d4b5c50
commit b745459a34
2 changed files with 27 additions and 6 deletions

View File

@ -26,7 +26,13 @@ RUN groupadd -r appuser && useradd -r -g appuser -s /sbin/nologin appuser && \
npm cache clean --force && \
\
mkdir -p /home/appuser && \
chown -R appuser:appuser /home/appuser
chown -R appuser:appuser /home/appuser && \
\
apt-get update && apt-get install -y --no-install-recommends \
time \
procps \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
@ -39,4 +45,13 @@ COPY --chown=appuser:appuser . .
USER appuser

# Runtime tuning for resource management:
#   PYTHONUNBUFFERED=1      - Python writes stdout/stderr without buffering
#   MALLOC_ARENA_MAX=2      - caps the number of glibc malloc arenas
#   EASYOCR_DOWNLOAD_CACHE  - path of the cache directory created below
ENV PYTHONUNBUFFERED=1 \
    MALLOC_ARENA_MAX=2 \
    EASYOCR_DOWNLOAD_CACHE="/tmp/easyocr"

# Pre-create the EasyOCR cache directory. This runs as appuser (USER is
# already switched above), so the directory ends up owned by that user.
RUN mkdir -p /tmp/easyocr \
    && chown -R appuser:appuser /tmp/easyocr

ENTRYPOINT [".actor/actor.sh"]

View File

@ -67,6 +67,10 @@ if [ "$(echo "${INPUT}" | jq -r '.ocr')" = "true" ]; then
DOC_CONVERT_CMD="${DOC_CONVERT_CMD} --ocr"
fi
# Show the assembled command string and the wrapped invocation before running it
printf '%s\n' "Debug: Command string: $DOC_CONVERT_CMD"
printf '%s\n' "Debug: Full command: /usr/bin/time -v bash -c \"$DOC_CONVERT_CMD\""

# --- Process document with Docling ---
printf '%s\n' "Processing document with Docling CLI..."
@ -79,15 +83,17 @@ touch "$TIMESTAMP_FILE" || {
exit 1
}
# Execute the command
set +e # Temporarily disable exit on error to handle the error ourselves
eval "$DOC_CONVERT_CMD"
DOCLING_EXIT_CODE=$?
set -e # Re-enable exit on error
# Execute the command under /usr/bin/time -v to collect resource-usage
# statistics (note: no timeout is enforced by this invocation).
echo "Starting document processing with memory monitoring..."
# stderr is merged into stdout so that the command output and the time report
# are both shown on the console and appended to $LOG_FILE by tee.
/usr/bin/time -v bash -c "$DOC_CONVERT_CMD" 2>&1 | tee -a "$LOG_FILE"
# Read the status of the first pipeline stage rather than tee's. This must stay
# immediately after the pipeline: any other command overwrites PIPESTATUS.
DOCLING_EXIT_CODE=${PIPESTATUS[0]}
# On a non-zero Docling exit status, report it, dump memory and disk usage
# for diagnosis, and abort the script with exit code 1.
if (( DOCLING_EXIT_CODE != 0 )); then
    echo "Error: Docling command failed with exit code $DOCLING_EXIT_CODE"
    echo "Memory usage information:"
    free -h
    df -h
    exit 1
fi