mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-30 14:04:27 +00:00
This commit transitions the Actor from using the full Docling CLI package to the more lightweight docling-serve API.

Key changes include:
- Redesign Dockerfile to use docling-serve as base image
- Update actor.sh to communicate with API instead of running CLI commands
- Improve content type handling for various output formats
- Update input schema to align with API parameters
- Reduce Docker image size from ~6GB to ~600MB
- Update documentation and changelog to reflect architectural changes

The image size reduction will make the Actor more cost-effective for users while maintaining all existing functionality including OCR capabilities.

Issue: No official docling-serve Docker image is currently available, which will be addressed in a future commit.

Signed-off-by: Václav Vančura <commit@vancura.dev>
37 lines
1.2 KiB
Docker
37 lines
1.2 KiB
Docker
# The base image tag is a build argument so a build can be pinned to a specific
# release (docker build --build-arg DOCLING_SERVE_TAG=<version> .). The default
# keeps the previous behavior of following the moving "latest" tag, which is
# not reproducible across rebuilds.
ARG DOCLING_SERVE_TAG=latest
FROM ds4sd/docling-serve:${DOCLING_SERVE_TAG}
|
|
|
|
# Image metadata, declared in a single LABEL instruction.
LABEL maintainer="Vaclav Vancura <@vancura>" \
      description="Apify Actor for document processing using Docling" \
      version="1.1.0"
|
|
|
|
# Install the OS-level tools the Apify Actor needs.
# Package installation requires root; the base image is documented further down
# in this file as already running as a non-root user, so switch explicitly.
# The final USER instruction later in the file drops privileges again.
USER root

# update + install + list cleanup stay in one layer so the apt index is never
# stale and never shipped in the image. Packages are sorted alphabetically to
# keep diffs small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        gpg \
        jo \
        jq \
        procps \
        xz-utils \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Install Node.js 20.x from the NodeSource apt repository, then the Apify CLI.
#
# The repository signing key is downloaded to a temporary file and de-armored
# from there instead of being piped straight into gpg: under the default
# /bin/sh a pipeline only reports the exit status of its last command, so a
# failed download would otherwise go unnoticed.
# NOTE(review): npm and apify-cli are installed unpinned; consider pinning
# explicit versions for reproducible builds.
RUN mkdir -p /etc/apt/keyrings && \
    curl -fsSL -o /tmp/nodesource-repo.gpg.key https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key && \
    gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg /tmp/nodesource-repo.gpg.key && \
    rm -f /tmp/nodesource-repo.gpg.key && \
    echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" > /etc/apt/sources.list.d/nodesource.list && \
    apt-get update && apt-get install -y --no-install-recommends nodejs && \
    rm -rf /var/lib/apt/lists/* && \
    npm install -g npm@latest && \
    npm install -g apify-cli && \
    npm cache clean --force
|
|
|
|
# Working directory for the Actor. WORKDIR creates /app if it does not exist;
# it does not change ownership or permissions.
WORKDIR /app

# Copy the Actor definition and scripts, owned by the runtime user (UID/GID 1000).
COPY --chown=1000:1000 .actor/ .actor/

# Security best practice: run as non-root user (docling-serve already uses a non-root user)
USER 1000

# Absolute path in exec form, so start-up does not depend on the container's
# working directory (for example when it is overridden with `docker run -w`).
ENTRYPOINT ["/app/.actor/actor.sh"]
|