From d21a870a735a45e5f27899a487147ad07551f5e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20Van=C4=8Dura?= Date: Sun, 18 May 2025 14:29:05 +0200 Subject: [PATCH 1/5] fix(actor): remove references to missing docling_processor.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Václav Vančura --- .actor/Dockerfile | 1 - .actor/actor.sh | 11 ----------- 2 files changed, 12 deletions(-) diff --git a/.actor/Dockerfile b/.actor/Dockerfile index 9c7270df..60f77e72 100644 --- a/.actor/Dockerfile +++ b/.actor/Dockerfile @@ -64,7 +64,6 @@ ENV EASYOCR_MODULE_PATH=/tmp/easyocr-models COPY --chown=1000:1000 .actor/actor.sh .actor/actor.sh COPY --chown=1000:1000 .actor/actor.json .actor/actor.json COPY --chown=1000:1000 .actor/input_schema.json .actor/input_schema.json -COPY --chown=1000:1000 .actor/docling_processor.py .actor/docling_processor.py RUN chmod +x .actor/actor.sh # Copy the build files from builder diff --git a/.actor/actor.sh b/.actor/actor.sh index cbbcf2b8..1498bb9c 100755 --- a/.actor/actor.sh +++ b/.actor/actor.sh @@ -154,17 +154,6 @@ else echo "Warning: No build files directory found. Some tools may be unavailable." fi -# Copy Python processor script to tools directory -PYTHON_SCRIPT_PATH="$(dirname "$0")/docling_processor.py" -if [ -f "$PYTHON_SCRIPT_PATH" ]; then - echo "Copying Python processor script to tools directory..." - cp "$PYTHON_SCRIPT_PATH" "$TOOLS_DIR/" - chmod +x "$TOOLS_DIR/docling_processor.py" -else - echo "ERROR: Python processor script not found at $PYTHON_SCRIPT_PATH" - exit 1 -fi - # Check OCR directories and ensure they're writable echo "Checking OCR directory permissions..." OCR_DIR="/opt/app-root/src/.EasyOCR" From 5006acc01efeca400208dd8f6bd38a8fc25cf1bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20Van=C4=8Dura?= Date: Sun, 18 May 2025 14:29:58 +0200 Subject: [PATCH 2/5] chore(actor): update Actor README.md with recent repo URL changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Václav Vančura --- .actor/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.actor/README.md b/.actor/README.md index a43181f3..ba461334 100644 --- a/.actor/README.md +++ b/.actor/README.md @@ -2,7 +2,7 @@ [![Docling Actor](https://apify.com/actor-badge?actor=vancura/docling?fpr=docling)](https://apify.com/vancura/docling) -This Actor (specification v1) wraps the [Docling project](https://ds4sd.github.io/docling/) to provide serverless document processing in the cloud. It can process complex documents (PDF, DOCX, images) and convert them into structured formats (Markdown, JSON, HTML, Text, or DocTags) with optional OCR support. +This Actor (specification v1) wraps the [Docling project](https://github.com/docling-project/docling) to provide serverless document processing in the cloud. It can process complex documents (PDF, DOCX, images) and convert them into structured formats (Markdown, JSON, HTML, Text, or DocTags) with optional OCR support. ## What are Actors? From 8a2550f3906fbb0d3775e6bcc21d790f50c9286c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20Van=C4=8Dura?= Date: Sun, 18 May 2025 14:30:31 +0200 Subject: [PATCH 3/5] chore(actor): improve the Actor README.md local header link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Václav Vančura --- .actor/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.actor/README.md b/.actor/README.md index ba461334..1b3d4666 100644 --- a/.actor/README.md +++ b/.actor/README.md @@ -14,7 +14,7 @@ This Actor (specification v1) wraps the [Docling project](https://github.com/doc 2. [Usage](#usage) 3. [Input Parameters](#input-parameters) 4. [Output](#output) -5. [Performance & Resources](#performance--resources) +5. [Performance and Resources](#performance-and-resources) 6. [Troubleshooting](#troubleshooting) 7. [Local Development](#local-development) 8. [Architecture](#architecture) @@ -190,7 +190,7 @@ Access logs via: apify key-value-stores get-record DOCLING_LOG ``` -## Performance & Resources +## Performance and Resources - **Docker Image Size**: ~4GB - **Memory Requirements**: From 286aac38c16f470015d8ceb6b48ffd331f546e76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20Van=C4=8Dura?= Date: Sun, 18 May 2025 14:30:51 +0200 Subject: [PATCH 4/5] chore(actor): bump the Actor version number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Václav Vančura --- .actor/actor.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.actor/actor.json b/.actor/actor.json index e9bf59be..2a96506e 100644 --- a/.actor/actor.json +++ b/.actor/actor.json @@ -1,7 +1,7 @@ { "actorSpecification": 1, "name": "docling", - "version": "0.0", + "version": "1.0", "environmentVariables": {}, "dockerFile": "./Dockerfile", "input": "./input_schema.json", From 908d38cd67403697820bbc7e6b0dccb969713037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20=C4=8Curn?= Date: Tue, 20 May 2025 10:50:54 +0200 Subject: [PATCH 5/5] Update .actor/actor.json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Marek Trunkát Signed-off-by: Jan Čurn --- .actor/actor.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.actor/actor.json b/.actor/actor.json index 2a96506e..2b2741a7 100644 --- a/.actor/actor.json +++ b/.actor/actor.json @@ -4,7 +4,7 @@ "version": "1.0", "environmentVariables": {}, "dockerFile": "./Dockerfile", - "input": "./input_schema.json", + "inputSchema": "./input_schema.json", "scripts": { "run": "./actor.sh" }