mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
Merge pull request #1 from apify/vaclav/docling-sync
This commit is contained in:
commit
0df8b65a05
@ -64,7 +64,6 @@ ENV EASYOCR_MODULE_PATH=/tmp/easyocr-models
|
|||||||
COPY --chown=1000:1000 .actor/actor.sh .actor/actor.sh
|
COPY --chown=1000:1000 .actor/actor.sh .actor/actor.sh
|
||||||
COPY --chown=1000:1000 .actor/actor.json .actor/actor.json
|
COPY --chown=1000:1000 .actor/actor.json .actor/actor.json
|
||||||
COPY --chown=1000:1000 .actor/input_schema.json .actor/input_schema.json
|
COPY --chown=1000:1000 .actor/input_schema.json .actor/input_schema.json
|
||||||
COPY --chown=1000:1000 .actor/docling_processor.py .actor/docling_processor.py
|
|
||||||
RUN chmod +x .actor/actor.sh
|
RUN chmod +x .actor/actor.sh
|
||||||
|
|
||||||
# Copy the build files from builder
|
# Copy the build files from builder
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
[Docling Actor](https://apify.com/vancura/docling)
|
[Docling Actor](https://apify.com/vancura/docling)
|
||||||
|
|
||||||
This Actor (specification v1) wraps the [Docling project](https://ds4sd.github.io/docling/) to provide serverless document processing in the cloud. It can process complex documents (PDF, DOCX, images) and convert them into structured formats (Markdown, JSON, HTML, Text, or DocTags) with optional OCR support.
|
This Actor (specification v1) wraps the [Docling project](https://github.com/docling-project/docling) to provide serverless document processing in the cloud. It can process complex documents (PDF, DOCX, images) and convert them into structured formats (Markdown, JSON, HTML, Text, or DocTags) with optional OCR support.
|
||||||
|
|
||||||
## What are Actors?
|
## What are Actors?
|
||||||
|
|
||||||
@ -14,7 +14,7 @@ This Actor (specification v1) wraps the [Docling project](https://ds4sd.github.i
|
|||||||
2. [Usage](#usage)
|
2. [Usage](#usage)
|
||||||
3. [Input Parameters](#input-parameters)
|
3. [Input Parameters](#input-parameters)
|
||||||
4. [Output](#output)
|
4. [Output](#output)
|
||||||
5. [Performance & Resources](#performance--resources)
|
5. [Performance and Resources](#performance-and-resources)
|
||||||
6. [Troubleshooting](#troubleshooting)
|
6. [Troubleshooting](#troubleshooting)
|
||||||
7. [Local Development](#local-development)
|
7. [Local Development](#local-development)
|
||||||
8. [Architecture](#architecture)
|
8. [Architecture](#architecture)
|
||||||
@ -190,7 +190,7 @@ Access logs via:
|
|||||||
apify key-value-stores get-record DOCLING_LOG
|
apify key-value-stores get-record DOCLING_LOG
|
||||||
```
|
```
|
||||||
|
|
||||||
## Performance & Resources
|
## Performance and Resources
|
||||||
|
|
||||||
- **Docker Image Size**: ~4GB
|
- **Docker Image Size**: ~4GB
|
||||||
- **Memory Requirements**:
|
- **Memory Requirements**:
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
{
|
{
|
||||||
"actorSpecification": 1,
|
"actorSpecification": 1,
|
||||||
"name": "docling",
|
"name": "docling",
|
||||||
"version": "0.0",
|
"version": "1.0",
|
||||||
"environmentVariables": {},
|
"environmentVariables": {},
|
||||||
"dockerFile": "./Dockerfile",
|
"dockerFile": "./Dockerfile",
|
||||||
"input": "./input_schema.json",
|
"inputSchema": "./input_schema.json",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"run": "./actor.sh"
|
"run": "./actor.sh"
|
||||||
}
|
}
|
||||||
|
@ -154,17 +154,6 @@ else
|
|||||||
echo "Warning: No build files directory found. Some tools may be unavailable."
|
echo "Warning: No build files directory found. Some tools may be unavailable."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Copy Python processor script to tools directory
|
|
||||||
PYTHON_SCRIPT_PATH="$(dirname "$0")/docling_processor.py"
|
|
||||||
if [ -f "$PYTHON_SCRIPT_PATH" ]; then
|
|
||||||
echo "Copying Python processor script to tools directory..."
|
|
||||||
cp "$PYTHON_SCRIPT_PATH" "$TOOLS_DIR/"
|
|
||||||
chmod +x "$TOOLS_DIR/docling_processor.py"
|
|
||||||
else
|
|
||||||
echo "ERROR: Python processor script not found at $PYTHON_SCRIPT_PATH"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Check OCR directories and ensure they're writable
|
# Check OCR directories and ensure they're writable
|
||||||
echo "Checking OCR directory permissions..."
|
echo "Checking OCR directory permissions..."
|
||||||
OCR_DIR="/opt/app-root/src/.EasyOCR"
|
OCR_DIR="/opt/app-root/src/.EasyOCR"
|
||||||
|
Loading…
Reference in New Issue
Block a user