style & quality applied

Signed-off-by: felix <felixdittrich92@gmail.com>
2025-07-30 14:04:27 +00:00 · 2025-03-21 21:49:50 +01:00 · 2025-03-21 21:49:50 +01:00 · a19cf81f98
commit a19cf81f98
parent 7c87467ea5
7 changed files with 1509 additions and 47 deletions
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@ -161,18 +161,12 @@ class OnnxtrOcrOptions(OcrOptions):
    det_arch: str = "fast_base"
    reco_arch: str = "crnn_vgg16_bn"  # NOTE: This can be also a hf hub model
    det_bs: int = (
        1  # NOTE: Should be 1 because docling seems not to support batch processing yet
    )
    reco_bs: int = 512
    auto_correct_orientation: bool = False
    preserve_aspect_ratio: bool = True
    symmetric_pad: bool = True
    paragraph_break: float = 0.035
    load_in_8_bit: bool = False
    det_engine_cfg: Dict[str, Any] = {}
    reco_engine_cfg: Dict[str, Any] = {}
    clf_engine_cfg: Dict[str, Any] = {}
    model_config = ConfigDict(
        extra="forbid",
--- a/docling/models/onnxtr_model.py
+++ b/docling/models/onnxtr_model.py
@ -1,8 +1,10 @@
 import logging
 import os
 from pathlib import Path
 from typing import Iterable, Optional, Type
 import numpy
 import numpy as np
 from docling_core.types.doc import BoundingBox, CoordOrigin
 from docling_core.types.doc.page import BoundingRectangle, TextCell
@ -42,7 +44,15 @@ class OnnxtrOcrModel(BaseOcrModel):
        if self.enabled:
            try:
-                from onnxtr.models import ocr_predictor, EngineConfig, from_hub  # type: ignore
+                from onnxtr.models import (  # type: ignore
                    EngineConfig,
                    from_hub,
                    ocr_predictor,
                )
                # We disable multiprocessing for OnnxTR,
                # because the speedup is minimal and it can cause memory leaks on Windows
                os.environ["ONNXTR_MULTIPROCESSING_DISABLE"] = "TRUE"
            except ImportError:
                raise ImportError(
                    "OnnxTR is not installed. Please install it via `pip install 'onnxtr[gpu]'` to use this OCR engine. "
@ -62,6 +72,7 @@ class OnnxtrOcrModel(BaseOcrModel):
                config = {
                    "assume_straight_pages": True,
                    "straighten_pages": False,
                    # This should be disabled when docling supports polygons
                    "export_as_straight_boxes": True,
                    "disable_crop_orientation": False,
                    "disable_page_orientation": False,
@ -78,15 +89,22 @@ class OnnxtrOcrModel(BaseOcrModel):
                    if self.options.reco_arch.count("/") == 1
                    else self.options.reco_arch
                ),
                det_bs=1,  # NOTE: Should be always 1, because docling handles batching
                preserve_aspect_ratio=self.options.preserve_aspect_ratio,
                symmetric_pad=self.options.symmetric_pad,
                paragraph_break=self.options.paragraph_break,
                load_in_8_bit=self.options.load_in_8_bit,
                **config,
                # TODO: Allow specification of the engine configs in the options
                det_engine_cfg=None,
                reco_engine_cfg=None,
                clf_engine_cfg=None,
            )
    def _to_absolute_and_docling_format(
-        self, geom: list[list[float]], img_shape: tuple[int, int]
+        self,
        geom: tuple[tuple[float, float], tuple[float, float]] | np.ndarray,
        img_shape: tuple[int, int],
    ) -> tuple[int, int, int, int]:
        """
        Convert a bounding box or polygon from relative to absolute coordinates and return in [x1, y1, x2, y2] format.
@ -109,14 +127,11 @@ class OnnxtrOcrModel(BaseOcrModel):
            (xmin, ymin), (xmax, ymax) = geom
            x1, y1 = scale_point(xmin, ymin)
            x2, y2 = scale_point(xmax, ymax)
-        elif len(geom) == 4:
+        # 4-Point polygon
        else:
            abs_points = [scale_point(*point) for point in geom]
            x1, y1 = min(p[0] for p in abs_points), min(p[1] for p in abs_points)
            x2, y2 = max(p[0] for p in abs_points), max(p[1] for p in abs_points)
        else:
            raise ValueError(
                f"Invalid geometry format: {geom}. Expected either 2 or 4 points."
            )
        return x1, y1, x2, y2
--- a/docling/models/plugins/defaults.py
+++ b/docling/models/plugins/defaults.py
@ -1,9 +1,9 @@
 from docling.models.easyocr_model import EasyOcrModel
 from docling.models.ocr_mac_model import OcrMacModel
 from docling.models.onnxtr_model import OnnxtrOcrModel
 from docling.models.picture_description_api_model import PictureDescriptionApiModel
 from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
 from docling.models.rapid_ocr_model import RapidOcrModel
 from docling.models.onnxtr_model import OnnxtrOcrModel
 from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
 from docling.models.tesseract_ocr_model import TesseractOcrModel
--- a/docs/examples/onnxtr_with_custom_models.py
+++ b/docs/examples/onnxtr_with_custom_models.py
@ -0,0 +1,40 @@
 from docling.datamodel.pipeline_options import OnnxtrOcrOptions, PdfPipelineOptions
 from docling.document_converter import (
    ConversionResult,
    DocumentConverter,
    InputFormat,
    PdfFormatOption,
 )
 def main():
    """Convert a PDF with OnnxTR OCR using custom detection/recognition models.

    Builds ``OnnxtrOcrOptions`` with an explicit detection architecture and a
    recognition model given as a Hugging Face Hub id, passes them into the PDF
    pipeline options, converts ``source`` and prints the resulting document
    as Markdown.
    """
    # Source document to convert
    source = "https://arxiv.org/pdf/2408.09869v4"
    # OCR engine settings: override the detection and recognition models
    # instead of relying on the option defaults.
    ocr_options = OnnxtrOcrOptions(
        det_arch="db_mobilenet_v3_large",
        reco_arch="Felix92/onnxtr-parseq-multilingual-v1",  # Model will be downloaded from Hugging Face Hub
        auto_correct_orientation=True,  # This can be used to correct the orientation of the pages
    )
    # Attach the OCR settings to the PDF pipeline configuration.
    pipeline_options = PdfPipelineOptions(
        ocr_options=ocr_options,
    )
    # Convert the document
    # The pipeline options are registered for the PDF input format only.
    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
                pipeline_options=pipeline_options,
            ),
        },
    )
    conversion_result: ConversionResult = converter.convert(source=source)
    doc = conversion_result.document
    # Export the converted document to Markdown and write it to stdout.
    md = doc.export_to_markdown()
    print(md)
 if __name__ == "__main__":
    main()
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@ -44,7 +44,7 @@ packages = [{ include = "docling" }]
 ######################
 # actual dependencies:
 ######################
-python = "^3.9"
+python = "^3.10"
 pydantic = "^2.0.0"
 docling-core = {extras = ["chunking"], version = "^2.24.1"}
 docling-ibm-models = "^3.4.0"
@ -72,7 +72,7 @@ openpyxl = "^3.1.5"
 lxml = ">=4.0.0,<6.0.0"
 ocrmac = { version = "^1.0.0", markers = "sys_platform == 'darwin'", optional = true }
 rapidocr-onnxruntime = { version = "^1.4.0", optional = true, markers = "python_version < '3.13'" }
-onnxtr = { extras= ["gpu", "viz"], version = "^0.6.3", optional = true, markers = "python_version < '3.13'" }
+onnxtr = { extras= ["gpu", "viz"], version = "^0.6.2", optional = true, markers = "python_version >= '3.10'" }
 onnxruntime = [
  # 1.19.2 is the last version with python3.9 support,
  # see https://github.com/microsoft/onnxruntime/releases/tag/v1.20.0
--- a/tests/test_e2e_ocr_conversion.py
+++ b/tests/test_e2e_ocr_conversion.py
@ -11,9 +11,9 @@ from docling.datamodel.pipeline_options import (
    EasyOcrOptions,
    OcrMacOptions,
    OcrOptions,
    OnnxtrOcrOptions,
    PdfPipelineOptions,
    RapidOcrOptions,
    OnnxtrOcrOptions,
    TesseractCliOcrOptions,
    TesseractOcrOptions,
 )