Rename docling-parse backend from v3 to v4 (module, classes, and PdfBackend enum value)

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-12-08 20:58:11 +00:00 · 2025-03-14 12:35:06 +01:00
parent b77f73beec
commit af18215714
9 changed files with 25 additions and 25 deletions
--- a/docling/backend/docling_parse_v4_backend.py
+++ b/docling/backend/docling_parse_v4_backend.py
@@ -20,7 +20,7 @@ if TYPE_CHECKING:
 _log = logging.getLogger(__name__)


-class DoclingParseV3PageBackend(PdfPageBackend):
+class DoclingParseV4PageBackend(PdfPageBackend):
    def __init__(self, parsed_page: SegmentedPdfPage, page_obj: PdfPage):
        self._ppage = page_obj
        self._dpage = parsed_page
@@ -144,7 +144,7 @@ class DoclingParseV3PageBackend(PdfPageBackend):
        self._dpage = None


-class DoclingParseV3DocumentBackend(PdfDocumentBackend):
+class DoclingParseV4DocumentBackend(PdfDocumentBackend):
    def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
        super().__init__(in_doc, path_or_stream)

@@ -171,8 +171,8 @@ class DoclingParseV3DocumentBackend(PdfDocumentBackend):

    def load_page(
        self, page_no: int, create_words: bool = True, create_textlines: bool = True
-    ) -> DoclingParseV3PageBackend:
-        return DoclingParseV3PageBackend(
+    ) -> DoclingParseV4PageBackend:
+        return DoclingParseV4PageBackend(
            self.dp_doc.get_page(
                page_no + 1,
                create_words=create_words,
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@@ -15,7 +15,7 @@ from docling_core.utils.file import resolve_source_to_path
 from pydantic import TypeAdapter

 from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
-from docling.backend.docling_parse_v3_backend import DoclingParseV3DocumentBackend
+from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
 from docling.datamodel.base_models import (
@@ -416,8 +416,8 @@ def convert(
            backend = DoclingParseV2DocumentBackend
        elif pdf_backend == PdfBackend.DLPARSE_V2:
            backend = DoclingParseV2DocumentBackend
-        elif pdf_backend == PdfBackend.DLPARSE_V3:
-            backend = DoclingParseV3DocumentBackend  # type: ignore
+        elif pdf_backend == PdfBackend.DLPARSE_V4:
+            backend = DoclingParseV4DocumentBackend  # type: ignore
        elif pdf_backend == PdfBackend.PYPDFIUM2:
            backend = PyPdfiumDocumentBackend  # type: ignore
        else:
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -301,7 +301,7 @@ class PdfBackend(str, Enum):
    PYPDFIUM2 = "pypdfium2"
    DLPARSE_V1 = "dlparse_v1"
    DLPARSE_V2 = "dlparse_v2"
-    DLPARSE_V3 = "dlparse_v3"
+    DLPARSE_V4 = "dlparse_v4"


 # Define an enum for the ocr engines
--- a/docs/examples/batch_convert.py
+++ b/docs/examples/batch_convert.py
@@ -7,7 +7,7 @@ from typing import Iterable
 import yaml
 from docling_core.types.doc import ImageRefMode

-from docling.backend.docling_parse_v3_backend import DoclingParseV3DocumentBackend
+from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.datamodel.base_models import ConversionStatus, InputFormat
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import PdfPipelineOptions
@@ -145,7 +145,7 @@ def main():
    doc_converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
-                pipeline_options=pipeline_options, backend=DoclingParseV3DocumentBackend
+                pipeline_options=pipeline_options, backend=DoclingParseV4DocumentBackend
            )
        }
    )
--- a/tests/test_backend_docling_parse_v4.py
+++ b/tests/test_backend_docling_parse_v4.py
@@ -2,9 +2,9 @@ from pathlib import Path

 import pytest

-from docling.backend.docling_parse_v3_backend import (
-    DoclingParseV3DocumentBackend,
-    DoclingParseV3PageBackend,
+from docling.backend.docling_parse_v4_backend import (
+    DoclingParseV4DocumentBackend,
+    DoclingParseV4PageBackend,
 )
 from docling.datamodel.base_models import BoundingBox, InputFormat
 from docling.datamodel.document import InputDocument
@@ -19,7 +19,7 @@ def _get_backend(pdf_doc):
    in_doc = InputDocument(
        path_or_stream=pdf_doc,
        format=InputFormat.PDF,
-        backend=DoclingParseV3DocumentBackend,
+        backend=DoclingParseV4DocumentBackend,
    )

    doc_backend = in_doc._backend
@@ -34,7 +34,7 @@ def test_text_cell_counts():
    for page_index in range(0, doc_backend.page_count()):
        last_cell_count = None
        for i in range(10):
-            page_backend: DoclingParseV3PageBackend = doc_backend.load_page(0)
+            page_backend: DoclingParseV4PageBackend = doc_backend.load_page(0)
            cells = list(page_backend.get_text_cells())

            if last_cell_count is None:
@@ -49,7 +49,7 @@ def test_text_cell_counts():

 def test_get_text_from_rect(test_doc_path):
    doc_backend = _get_backend(test_doc_path)
-    page_backend: DoclingParseV3PageBackend = doc_backend.load_page(0)
+    page_backend: DoclingParseV4PageBackend = doc_backend.load_page(0)

    # Get the title text of the DocLayNet paper
    textpiece = page_backend.get_text_in_rect(
@@ -62,7 +62,7 @@ def test_get_text_from_rect(test_doc_path):

 def test_crop_page_image(test_doc_path):
    doc_backend = _get_backend(test_doc_path)
-    page_backend: DoclingParseV3PageBackend = doc_backend.load_page(0)
+    page_backend: DoclingParseV4PageBackend = doc_backend.load_page(0)

    # Crop out "Figure 1" from the DocLayNet paper
    im = page_backend.get_page_image(
--- a/tests/test_e2e_conversion.py
+++ b/tests/test_e2e_conversion.py
@@ -1,6 +1,6 @@
 from pathlib import Path

-from docling.backend.docling_parse_v3_backend import DoclingParseV3DocumentBackend
+from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import AcceleratorDevice, PdfPipelineOptions
@@ -34,7 +34,7 @@ def get_converter():
    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
-                pipeline_options=pipeline_options, backend=DoclingParseV3DocumentBackend
+                pipeline_options=pipeline_options, backend=DoclingParseV4DocumentBackend
            )
        }
    )
--- a/tests/test_e2e_ocr_conversion.py
+++ b/tests/test_e2e_ocr_conversion.py
@@ -2,7 +2,7 @@ import sys
 from pathlib import Path
 from typing import List

-from docling.backend.docling_parse_v3_backend import DoclingParseV3DocumentBackend
+from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
@@ -45,7 +45,7 @@ def get_converter(ocr_options: OcrOptions):
        format_options={
            InputFormat.PDF: PdfFormatOption(
                pipeline_options=pipeline_options,
-                backend=DoclingParseV3DocumentBackend,
+                backend=DoclingParseV4DocumentBackend,
            )
        }
    )
--- a/tests/test_interfaces.py
+++ b/tests/test_interfaces.py
@@ -3,7 +3,7 @@ from pathlib import Path

 import pytest

-from docling.backend.docling_parse_v3_backend import DoclingParseV3DocumentBackend
+from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.datamodel.base_models import DocumentStream, InputFormat
 from docling.datamodel.pipeline_options import PdfPipelineOptions
 from docling.document_converter import DocumentConverter, PdfFormatOption
@@ -31,7 +31,7 @@ def converter():
    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
-                pipeline_options=pipeline_options, backend=DoclingParseV3DocumentBackend
+                pipeline_options=pipeline_options, backend=DoclingParseV4DocumentBackend
            )
        }
    )
--- a/tests/test_options.py
+++ b/tests/test_options.py
@@ -3,7 +3,7 @@ from pathlib import Path

 import pytest

-from docling.backend.docling_parse_v3_backend import DoclingParseV3DocumentBackend
+from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.datamodel.base_models import ConversionStatus, InputFormat
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
@@ -33,7 +33,7 @@ def get_converters_with_table_options():
                format_options={
                    InputFormat.PDF: PdfFormatOption(
                        pipeline_options=pipeline_options,
-                        backend=DoclingParseV3DocumentBackend,
+                        backend=DoclingParseV4DocumentBackend,
                    )
                }
            )