From 2461b56b84429a34bfb53042d5217f4cd887e52d Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Fri, 27 Sep 2024 09:21:15 +0200 Subject: [PATCH] Import rewrites, adapt to changes in docling-core Signed-off-by: Christoph Auer --- docling/backend/abstract_backend.py | 2 +- docling/backend/docling_parse_backend.py | 2 +- docling/backend/pypdfium2_backend.py | 2 +- docling/datamodel/base_models.py | 2 +- docling/datamodel/document.py | 9 ++++++--- docling/models/base_ocr_model.py | 2 +- docling/models/ds_glm_model.py | 2 +- docling/models/easyocr_model.py | 2 +- docling/models/layout_model.py | 2 +- docling/models/table_structure_model.py | 2 +- docling/utils/export.py | 2 +- tests/test_backend_docling_parse.py | 2 +- 12 files changed, 17 insertions(+), 14 deletions(-) diff --git a/docling/backend/abstract_backend.py b/docling/backend/abstract_backend.py index 22fdc1b2..95379a3d 100644 --- a/docling/backend/abstract_backend.py +++ b/docling/backend/abstract_backend.py @@ -3,7 +3,7 @@ from io import BytesIO from pathlib import Path from typing import TYPE_CHECKING, Any, Iterable, Optional, Union -from docling_core.types.experimental.base import BoundingBox, Size +from docling_core.types.experimental import BoundingBox, Size from PIL import Image if TYPE_CHECKING: diff --git a/docling/backend/docling_parse_backend.py b/docling/backend/docling_parse_backend.py index 94f2dfc5..95b687ad 100644 --- a/docling/backend/docling_parse_backend.py +++ b/docling/backend/docling_parse_backend.py @@ -5,7 +5,7 @@ from pathlib import Path from typing import Iterable, List, Optional, Union import pypdfium2 as pdfium -from docling_core.types.experimental.base import BoundingBox, CoordOrigin, Size +from docling_core.types.experimental import BoundingBox, CoordOrigin, Size from docling_parse.docling_parse import pdf_parser from PIL import Image, ImageDraw from pypdfium2 import PdfPage diff --git a/docling/backend/pypdfium2_backend.py b/docling/backend/pypdfium2_backend.py index 2c128357..ffd497de 100644 --- a/docling/backend/pypdfium2_backend.py +++ b/docling/backend/pypdfium2_backend.py @@ -6,7 +6,7 @@ from typing import Iterable, List, Optional, Union import pypdfium2 as pdfium import pypdfium2.raw as pdfium_c -from docling_core.types.experimental.base import BoundingBox, CoordOrigin, Size +from docling_core.types.experimental import BoundingBox, CoordOrigin, Size from PIL import Image, ImageDraw from pypdfium2 import PdfPage, PdfTextPage from pypdfium2._helpers.misc import PdfiumError diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 041a741e..78cc50d0 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -4,7 +4,7 @@ from enum import Enum, auto from io import BytesIO from typing import Annotated, Any, Dict, List, Optional, Tuple, Union -from docling_core.types.experimental.base import BoundingBox, Size +from docling_core.types.experimental import BoundingBox, Size from docling_core.types.experimental.document import BaseFigureData, TableCell from docling_core.types.experimental.labels import DocItemLabel from PIL.Image import Image diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index ec4ce37c..10aa6149 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -11,8 +11,11 @@ from docling_core.types import PageDimensions, PageReference, Prov, Ref from docling_core.types import Table as DsSchemaTable from docling_core.types.doc.base import BoundingBox as DsBoundingBox from docling_core.types.doc.base import Figure, TableCell -from docling_core.types.experimental.document import DoclingDocument, FileInfo -from docling_core.types.experimental.labels import DocItemLabel +from docling_core.types.experimental import ( + DescriptionItem, + DocItemLabel, + DoclingDocument, +) from pydantic import BaseModel from typing_extensions import deprecated @@ -62,7 +65,7 @@ _EMPTY_DOC = DsDocument( ) _EMPTY_DOCLING_DOC = DoclingDocument( - description={}, file_info=FileInfo(filename="dummy", document_hash="123xyz") + description=DescriptionItem(), name="dummy" ) # TODO: Stub diff --git a/docling/models/base_ocr_model.py b/docling/models/base_ocr_model.py index 8818517d..a0777363 100644 --- a/docling/models/base_ocr_model.py +++ b/docling/models/base_ocr_model.py @@ -5,7 +5,7 @@ from typing import Iterable, List, Tuple import numpy import numpy as np -from docling_core.types.experimental.base import BoundingBox, CoordOrigin +from docling_core.types.experimental import BoundingBox, CoordOrigin from PIL import Image, ImageDraw from rtree import index from scipy.ndimage import find_objects, label diff --git a/docling/models/ds_glm_model.py b/docling/models/ds_glm_model.py index 8259bfe6..31690f4c 100644 --- a/docling/models/ds_glm_model.py +++ b/docling/models/ds_glm_model.py @@ -11,7 +11,7 @@ from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_mode from docling_core.types import BaseText from docling_core.types import Document as DsDocument from docling_core.types import Ref -from docling_core.types.experimental.base import BoundingBox, CoordOrigin +from docling_core.types.experimental import BoundingBox, CoordOrigin from docling_core.types.experimental.document import DoclingDocument from PIL import ImageDraw diff --git a/docling/models/easyocr_model.py b/docling/models/easyocr_model.py index b735f7ba..3bc1f89d 100644 --- a/docling/models/easyocr_model.py +++ b/docling/models/easyocr_model.py @@ -2,7 +2,7 @@ import logging from typing import Iterable import numpy -from docling_core.types.experimental.base import BoundingBox, CoordOrigin +from docling_core.types.experimental import BoundingBox, CoordOrigin from docling.datamodel.base_models import OcrCell, Page from docling.models.base_ocr_model import BaseOcrModel diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py index 1661e97d..5ef39043 100644 --- a/docling/models/layout_model.py +++ b/docling/models/layout_model.py @@ -4,7 +4,7 @@ import random import time from typing import Iterable, List -from docling_core.types.experimental.base import CoordOrigin +from docling_core.types.experimental import CoordOrigin from docling_core.types.experimental.labels import DocItemLabel from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor from PIL import ImageDraw diff --git a/docling/models/table_structure_model.py b/docling/models/table_structure_model.py index 6919ce6b..b2098004 100644 --- a/docling/models/table_structure_model.py +++ b/docling/models/table_structure_model.py @@ -2,7 +2,7 @@ import copy from typing import Iterable, List import numpy -from docling_core.types.experimental.base import BoundingBox +from docling_core.types.experimental import BoundingBox from docling_core.types.experimental.document import TableCell from docling_core.types.experimental.labels import DocItemLabel from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor diff --git a/docling/utils/export.py b/docling/utils/export.py index 60ca2035..e69d4715 100644 --- a/docling/utils/export.py +++ b/docling/utils/export.py @@ -2,7 +2,7 @@ import logging from typing import Any, Dict, Iterable, List, Tuple, Union from docling_core.types.doc.base import BaseCell, BaseText, BoundingBox, Ref, Table -from docling_core.types.experimental.base import CoordOrigin +from docling_core.types.experimental import CoordOrigin from docling.datamodel.base_models import OcrCell from docling.datamodel.document import ConversionResult, Page diff --git a/tests/test_backend_docling_parse.py b/tests/test_backend_docling_parse.py index 01e7cbbb..2b8e2f4d 100644 --- a/tests/test_backend_docling_parse.py +++ b/tests/test_backend_docling_parse.py @@ -1,7 +1,7 @@ from pathlib import Path import pytest -from docling_core.types.experimental.base import BoundingBox +from docling_core.types.experimental import BoundingBox from docling.backend.docling_parse_backend import ( DoclingParseDocumentBackend,