From dac82ca7f28f093b78f91312430b97bb609796ee Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Tue, 15 Oct 2024 10:11:10 +0200 Subject: [PATCH] Import statement updates from docling-core Signed-off-by: Christoph Auer --- docling/backend/abstract_backend.py | 2 +- docling/backend/docling_parse_backend.py | 2 +- docling/backend/html_backend.py | 4 +-- docling/backend/mspowerpoint_backend.py | 4 +-- docling/backend/msword_backend.py | 2 +- docling/backend/pdf_backend.py | 4 +-- docling/backend/pypdfium2_backend.py | 2 +- docling/cli/main.py | 17 ++---------- docling/datamodel/base_models.py | 6 ++-- docling/datamodel/document.py | 8 +++--- docling/models/base_model.py | 2 +- docling/models/base_ocr_model.py | 2 +- docling/models/ds_glm_model.py | 8 +++--- docling/models/dummy_picture_enrichment.py | 7 ++--- docling/models/easyocr_model.py | 2 +- docling/models/layout_model.py | 4 +-- docling/models/table_structure_model.py | 6 ++-- docling/models/tesseract_ocr_cli_model.py | 2 +- docling/models/tesseract_ocr_model.py | 2 +- docling/pipeline/base_pipeline.py | 2 +- docling/utils/export.py | 4 +-- docling/utils/layout_utils.py | 2 +- examples/batch_convert.py | 10 +++---- examples/run_with_formats.py | 4 +-- poetry.lock | 32 ++++++++++------------ pyproject.toml | 4 +-- tests/test_backend_docling_parse.py | 2 +- tests/test_backend_pdfium.py | 2 +- tests/verify_utils.py | 2 +- 29 files changed, 65 insertions(+), 85 deletions(-) diff --git a/docling/backend/abstract_backend.py b/docling/backend/abstract_backend.py index a83931df..5bfc02a2 100644 --- a/docling/backend/abstract_backend.py +++ b/docling/backend/abstract_backend.py @@ -3,7 +3,7 @@ from io import BytesIO from pathlib import Path from typing import TYPE_CHECKING, Set, Union -from docling_core.types.experimental import DoclingDocument +from docling_core.types.doc import DoclingDocument if TYPE_CHECKING: from docling.datamodel.base_models import InputFormat diff --git a/docling/backend/docling_parse_backend.py 
b/docling/backend/docling_parse_backend.py index 789471e8..f9732f40 100644 --- a/docling/backend/docling_parse_backend.py +++ b/docling/backend/docling_parse_backend.py @@ -5,7 +5,7 @@ from pathlib import Path from typing import Iterable, List, Optional, Union import pypdfium2 as pdfium -from docling_core.types.experimental import BoundingBox, CoordOrigin, Size +from docling_core.types.doc import BoundingBox, CoordOrigin, Size from docling_parse.docling_parse import pdf_parser from PIL import Image, ImageDraw from pypdfium2 import PdfPage diff --git a/docling/backend/html_backend.py b/docling/backend/html_backend.py index f6f35ff0..b9f7e630 100644 --- a/docling/backend/html_backend.py +++ b/docling/backend/html_backend.py @@ -4,14 +4,14 @@ from pathlib import Path from typing import Set, Union from bs4 import BeautifulSoup -from docling_core.types.experimental import ( +from docling_core.types.doc import ( DescriptionItem, DoclingDocument, PictureData, TableCell, TableData, ) -from docling_core.types.experimental.labels import DocItemLabel, GroupLabel +from docling_core.types.doc.labels import DocItemLabel, GroupLabel from docling.backend.abstract_backend import DeclarativeDocumentBackend from docling.datamodel.base_models import InputFormat diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index 325386ef..bdf73c99 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -3,7 +3,7 @@ from io import BytesIO from pathlib import Path from typing import Set, Union -from docling_core.types.experimental import ( +from docling_core.types.doc import ( DescriptionItem, DocItemLabel, DoclingDocument, @@ -14,7 +14,7 @@ from docling_core.types.experimental import ( TableCell, TableData, ) -from docling_core.types.experimental.base import BoundingBox, CoordOrigin, Size +from docling_core.types.doc.base import BoundingBox, CoordOrigin, Size from pptx import Presentation from 
pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py index 32c22080..a098db51 100644 --- a/docling/backend/msword_backend.py +++ b/docling/backend/msword_backend.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Set, Union import docx -from docling_core.types.experimental import ( +from docling_core.types.doc import ( DescriptionItem, DocItemLabel, DoclingDocument, diff --git a/docling/backend/pdf_backend.py b/docling/backend/pdf_backend.py index 39b2899e..0bac725b 100644 --- a/docling/backend/pdf_backend.py +++ b/docling/backend/pdf_backend.py @@ -2,8 +2,8 @@ from abc import ABC, abstractmethod from io import BytesIO from typing import Iterable, Optional, Set, Union -from docling_core.types.doc.doc_ocr import Path -from docling_core.types.experimental import BoundingBox, Size +from docling_core.types.doc import BoundingBox, Size +from docling_core.types.legacy_doc.doc_ocr import Path from PIL import Image from docling.backend.abstract_backend import PaginatedDocumentBackend diff --git a/docling/backend/pypdfium2_backend.py b/docling/backend/pypdfium2_backend.py index 4fdbdbb9..5631a50d 100644 --- a/docling/backend/pypdfium2_backend.py +++ b/docling/backend/pypdfium2_backend.py @@ -6,7 +6,7 @@ from typing import Iterable, List, Optional, Union import pypdfium2 as pdfium import pypdfium2.raw as pdfium_c -from docling_core.types.experimental import BoundingBox, CoordOrigin, Size +from docling_core.types.doc import BoundingBox, CoordOrigin, Size from PIL import Image, ImageDraw from pypdfium2 import PdfTextPage from pypdfium2._helpers.misc import PdfiumError diff --git a/docling/cli/main.py b/docling/cli/main.py index ccb4a8ff..a1f25bb0 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -137,10 +137,10 @@ def convert( from_formats: List[InputFormat] = typer.Option( None, "--from", - help="Specify input formats " "to convert from. 
Defaults to all formats.", + help="Specify input formats to convert from. Defaults to all formats.", ), to_formats: List[OutputFormat] = typer.Option( - None, "--to", help="Specify output formats. " "Defaults to Markdown." + None, "--to", help="Specify output formats. Defaults to Markdown." ), ocr: Annotated[ bool, @@ -148,9 +148,6 @@ def convert( ..., help="If enabled, the bitmap content will be processed using OCR." ), ] = True, - # backend: Annotated[ - # Backend, typer.Option(..., help="The PDF backend to use.") - # ] = Backend.DOCLING, ocr_engine: Annotated[ OcrEngine, typer.Option(..., help="The OCR engine to use.") ] = OcrEngine.EASYOCR, @@ -196,16 +193,6 @@ def convert( export_txt = OutputFormat.TEXT in to_formats export_doctags = OutputFormat.DOCTAGS in to_formats - # match backend: - # case Backend.PYPDFIUM2: - # do_cell_matching = ocr # only do cell matching when OCR enabled - # pdf_backend = PyPdfiumDocumentBackend - # case Backend.DOCLING: - # do_cell_matching = True - # pdf_backend = DoclingParseDocumentBackend - # case _: - # raise RuntimeError(f"Unexpected backend type {backend}") - match ocr_engine: case OcrEngine.EASYOCR: ocr_options = EasyOcrOptions() diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 14974c09..c915e419 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -2,9 +2,9 @@ from enum import Enum, auto from io import BytesIO from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union -from docling_core.types.experimental import BoundingBox, Size -from docling_core.types.experimental.document import PictureData, TableCell -from docling_core.types.experimental.labels import DocItemLabel +from docling_core.types.doc import BoundingBox, Size +from docling_core.types.doc.document import PictureData, TableCell +from docling_core.types.doc.labels import DocItemLabel from PIL.Image import Image from pydantic import BaseModel, ConfigDict diff --git 
a/docling/datamodel/document.py b/docling/datamodel/document.py index 64a8ea0c..b8bab5fa 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -12,9 +12,7 @@ from docling_core.types import DocumentDescription as DsDocumentDescription from docling_core.types import FileInfoObject as DsFileInfoObject from docling_core.types import PageDimensions, PageReference, Prov, Ref from docling_core.types import Table as DsSchemaTable -from docling_core.types.doc.base import BoundingBox as DsBoundingBox -from docling_core.types.doc.base import Figure, GlmTableCell, TableCell -from docling_core.types.experimental import ( +from docling_core.types.doc import ( DescriptionItem, DocItem, DocItemLabel, @@ -24,7 +22,9 @@ from docling_core.types.experimental import ( TableItem, TextItem, ) -from docling_core.types.experimental.document import ListItem +from docling_core.types.doc.document import ListItem +from docling_core.types.legacy_doc.base import BoundingBox as DsBoundingBox +from docling_core.types.legacy_doc.base import Figure, GlmTableCell, TableCell from docling_core.utils.file import resolve_file_source from pydantic import BaseModel from typing_extensions import deprecated diff --git a/docling/models/base_model.py b/docling/models/base_model.py index 7472d4c8..dffad502 100644 --- a/docling/models/base_model.py +++ b/docling/models/base_model.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from typing import Any, Iterable -from docling_core.types.experimental import DoclingDocument, NodeItem +from docling_core.types.doc import DoclingDocument, NodeItem from docling.datamodel.base_models import Page diff --git a/docling/models/base_ocr_model.py b/docling/models/base_ocr_model.py index aea7755b..bd293dcd 100644 --- a/docling/models/base_ocr_model.py +++ b/docling/models/base_ocr_model.py @@ -4,7 +4,7 @@ from abc import abstractmethod from typing import Iterable, List, Tuple import numpy as np -from docling_core.types.experimental import 
BoundingBox, CoordOrigin +from docling_core.types.doc import BoundingBox, CoordOrigin from PIL import Image, ImageDraw from rtree import index from scipy.ndimage import find_objects, label diff --git a/docling/models/ds_glm_model.py b/docling/models/ds_glm_model.py index e356d692..80ee87c8 100644 --- a/docling/models/ds_glm_model.py +++ b/docling/models/ds_glm_model.py @@ -15,10 +15,10 @@ from docling_core.types import DocumentDescription as DsDocumentDescription from docling_core.types import FileInfoObject as DsFileInfoObject from docling_core.types import PageDimensions, PageReference, Prov, Ref from docling_core.types import Table as DsSchemaTable -from docling_core.types.doc.base import BoundingBox as DsBoundingBox -from docling_core.types.doc.base import Figure, TableCell -from docling_core.types.experimental import BoundingBox, CoordOrigin -from docling_core.types.experimental.document import DoclingDocument +from docling_core.types.doc import BoundingBox, CoordOrigin +from docling_core.types.doc.document import DoclingDocument +from docling_core.types.legacy_doc.base import BoundingBox as DsBoundingBox +from docling_core.types.legacy_doc.base import Figure, TableCell from PIL import ImageDraw from pydantic import BaseModel, ConfigDict diff --git a/docling/models/dummy_picture_enrichment.py b/docling/models/dummy_picture_enrichment.py index d0772b32..d227b31f 100644 --- a/docling/models/dummy_picture_enrichment.py +++ b/docling/models/dummy_picture_enrichment.py @@ -1,10 +1,7 @@ from typing import Any, Iterable -from docling_core.types.experimental import DoclingDocument, NodeItem -from docling_core.types.experimental.document import ( - PictureClassificationData, - PictureItem, -) +from docling_core.types.doc import DoclingDocument, NodeItem +from docling_core.types.doc.document import PictureClassificationData, PictureItem from docling.models.base_model import BaseEnrichmentModel diff --git a/docling/models/easyocr_model.py 
b/docling/models/easyocr_model.py index 9408076b..9e71fffc 100644 --- a/docling/models/easyocr_model.py +++ b/docling/models/easyocr_model.py @@ -2,7 +2,7 @@ import logging from typing import Iterable import numpy -from docling_core.types.experimental import BoundingBox, CoordOrigin +from docling_core.types.doc import BoundingBox, CoordOrigin from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.pipeline_options import EasyOcrOptions diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py index 2a9f0510..004be330 100644 --- a/docling/models/layout_model.py +++ b/docling/models/layout_model.py @@ -5,8 +5,8 @@ import time from pathlib import Path from typing import Iterable, List -from docling_core.types.experimental import CoordOrigin -from docling_core.types.experimental.labels import DocItemLabel +from docling_core.types.doc import CoordOrigin +from docling_core.types.doc.labels import DocItemLabel from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor from PIL import ImageDraw diff --git a/docling/models/table_structure_model.py b/docling/models/table_structure_model.py index 9eaace5a..5445a0b9 100644 --- a/docling/models/table_structure_model.py +++ b/docling/models/table_structure_model.py @@ -3,9 +3,9 @@ from pathlib import Path from typing import Iterable, List import numpy -from docling_core.types.experimental import BoundingBox -from docling_core.types.experimental.document import TableCell -from docling_core.types.experimental.labels import DocItemLabel +from docling_core.types.doc import BoundingBox +from docling_core.types.doc.document import TableCell +from docling_core.types.doc.labels import DocItemLabel from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor from PIL import ImageDraw diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index be686b29..6ff0c35c 100644 --- 
a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -5,7 +5,7 @@ from subprocess import DEVNULL, PIPE, Popen from typing import Iterable, Tuple import pandas as pd -from docling_core.types.experimental import BoundingBox, CoordOrigin +from docling_core.types.doc import BoundingBox, CoordOrigin from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.pipeline_options import TesseractCliOcrOptions diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/tesseract_ocr_model.py index 56202ca9..6c5e5f04 100644 --- a/docling/models/tesseract_ocr_model.py +++ b/docling/models/tesseract_ocr_model.py @@ -1,7 +1,7 @@ import logging from typing import Iterable -from docling_core.types.experimental import BoundingBox, CoordOrigin +from docling_core.types.doc import BoundingBox, CoordOrigin from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.pipeline_options import TesseractCliOcrOptions diff --git a/docling/pipeline/base_pipeline.py b/docling/pipeline/base_pipeline.py index 313705e8..c903d569 100644 --- a/docling/pipeline/base_pipeline.py +++ b/docling/pipeline/base_pipeline.py @@ -5,7 +5,7 @@ import traceback from abc import ABC, abstractmethod from typing import Callable, Iterable, List -from docling_core.types.experimental import DoclingDocument, NodeItem +from docling_core.types.doc import DoclingDocument, NodeItem from docling.backend.abstract_backend import AbstractDocumentBackend from docling.backend.pdf_backend import PdfDocumentBackend diff --git a/docling/utils/export.py b/docling/utils/export.py index 784723b7..5b022f4a 100644 --- a/docling/utils/export.py +++ b/docling/utils/export.py @@ -1,8 +1,8 @@ import logging from typing import Any, Dict, Iterable, List, Tuple, Union -from docling_core.types.doc.base import BaseCell, BaseText, Ref, Table -from docling_core.types.experimental import BoundingBox, CoordOrigin +from docling_core.types.doc import BoundingBox, 
CoordOrigin +from docling_core.types.legacy_doc.base import BaseCell, BaseText, Ref, Table from docling.datamodel.base_models import OcrCell from docling.datamodel.document import ConversionResult, Page diff --git a/docling/utils/layout_utils.py b/docling/utils/layout_utils.py index 0272de89..85967a3b 100644 --- a/docling/utils/layout_utils.py +++ b/docling/utils/layout_utils.py @@ -2,7 +2,7 @@ import copy import logging import networkx as nx -from docling_core.types.experimental.labels import DocItemLabel +from docling_core.types.doc.labels import DocItemLabel logger = logging.getLogger("layout_utils") diff --git a/examples/batch_convert.py b/examples/batch_convert.py index bbd4a9a0..02a6fc5e 100644 --- a/examples/batch_convert.py +++ b/examples/batch_convert.py @@ -32,23 +32,23 @@ def export_documents( doc_filename = conv_res.input.file.stem if USE_V2: - # Export Docling document format to JSON (experimental): + # Export Docling document format to JSON: with (output_dir / f"{doc_filename}.json").open("w") as fp: fp.write(json.dumps(conv_res.document.export_to_dict())) - # Export Docling document format to YAML (experimental): + # Export Docling document format to YAML: with (output_dir / f"{doc_filename}.yaml").open("w") as fp: fp.write(yaml.safe_dump(conv_res.document.export_to_dict())) - # Export Docling document format to doctags (experimental): + # Export Docling document format to doctags: with (output_dir / f"{doc_filename}.doctags.txt").open("w") as fp: fp.write(conv_res.document.export_to_document_tokens()) - # Export Docling document format to markdown (experimental): + # Export Docling document format to markdown: with (output_dir / f"{doc_filename}.md").open("w") as fp: fp.write(conv_res.document.export_to_markdown()) - # Export Docling document format to text (experimental): + # Export Docling document format to text: with (output_dir / f"{doc_filename}.txt").open("w") as fp: fp.write(conv_res.document.export_to_markdown(strict_text=True)) diff --git 
a/examples/run_with_formats.py b/examples/run_with_formats.py index 72227afb..6ff33034 100644 --- a/examples/run_with_formats.py +++ b/examples/run_with_formats.py @@ -58,8 +58,8 @@ for res in conv_results: f"Document {res.input.file.name} converted." f"\nSaved markdown output to: {str(out_path)}" ) - # print(res.experimental.export_to_markdown()) - # Export Docling document format to markdown (experimental): + # print(res.document.export_to_markdown()) + # Export Docling document format to markdown: with (out_path / f"{res.input.file.name}.md").open("w") as fp: fp.write(res.document.export_to_markdown()) diff --git a/poetry.lock b/poetry.lock index b4551ef2..cefef5e6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -885,7 +885,7 @@ files = [] develop = false [package.dependencies] -docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "4ddecf80cf5afb4b1488172ecafcf12cb2b8cb9b"} +docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "6fee533a101ca08f62e88826218c33e0aab2f417"} docutils = "!=0.21" matplotlib = "^3.7.1" networkx = "^3.1" @@ -909,8 +909,8 @@ toolkit = ["deepsearch-toolkit (>=0.31.0)"] [package.source] type = "git" url = "https://github.com/DS4SD/deepsearch-glm.git" -reference = "58c589fc23d675e8098f24ec680a9bf93e2a796e" -resolved_reference = "58c589fc23d675e8098f24ec680a9bf93e2a796e" +reference = "c13a6cdda25206911d63a5a28e990217ad823068" +resolved_reference = "c13a6cdda25206911d63a5a28e990217ad823068" [[package]] name = "dill" @@ -958,8 +958,8 @@ tabulate = "^0.9.0" [package.source] type = "git" url = "https://github.com/DS4SD/docling-core.git" -reference = "4ddecf80cf5afb4b1488172ecafcf12cb2b8cb9b" -resolved_reference = "4ddecf80cf5afb4b1488172ecafcf12cb2b8cb9b" +reference = "6fee533a101ca08f62e88826218c33e0aab2f417" +resolved_reference = "6fee533a101ca08f62e88826218c33e0aab2f417" [[package]] name = "docling-ibm-models" @@ -2296,22 +2296,18 @@ transformers = ">=4.39.0" [[package]] name = "langchain-milvus" 
-version = "0.1.5" +version = "0.1.6" description = "An integration package connecting Milvus and LangChain" optional = false -python-versions = "<4.0,>=3.8.1" +python-versions = "<4.0,>=3.9" files = [ - {file = "langchain_milvus-0.1.5-py3-none-any.whl", hash = "sha256:74aa487738afde4c3e1346433ef26f9556e599826161562b308d3357d86529fd"}, - {file = "langchain_milvus-0.1.5.tar.gz", hash = "sha256:1cceab384783ba264055102e5831451482fd726a68feb64258f6dbbd8d702557"}, + {file = "langchain_milvus-0.1.6-py3-none-any.whl", hash = "sha256:efab3fcf613bd6151735e2c75f3264dba9daecb317b9bb22604c2aac579049a9"}, + {file = "langchain_milvus-0.1.6.tar.gz", hash = "sha256:155979a6e5aeb94b0e141a12d2fdb4c34a4d7a0e5da2cec1ae7c9bccf6649205"}, ] [package.dependencies] -langchain-core = {version = ">=0.2.38,<0.4", markers = "python_version >= \"3.9\""} +langchain-core = ">=0.2.38,<0.4" pymilvus = ">=2.4.3,<3.0.0" -scipy = [ - {version = ">=1.9,<2.0", markers = "python_version >= \"3.12\""}, - {version = ">=1.7,<2.0", markers = "python_version < \"3.12\""}, -] [[package]] name = "langchain-text-splitters" @@ -2329,13 +2325,13 @@ langchain-core = ">=0.2.38,<0.3.0" [[package]] name = "langsmith" -version = "0.1.134" +version = "0.1.135" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.134-py3-none-any.whl", hash = "sha256:ada98ad80ef38807725f32441a472da3dd28394010877751f48f458d3289da04"}, - {file = "langsmith-0.1.134.tar.gz", hash = "sha256:23abee3b508875a0e63c602afafffc02442a19cfd88f9daae05b3e9054fd6b61"}, + {file = "langsmith-0.1.135-py3-none-any.whl", hash = "sha256:b1d1ca3bad483a4239745c57e9b9157b4d099fbf3149be21e3d112c94ede06ac"}, + {file = "langsmith-0.1.135.tar.gz", hash = "sha256:7abed7e141386af99a2177f0b3600b124ae3ad1b482879ba0724ce92ef998a11"}, ] [package.dependencies] @@ -7118,4 +7114,4 @@ tesserocr = ["tesserocr"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "9678a9fb33ecbfbc6ec118fd3209aab5ab4e4c90d589e93c4dc7073dc9fb72ae" +content-hash = "46f6c1eb76034223f7d65760f6ebe0989ba9e8aff46fcdbce82c147030fcb8be" diff --git a/pyproject.toml b/pyproject.toml index 56a3e4a5..92225813 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,9 +37,9 @@ torchvision = [ ###################### python = "^3.10" pydantic = "^2.0.0" -docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "4ddecf80cf5afb4b1488172ecafcf12cb2b8cb9b"} +docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "6fee533a101ca08f62e88826218c33e0aab2f417"} docling-ibm-models = {git = "https://github.com/DS4SD/docling-ibm-models.git", rev = "1d2e2a2e6eb152c237f1383cdba20cf85db80b97"} -deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "58c589fc23d675e8098f24ec680a9bf93e2a796e"} +deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "c13a6cdda25206911d63a5a28e990217ad823068"} docling-parse = "^1.5.1" filetype = "^1.2.0" diff --git a/tests/test_backend_docling_parse.py b/tests/test_backend_docling_parse.py index fac71e70..ef119ba9 100644 --- a/tests/test_backend_docling_parse.py +++ b/tests/test_backend_docling_parse.py @@ -1,7 +1,7 @@ from pathlib import Path import pytest -from 
docling_core.types.experimental import BoundingBox +from docling_core.types.doc import BoundingBox from docling.backend.docling_parse_backend import ( DoclingParseDocumentBackend, diff --git a/tests/test_backend_pdfium.py b/tests/test_backend_pdfium.py index 9c0c3dd1..7eddb313 100644 --- a/tests/test_backend_pdfium.py +++ b/tests/test_backend_pdfium.py @@ -1,7 +1,7 @@ from pathlib import Path import pytest -from docling_core.types.experimental.base import BoundingBox +from docling_core.types.doc.base import BoundingBox from docling.backend.pypdfium2_backend import ( PyPdfiumDocumentBackend, diff --git a/tests/verify_utils.py b/tests/verify_utils.py index 5f04cf64..1349229e 100644 --- a/tests/verify_utils.py +++ b/tests/verify_utils.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import List from docling_core.types import Document as DsDocument -from docling_core.types.experimental import DoclingDocument +from docling_core.types.doc import DoclingDocument from pydantic import TypeAdapter from pydantic.json import pydantic_encoder