Import rewrites, adapt to changes in docling-core

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-09-27 09:21:15 +02:00
parent 9ffd1dc396
commit 2461b56b84
12 changed files with 17 additions and 14 deletions

View File

@@ -3,7 +3,7 @@ from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING, Any, Iterable, Optional, Union from typing import TYPE_CHECKING, Any, Iterable, Optional, Union
from docling_core.types.experimental.base import BoundingBox, Size from docling_core.types.experimental import BoundingBox, Size
from PIL import Image from PIL import Image
if TYPE_CHECKING: if TYPE_CHECKING:

View File

@@ -5,7 +5,7 @@ from pathlib import Path
from typing import Iterable, List, Optional, Union from typing import Iterable, List, Optional, Union
import pypdfium2 as pdfium import pypdfium2 as pdfium
from docling_core.types.experimental.base import BoundingBox, CoordOrigin, Size from docling_core.types.experimental import BoundingBox, CoordOrigin, Size
from docling_parse.docling_parse import pdf_parser from docling_parse.docling_parse import pdf_parser
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
from pypdfium2 import PdfPage from pypdfium2 import PdfPage

View File

@@ -6,7 +6,7 @@ from typing import Iterable, List, Optional, Union
import pypdfium2 as pdfium import pypdfium2 as pdfium
import pypdfium2.raw as pdfium_c import pypdfium2.raw as pdfium_c
from docling_core.types.experimental.base import BoundingBox, CoordOrigin, Size from docling_core.types.experimental import BoundingBox, CoordOrigin, Size
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
from pypdfium2 import PdfPage, PdfTextPage from pypdfium2 import PdfPage, PdfTextPage
from pypdfium2._helpers.misc import PdfiumError from pypdfium2._helpers.misc import PdfiumError

View File

@@ -4,7 +4,7 @@ from enum import Enum, auto
from io import BytesIO from io import BytesIO
from typing import Annotated, Any, Dict, List, Optional, Tuple, Union from typing import Annotated, Any, Dict, List, Optional, Tuple, Union
from docling_core.types.experimental.base import BoundingBox, Size from docling_core.types.experimental import BoundingBox, Size
from docling_core.types.experimental.document import BaseFigureData, TableCell from docling_core.types.experimental.document import BaseFigureData, TableCell
from docling_core.types.experimental.labels import DocItemLabel from docling_core.types.experimental.labels import DocItemLabel
from PIL.Image import Image from PIL.Image import Image

View File

@@ -11,8 +11,11 @@ from docling_core.types import PageDimensions, PageReference, Prov, Ref
from docling_core.types import Table as DsSchemaTable from docling_core.types import Table as DsSchemaTable
from docling_core.types.doc.base import BoundingBox as DsBoundingBox from docling_core.types.doc.base import BoundingBox as DsBoundingBox
from docling_core.types.doc.base import Figure, TableCell from docling_core.types.doc.base import Figure, TableCell
from docling_core.types.experimental.document import DoclingDocument, FileInfo from docling_core.types.experimental import (
from docling_core.types.experimental.labels import DocItemLabel DescriptionItem,
DocItemLabel,
DoclingDocument,
)
from pydantic import BaseModel from pydantic import BaseModel
from typing_extensions import deprecated from typing_extensions import deprecated
@@ -62,7 +65,7 @@ _EMPTY_DOC = DsDocument(
) )
_EMPTY_DOCLING_DOC = DoclingDocument( _EMPTY_DOCLING_DOC = DoclingDocument(
description={}, file_info=FileInfo(filename="dummy", document_hash="123xyz") description=DescriptionItem(), name="dummy"
) # TODO: Stub ) # TODO: Stub

View File

@@ -5,7 +5,7 @@ from typing import Iterable, List, Tuple
import numpy import numpy
import numpy as np import numpy as np
from docling_core.types.experimental.base import BoundingBox, CoordOrigin from docling_core.types.experimental import BoundingBox, CoordOrigin
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
from rtree import index from rtree import index
from scipy.ndimage import find_objects, label from scipy.ndimage import find_objects, label

View File

@@ -11,7 +11,7 @@ from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_mode
from docling_core.types import BaseText from docling_core.types import BaseText
from docling_core.types import Document as DsDocument from docling_core.types import Document as DsDocument
from docling_core.types import Ref from docling_core.types import Ref
from docling_core.types.experimental.base import BoundingBox, CoordOrigin from docling_core.types.experimental import BoundingBox, CoordOrigin
from docling_core.types.experimental.document import DoclingDocument from docling_core.types.experimental.document import DoclingDocument
from PIL import ImageDraw from PIL import ImageDraw

View File

@@ -2,7 +2,7 @@ import logging
from typing import Iterable from typing import Iterable
import numpy import numpy
from docling_core.types.experimental.base import BoundingBox, CoordOrigin from docling_core.types.experimental import BoundingBox, CoordOrigin
from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.base_models import OcrCell, Page
from docling.models.base_ocr_model import BaseOcrModel from docling.models.base_ocr_model import BaseOcrModel

View File

@@ -4,7 +4,7 @@ import random
import time import time
from typing import Iterable, List from typing import Iterable, List
from docling_core.types.experimental.base import CoordOrigin from docling_core.types.experimental import CoordOrigin
from docling_core.types.experimental.labels import DocItemLabel from docling_core.types.experimental.labels import DocItemLabel
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
from PIL import ImageDraw from PIL import ImageDraw

View File

@@ -2,7 +2,7 @@ import copy
from typing import Iterable, List from typing import Iterable, List
import numpy import numpy
from docling_core.types.experimental.base import BoundingBox from docling_core.types.experimental import BoundingBox
from docling_core.types.experimental.document import TableCell from docling_core.types.experimental.document import TableCell
from docling_core.types.experimental.labels import DocItemLabel from docling_core.types.experimental.labels import DocItemLabel
from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor

View File

@@ -2,7 +2,7 @@ import logging
from typing import Any, Dict, Iterable, List, Tuple, Union from typing import Any, Dict, Iterable, List, Tuple, Union
from docling_core.types.doc.base import BaseCell, BaseText, BoundingBox, Ref, Table from docling_core.types.doc.base import BaseCell, BaseText, BoundingBox, Ref, Table
from docling_core.types.experimental.base import CoordOrigin from docling_core.types.experimental import CoordOrigin
from docling.datamodel.base_models import OcrCell from docling.datamodel.base_models import OcrCell
from docling.datamodel.document import ConversionResult, Page from docling.datamodel.document import ConversionResult, Page

View File

@@ -1,7 +1,7 @@
from pathlib import Path from pathlib import Path
import pytest import pytest
from docling_core.types.experimental.base import BoundingBox from docling_core.types.experimental import BoundingBox
from docling.backend.docling_parse_backend import ( from docling.backend.docling_parse_backend import (
DoclingParseDocumentBackend, DoclingParseDocumentBackend,