Import rewrites, adapt to changes in docling-core

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-09-27 09:21:15 +02:00
parent 9ffd1dc396
commit 2461b56b84
12 changed files with 17 additions and 14 deletions

View File

@@ -3,7 +3,7 @@ from io import BytesIO
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Iterable, Optional, Union
-from docling_core.types.experimental.base import BoundingBox, Size
+from docling_core.types.experimental import BoundingBox, Size
 from PIL import Image
 if TYPE_CHECKING:

View File

@@ -5,7 +5,7 @@ from pathlib import Path
 from typing import Iterable, List, Optional, Union
 import pypdfium2 as pdfium
-from docling_core.types.experimental.base import BoundingBox, CoordOrigin, Size
+from docling_core.types.experimental import BoundingBox, CoordOrigin, Size
 from docling_parse.docling_parse import pdf_parser
 from PIL import Image, ImageDraw
 from pypdfium2 import PdfPage

View File

@@ -6,7 +6,7 @@ from typing import Iterable, List, Optional, Union
 import pypdfium2 as pdfium
 import pypdfium2.raw as pdfium_c
-from docling_core.types.experimental.base import BoundingBox, CoordOrigin, Size
+from docling_core.types.experimental import BoundingBox, CoordOrigin, Size
 from PIL import Image, ImageDraw
 from pypdfium2 import PdfPage, PdfTextPage
 from pypdfium2._helpers.misc import PdfiumError

View File

@@ -4,7 +4,7 @@ from enum import Enum, auto
 from io import BytesIO
 from typing import Annotated, Any, Dict, List, Optional, Tuple, Union
-from docling_core.types.experimental.base import BoundingBox, Size
+from docling_core.types.experimental import BoundingBox, Size
 from docling_core.types.experimental.document import BaseFigureData, TableCell
 from docling_core.types.experimental.labels import DocItemLabel
 from PIL.Image import Image

View File

@@ -11,8 +11,11 @@ from docling_core.types import PageDimensions, PageReference, Prov, Ref
 from docling_core.types import Table as DsSchemaTable
 from docling_core.types.doc.base import BoundingBox as DsBoundingBox
 from docling_core.types.doc.base import Figure, TableCell
-from docling_core.types.experimental.document import DoclingDocument, FileInfo
-from docling_core.types.experimental.labels import DocItemLabel
+from docling_core.types.experimental import (
+    DescriptionItem,
+    DocItemLabel,
+    DoclingDocument,
+)
 from pydantic import BaseModel
 from typing_extensions import deprecated
@@ -62,7 +65,7 @@ _EMPTY_DOC = DsDocument(
 )
 _EMPTY_DOCLING_DOC = DoclingDocument(
-    description={}, file_info=FileInfo(filename="dummy", document_hash="123xyz")
+    description=DescriptionItem(), name="dummy"
 ) # TODO: Stub

View File

@@ -5,7 +5,7 @@ from typing import Iterable, List, Tuple
 import numpy
 import numpy as np
-from docling_core.types.experimental.base import BoundingBox, CoordOrigin
+from docling_core.types.experimental import BoundingBox, CoordOrigin
 from PIL import Image, ImageDraw
 from rtree import index
 from scipy.ndimage import find_objects, label

View File

@@ -11,7 +11,7 @@ from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_mode
 from docling_core.types import BaseText
 from docling_core.types import Document as DsDocument
 from docling_core.types import Ref
-from docling_core.types.experimental.base import BoundingBox, CoordOrigin
+from docling_core.types.experimental import BoundingBox, CoordOrigin
 from docling_core.types.experimental.document import DoclingDocument
 from PIL import ImageDraw

View File

@@ -2,7 +2,7 @@ import logging
 from typing import Iterable
 import numpy
-from docling_core.types.experimental.base import BoundingBox, CoordOrigin
+from docling_core.types.experimental import BoundingBox, CoordOrigin
 from docling.datamodel.base_models import OcrCell, Page
 from docling.models.base_ocr_model import BaseOcrModel

View File

@@ -4,7 +4,7 @@ import random
 import time
 from typing import Iterable, List
-from docling_core.types.experimental.base import CoordOrigin
+from docling_core.types.experimental import CoordOrigin
 from docling_core.types.experimental.labels import DocItemLabel
 from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
 from PIL import ImageDraw

View File

@@ -2,7 +2,7 @@ import copy
 from typing import Iterable, List
 import numpy
-from docling_core.types.experimental.base import BoundingBox
+from docling_core.types.experimental import BoundingBox
 from docling_core.types.experimental.document import TableCell
 from docling_core.types.experimental.labels import DocItemLabel
 from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor

View File

@@ -2,7 +2,7 @@ import logging
 from typing import Any, Dict, Iterable, List, Tuple, Union
 from docling_core.types.doc.base import BaseCell, BaseText, BoundingBox, Ref, Table
-from docling_core.types.experimental.base import CoordOrigin
+from docling_core.types.experimental import CoordOrigin
 from docling.datamodel.base_models import OcrCell
 from docling.datamodel.document import ConversionResult, Page

View File

@@ -1,7 +1,7 @@
 from pathlib import Path
 import pytest
-from docling_core.types.experimental.base import BoundingBox
+from docling_core.types.experimental import BoundingBox
 from docling.backend.docling_parse_backend import (
     DoclingParseDocumentBackend,