Reorganize imports from docling-core

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-09-20 10:53:52 +02:00
parent 6dd1e91c4a
commit abb6dddea8
15 changed files with 89 additions and 184 deletions

View File

@ -3,10 +3,11 @@ from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING, Any, Iterable, Optional, Union from typing import TYPE_CHECKING, Any, Iterable, Optional, Union
from docling_core.types.experimental.base import BoundingBox, Size
from PIL import Image from PIL import Image
if TYPE_CHECKING: if TYPE_CHECKING:
from docling.datamodel.base_models import BoundingBox, Cell, PageSize from docling.datamodel.base_models import Cell
class PdfPageBackend(ABC): class PdfPageBackend(ABC):
@ -30,7 +31,7 @@ class PdfPageBackend(ABC):
pass pass
@abstractmethod @abstractmethod
def get_size(self) -> "PageSize": def get_size(self) -> "Size":
pass pass
@abstractmethod @abstractmethod

View File

@ -5,12 +5,13 @@ from pathlib import Path
from typing import Iterable, List, Optional, Union from typing import Iterable, List, Optional, Union
import pypdfium2 as pdfium import pypdfium2 as pdfium
from docling_core.types.experimental.base import BoundingBox, CoordOrigin, Size
from docling_parse.docling_parse import pdf_parser from docling_parse.docling_parse import pdf_parser
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
from pypdfium2 import PdfPage from pypdfium2 import PdfPage
from docling.backend.abstract_backend import PdfDocumentBackend, PdfPageBackend from docling.backend.abstract_backend import PdfDocumentBackend, PdfPageBackend
from docling.datamodel.base_models import BoundingBox, Cell, CoordOrigin, PageSize from docling.datamodel.base_models import Cell
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -177,8 +178,8 @@ class DoclingParsePageBackend(PdfPageBackend):
return image return image
def get_size(self) -> PageSize: def get_size(self) -> Size:
return PageSize(width=self._ppage.get_width(), height=self._ppage.get_height()) return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
def unload(self): def unload(self):
self._ppage = None self._ppage = None

View File

@ -6,12 +6,13 @@ from typing import Iterable, List, Optional, Union
import pypdfium2 as pdfium import pypdfium2 as pdfium
import pypdfium2.raw as pdfium_c import pypdfium2.raw as pdfium_c
from docling_core.types.experimental.base import BoundingBox, CoordOrigin, Size
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
from pypdfium2 import PdfPage, PdfTextPage from pypdfium2 import PdfPage, PdfTextPage
from pypdfium2._helpers.misc import PdfiumError from pypdfium2._helpers.misc import PdfiumError
from docling.backend.abstract_backend import PdfDocumentBackend, PdfPageBackend from docling.backend.abstract_backend import PdfDocumentBackend, PdfPageBackend
from docling.datamodel.base_models import BoundingBox, Cell, CoordOrigin, PageSize from docling.datamodel.base_models import Cell
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -222,8 +223,8 @@ class PyPdfiumPageBackend(PdfPageBackend):
return image return image
def get_size(self) -> PageSize: def get_size(self) -> Size:
return PageSize(width=self._ppage.get_width(), height=self._ppage.get_height()) return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
def unload(self): def unload(self):
self._ppage = None self._ppage = None

View File

@ -4,6 +4,7 @@ from enum import Enum, auto
from io import BytesIO from io import BytesIO
from typing import Annotated, Any, Dict, List, Optional, Tuple, Union from typing import Annotated, Any, Dict, List, Optional, Tuple, Union
from docling_core.types.experimental.base import BoundingBox, Size
from PIL.Image import Image from PIL.Image import Image
from pydantic import BaseModel, ConfigDict, Field, model_validator from pydantic import BaseModel, ConfigDict, Field, model_validator
from typing_extensions import Self from typing_extensions import Self
@ -24,11 +25,6 @@ class DocInputType(str, Enum):
STREAM = auto() STREAM = auto()
class CoordOrigin(str, Enum):
TOPLEFT = auto()
BOTTOMLEFT = auto()
class DoclingComponentType(str, Enum): class DoclingComponentType(str, Enum):
PDF_BACKEND = auto() PDF_BACKEND = auto()
MODEL = auto() MODEL = auto()
@ -41,115 +37,6 @@ class ErrorItem(BaseModel):
error_message: str error_message: str
class PageSize(BaseModel):
width: float = 0.0
height: float = 0.0
class BoundingBox(BaseModel):
l: float # left
t: float # top
r: float # right
b: float # bottom
coord_origin: CoordOrigin = CoordOrigin.TOPLEFT
@property
def width(self):
return self.r - self.l
@property
def height(self):
return abs(self.t - self.b)
def scaled(self, scale: float) -> "BoundingBox":
out_bbox = copy.deepcopy(self)
out_bbox.l *= scale
out_bbox.r *= scale
out_bbox.t *= scale
out_bbox.b *= scale
return out_bbox
def normalized(self, page_size: PageSize) -> "BoundingBox":
out_bbox = copy.deepcopy(self)
out_bbox.l /= page_size.width
out_bbox.r /= page_size.width
out_bbox.t /= page_size.height
out_bbox.b /= page_size.height
return out_bbox
def as_tuple(self):
if self.coord_origin == CoordOrigin.TOPLEFT:
return (self.l, self.t, self.r, self.b)
elif self.coord_origin == CoordOrigin.BOTTOMLEFT:
return (self.l, self.b, self.r, self.t)
@classmethod
def from_tuple(cls, coord: Tuple[float, ...], origin: CoordOrigin):
if origin == CoordOrigin.TOPLEFT:
l, t, r, b = coord[0], coord[1], coord[2], coord[3]
if r < l:
l, r = r, l
if b < t:
b, t = t, b
return BoundingBox(l=l, t=t, r=r, b=b, coord_origin=origin)
elif origin == CoordOrigin.BOTTOMLEFT:
l, b, r, t = coord[0], coord[1], coord[2], coord[3]
if r < l:
l, r = r, l
if b > t:
b, t = t, b
return BoundingBox(l=l, t=t, r=r, b=b, coord_origin=origin)
def area(self) -> float:
return (self.r - self.l) * (self.b - self.t)
def intersection_area_with(self, other: "BoundingBox") -> float:
# Calculate intersection coordinates
left = max(self.l, other.l)
top = max(self.t, other.t)
right = min(self.r, other.r)
bottom = min(self.b, other.b)
# Calculate intersection dimensions
width = right - left
height = bottom - top
# If the bounding boxes do not overlap, width or height will be negative
if width <= 0 or height <= 0:
return 0.0
return width * height
def to_bottom_left_origin(self, page_height) -> "BoundingBox":
if self.coord_origin == CoordOrigin.BOTTOMLEFT:
return self
elif self.coord_origin == CoordOrigin.TOPLEFT:
return BoundingBox(
l=self.l,
r=self.r,
t=page_height - self.t,
b=page_height - self.b,
coord_origin=CoordOrigin.BOTTOMLEFT,
)
def to_top_left_origin(self, page_height):
if self.coord_origin == CoordOrigin.TOPLEFT:
return self
elif self.coord_origin == CoordOrigin.BOTTOMLEFT:
return BoundingBox(
l=self.l,
r=self.r,
t=page_height - self.t, # self.b
b=page_height - self.b, # self.t
coord_origin=CoordOrigin.TOPLEFT,
)
class Cell(BaseModel): class Cell(BaseModel):
id: int id: int
text: str text: str
@ -266,7 +153,7 @@ class Page(BaseModel):
page_no: int page_no: int
page_hash: Optional[str] = None page_hash: Optional[str] = None
size: Optional[PageSize] = None size: Optional[Size] = None
cells: List[Cell] = [] cells: List[Cell] = []
predictions: PagePredictions = PagePredictions() predictions: PagePredictions = PagePredictions()
assembled: Optional[AssembledUnit] = None assembled: Optional[AssembledUnit] = None

View File

@ -4,13 +4,13 @@ from pathlib import Path, PurePath
from typing import ClassVar, Dict, Iterable, List, Optional, Tuple, Type, Union from typing import ClassVar, Dict, Iterable, List, Optional, Tuple, Type, Union
from docling_core.types import BaseCell, BaseText from docling_core.types import BaseCell, BaseText
from docling_core.types import BoundingBox as DsBoundingBox
from docling_core.types import Document as DsDocument from docling_core.types import Document as DsDocument
from docling_core.types import DocumentDescription as DsDocumentDescription from docling_core.types import DocumentDescription as DsDocumentDescription
from docling_core.types import FileInfoObject as DsFileInfoObject from docling_core.types import FileInfoObject as DsFileInfoObject
from docling_core.types import PageDimensions, PageReference, Prov, Ref from docling_core.types import PageDimensions, PageReference, Prov, Ref
from docling_core.types import Table as DsSchemaTable from docling_core.types import Table as DsSchemaTable
from docling_core.types import TableCell from docling_core.types import TableCell
from docling_core.types.doc.base import BoundingBox as DsBoundingBox
from docling_core.types.doc.base import Figure from docling_core.types.doc.base import Figure
from pydantic import BaseModel from pydantic import BaseModel
from typing_extensions import deprecated from typing_extensions import deprecated

View File

@ -5,11 +5,12 @@ from typing import Iterable, List, Tuple
import numpy import numpy
import numpy as np import numpy as np
from docling_core.types.experimental.base import BoundingBox, CoordOrigin
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
from rtree import index from rtree import index
from scipy.ndimage import find_objects, label from scipy.ndimage import find_objects, label
from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page from docling.datamodel.base_models import OcrCell, Page
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)

View File

@ -7,9 +7,10 @@ from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_mode
from docling_core.types import BaseText from docling_core.types import BaseText
from docling_core.types import Document as DsDocument from docling_core.types import Document as DsDocument
from docling_core.types import Ref from docling_core.types import Ref
from docling_core.types.experimental.base import BoundingBox, CoordOrigin
from PIL import ImageDraw from PIL import ImageDraw
from docling.datamodel.base_models import BoundingBox, Cluster, CoordOrigin from docling.datamodel.base_models import Cluster
from docling.datamodel.document import ConversionResult from docling.datamodel.document import ConversionResult

View File

@ -2,8 +2,9 @@ import logging
from typing import Iterable from typing import Iterable
import numpy import numpy
from docling_core.types.experimental.base import BoundingBox, CoordOrigin
from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page from docling.datamodel.base_models import OcrCell, Page
from docling.models.base_ocr_model import BaseOcrModel from docling.models.base_ocr_model import BaseOcrModel
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)

View File

@ -4,6 +4,7 @@ import random
import time import time
from typing import Iterable, List from typing import Iterable, List
from docling_core.types.experimental.base import CoordOrigin
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
from PIL import ImageDraw from PIL import ImageDraw
@ -11,7 +12,6 @@ from docling.datamodel.base_models import (
BoundingBox, BoundingBox,
Cell, Cell,
Cluster, Cluster,
CoordOrigin,
LayoutPrediction, LayoutPrediction,
Page, Page,
) )

View File

@ -2,11 +2,11 @@ import copy
from typing import Iterable, List from typing import Iterable, List
import numpy import numpy
from docling_core.types.experimental.base import BoundingBox
from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
from PIL import ImageDraw from PIL import ImageDraw
from docling.datamodel.base_models import ( from docling.datamodel.base_models import (
BoundingBox,
Page, Page,
TableCell, TableCell,
TableElement, TableElement,

View File

@ -1,9 +1,17 @@
import logging import logging
from typing import Any, Dict, Iterable, List, Tuple, Union from typing import Any, Dict, Iterable, List, Tuple, Union
from docling_core.types.doc.base import BaseCell, BaseText, Ref, Table, TableCell from docling_core.types.doc.base import (
BaseCell,
BaseText,
BoundingBox,
Ref,
Table,
TableCell,
)
from docling_core.types.experimental.base import CoordOrigin
from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell from docling.datamodel.base_models import OcrCell
from docling.datamodel.document import ConversionResult, Page from docling.datamodel.document import ConversionResult, Page
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)

100
poetry.lock generated
View File

@ -857,50 +857,33 @@ name = "deepsearch-glm"
version = "0.21.1" version = "0.21.1"
description = "Graph Language Models" description = "Graph Language Models"
optional = false optional = false
python-versions = "<4.0,>=3.8" python-versions = "^3.9"
files = [ files = []
{file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:b765d371ab0a4f57dd2532c651d7dc1b4a187395153e619a77b6f0d0f6aefb32"}, develop = false
{file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:c69e055b98d0a22267a1d0b6139801aecc5b7386289b89f53f976ab723352728"},
{file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:3eaa245e5ac4ab3e9d0c95a93e23f58d61d70f11431b76b6705fae358eb31c62"},
{file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:63d195f6c5b30f4f908436589cffd4a5b9e18553c44c57fb635068a2afbd7fab"},
{file = "deepsearch_glm-0.21.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91c9296a2e417a30bf030de0c7c2e2cce4773c58bead039d5e6fccbf7deb2269"},
{file = "deepsearch_glm-0.21.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:166b9958d3a8a98d0671a1e3fdf8083ded9ccf12c2ab80fb9709908a2cf81784"},
{file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:13bea2b4e8c04647ec743c3feb1ee66c784db542ab9dbed8dad7eb66fca74b70"},
{file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:c5b8b8e2207615ff99e535f00548c7b0b8e4ca4593e59edd83fcad98fc318284"},
{file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:ba74868243caf5ac850fff7c45c8a372c1cac0193431e22eb41888d45ac79719"},
{file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:7815b06aa1c3953488496f191ce0265d0ee7bed5a6b96454a5f9d6f1add28f69"},
{file = "deepsearch_glm-0.21.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1a7dd2a1e63cee47f6090ebfebc15f68d24f61d5f4f45a21f22120b2267798d"},
{file = "deepsearch_glm-0.21.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d52bd2934a27fdc9db5f2d0713dbeec0c94e5c5843d29996e85d641a11498ad0"},
{file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:fd4d0d4ff853e566b05769c704a4ea3c050c0cfc5721e4e2035e550fb2a8fe91"},
{file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:802a59a8a3bea1801bce848d58d19fcdbbcea27d9e2c23f163419d13cdec2345"},
{file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:1ead7958bc044000a8d43cce53c9b82be0d341b0ca5cf7b39a0c09f9c4fd8ceb"},
{file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:312cf2b0b6560c8dfe5331a5a80a0ed5cb409d29ee6cc999a81696774d50f5e7"},
{file = "deepsearch_glm-0.21.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc3d6f6ca2cffbe5e112818c8aba9a783af8ab7cffff04624bfb5bf8d185b707"},
{file = "deepsearch_glm-0.21.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bc537d5e9d108233b7e7249c6739292dc9c36a0f39c11e7f430700df35ff884"},
{file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:4db0a700c08ff2d6285461dc5f4a68ccd36876a59b62131f847dc4be76a85989"},
{file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:f1041c44d1a4d1a43a324781795b03edfdfd8076c49a610c4dd384c86f2a6236"},
{file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:efb0e9678fe07640bd9b6dc07651eaf1f8e5d5602e379b4cf78dbcddc62b50e9"},
{file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:f8d46922d74339ec7fd7a6933220ebc36b2ff39738ad9bb74ea55a198dd31b2f"},
{file = "deepsearch_glm-0.21.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2872de101ce6d262f57afd3f4d68452064c214c5ab001b7ac698a948e0725314"},
{file = "deepsearch_glm-0.21.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:187da7dabc11317badbf6983ee508c367299eb39ed78938623206be6b21e41bd"},
]
[package.dependencies] [package.dependencies]
docling-core = {git = "ssh://git@github.com/DS4SD/docling-core.git", branch = "cau/new-format-dev"}
docutils = "!=0.21" docutils = "!=0.21"
matplotlib = ">=3.7.1,<4.0.0" matplotlib = "^3.7.1"
networkx = ">=3.1,<4.0" networkx = "^3.1"
netwulf = ">=0.1.5,<0.2.0" netwulf = "^0.1.5"
numerize = ">=0.12,<0.13" numerize = "^0.12"
numpy = {version = ">=1.26.4,<2.0.0", markers = "python_version >= \"3.9\""} numpy = {version = "^1.26.4", markers = "python_version >= \"3.9\""}
pandas = ">=1.5.1" pandas = ">=1.5.1"
python-dotenv = ">=1.0.0,<2.0.0" python-dotenv = "^1.0.0"
rich = ">=13.7.0,<14.0.0" rich = "^13.7.0"
tabulate = ">=0.8.9" tabulate = ">=0.8.9"
tqdm = ">=4.64.0,<5.0.0" tqdm = "^4.64.0"
[package.extras] [package.extras]
toolkit = ["deepsearch-toolkit (>=0.31.0)"] toolkit = ["deepsearch-toolkit (>=0.31.0)"]
[package.source]
type = "git"
url = "ssh://git@github.com/DS4SD/deepsearch-glm.git"
reference = "cau/new-format-dev"
resolved_reference = "6d86b7ddaa8911ec57df9bbabf981a42166e53d2"
[[package]] [[package]]
name = "deprecated" name = "deprecated"
version = "1.2.14" version = "1.2.14"
@ -957,23 +940,27 @@ files = [
[[package]] [[package]]
name = "docling-core" name = "docling-core"
version = "1.4.0" version = "1.4.1"
description = "A python library to define and validate data types in Docling." description = "A python library to define and validate data types in Docling."
optional = false optional = false
python-versions = "<4.0,>=3.9" python-versions = "^3.9"
files = [ files = []
{file = "docling_core-1.4.0-py3-none-any.whl", hash = "sha256:11cd6228d5f321fd11427cf61f40148afd544170e82236228794300f14f8a15a"}, develop = false
{file = "docling_core-1.4.0.tar.gz", hash = "sha256:6ea151974172a87a9bca0d63787dc16bdb4170ecb73f18e61e3c2e95eb3fe3d8"},
]
[package.dependencies] [package.dependencies]
json-schema-for-humans = ">=1.0.0,<2.0.0" json-schema-for-humans = "^1.0.0"
jsonref = ">=1.1.0,<2.0.0" jsonref = "^1.1.0"
jsonschema = ">=4.16.0,<5.0.0" jsonschema = "^4.16.0"
pandas = ">=2.2.2,<3.0.0" pandas = "^2.2.2"
pydantic = ">=2.6.0,<3.0.0" pydantic = "^2.6.0"
pyproject-toml = ">=0.0.10,<0.0.11" pyproject-toml = "^0.0.10"
tabulate = ">=0.9.0,<0.10.0" tabulate = "^0.9.0"
[package.source]
type = "git"
url = "ssh://git@github.com/DS4SD/docling-core.git"
reference = "cau/new-format-dev"
resolved_reference = "ed087646ec9ad86c5b54eb37d7b99322d03487f0"
[[package]] [[package]]
name = "docling-ibm-models" name = "docling-ibm-models"
@ -4697,6 +4684,21 @@ files = [
[package.dependencies] [package.dependencies]
six = ">=1.5" six = ">=1.5"
[[package]]
name = "python-docx"
version = "1.1.2"
description = "Create, read, and update Microsoft Word .docx files."
optional = false
python-versions = ">=3.7"
files = [
{file = "python_docx-1.1.2-py3-none-any.whl", hash = "sha256:08c20d6058916fb19853fcf080f7f42b6270d89eac9fa5f8c15f691c0017fabe"},
{file = "python_docx-1.1.2.tar.gz", hash = "sha256:0cf1f22e95b9002addca7948e16f2cd7acdfd498047f1941ca5d293db7762efd"},
]
[package.dependencies]
lxml = ">=3.1.0"
typing-extensions = ">=4.9.0"
[[package]] [[package]]
name = "python-dotenv" name = "python-dotenv"
version = "1.0.1" version = "1.0.1"
@ -7257,4 +7259,4 @@ examples = ["langchain-huggingface", "langchain-milvus", "langchain-text-splitte
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.10" python-versions = "^3.10"
content-hash = "7dc789b3c981898fdabec03f85ebb92273f2bb55b2bf1e18dad1d4c361c6b97b" content-hash = "1b908180d822d74ae8033e8b6c650b8d00b4365fc7dd36cea6505305651b79b6"

View File

@ -23,9 +23,10 @@ packages = [{include = "docling"}]
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.10" python = "^3.10"
pydantic = "^2.0.0" pydantic = "^2.0.0"
docling-core = "^1.4.0" docling-core = {git = "ssh://git@github.com/DS4SD/docling-core.git", branch = "cau/new-format-dev"}
docling-ibm-models = "^1.2.0" docling-ibm-models = "^1.2.0"
deepsearch-glm = "^0.21.1" deepsearch-glm = {git = "ssh://git@github.com/DS4SD/deepsearch-glm.git", branch = "cau/new-format-dev"}
filetype = "^1.2.0" filetype = "^1.2.0"
pypdfium2 = "^4.30.0" pypdfium2 = "^4.30.0"
pydantic-settings = "^2.3.0" pydantic-settings = "^2.3.0"
@ -61,6 +62,7 @@ torchvision = [
{version = "~0.17.2", optional = true, markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"} {version = "~0.17.2", optional = true, markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"}
] ]
typer = "^0.12.5" typer = "^0.12.5"
python-docx = "^1.1.2"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
black = {extras = ["jupyter"], version = "^24.4.2"} black = {extras = ["jupyter"], version = "^24.4.2"}

View File

@ -1,12 +1,12 @@
from pathlib import Path from pathlib import Path
import pytest import pytest
from docling_core.types.experimental.base import BoundingBox
from docling.backend.docling_parse_backend import ( from docling.backend.docling_parse_backend import (
DoclingParseDocumentBackend, DoclingParseDocumentBackend,
DoclingParsePageBackend, DoclingParsePageBackend,
) )
from docling.datamodel.base_models import BoundingBox
@pytest.fixture @pytest.fixture

View File

@ -1,12 +1,12 @@
from pathlib import Path from pathlib import Path
import pytest import pytest
from docling_core.types.experimental.base import BoundingBox
from docling.backend.pypdfium2_backend import ( from docling.backend.pypdfium2_backend import (
PyPdfiumDocumentBackend, PyPdfiumDocumentBackend,
PyPdfiumPageBackend, PyPdfiumPageBackend,
) )
from docling.datamodel.base_models import BoundingBox
@pytest.fixture @pytest.fixture