mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
Reorganize imports from docling-core
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
6dd1e91c4a
commit
abb6dddea8
@ -3,10 +3,11 @@ from io import BytesIO
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Any, Iterable, Optional, Union
|
from typing import TYPE_CHECKING, Any, Iterable, Optional, Union
|
||||||
|
|
||||||
|
from docling_core.types.experimental.base import BoundingBox, Size
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from docling.datamodel.base_models import BoundingBox, Cell, PageSize
|
from docling.datamodel.base_models import Cell
|
||||||
|
|
||||||
|
|
||||||
class PdfPageBackend(ABC):
|
class PdfPageBackend(ABC):
|
||||||
@ -30,7 +31,7 @@ class PdfPageBackend(ABC):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_size(self) -> "PageSize":
|
def get_size(self) -> "Size":
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
@ -5,12 +5,13 @@ from pathlib import Path
|
|||||||
from typing import Iterable, List, Optional, Union
|
from typing import Iterable, List, Optional, Union
|
||||||
|
|
||||||
import pypdfium2 as pdfium
|
import pypdfium2 as pdfium
|
||||||
|
from docling_core.types.experimental.base import BoundingBox, CoordOrigin, Size
|
||||||
from docling_parse.docling_parse import pdf_parser
|
from docling_parse.docling_parse import pdf_parser
|
||||||
from PIL import Image, ImageDraw
|
from PIL import Image, ImageDraw
|
||||||
from pypdfium2 import PdfPage
|
from pypdfium2 import PdfPage
|
||||||
|
|
||||||
from docling.backend.abstract_backend import PdfDocumentBackend, PdfPageBackend
|
from docling.backend.abstract_backend import PdfDocumentBackend, PdfPageBackend
|
||||||
from docling.datamodel.base_models import BoundingBox, Cell, CoordOrigin, PageSize
|
from docling.datamodel.base_models import Cell
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -177,8 +178,8 @@ class DoclingParsePageBackend(PdfPageBackend):
|
|||||||
|
|
||||||
return image
|
return image
|
||||||
|
|
||||||
def get_size(self) -> PageSize:
|
def get_size(self) -> Size:
|
||||||
return PageSize(width=self._ppage.get_width(), height=self._ppage.get_height())
|
return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
|
||||||
|
|
||||||
def unload(self):
|
def unload(self):
|
||||||
self._ppage = None
|
self._ppage = None
|
||||||
|
@ -6,12 +6,13 @@ from typing import Iterable, List, Optional, Union
|
|||||||
|
|
||||||
import pypdfium2 as pdfium
|
import pypdfium2 as pdfium
|
||||||
import pypdfium2.raw as pdfium_c
|
import pypdfium2.raw as pdfium_c
|
||||||
|
from docling_core.types.experimental.base import BoundingBox, CoordOrigin, Size
|
||||||
from PIL import Image, ImageDraw
|
from PIL import Image, ImageDraw
|
||||||
from pypdfium2 import PdfPage, PdfTextPage
|
from pypdfium2 import PdfPage, PdfTextPage
|
||||||
from pypdfium2._helpers.misc import PdfiumError
|
from pypdfium2._helpers.misc import PdfiumError
|
||||||
|
|
||||||
from docling.backend.abstract_backend import PdfDocumentBackend, PdfPageBackend
|
from docling.backend.abstract_backend import PdfDocumentBackend, PdfPageBackend
|
||||||
from docling.datamodel.base_models import BoundingBox, Cell, CoordOrigin, PageSize
|
from docling.datamodel.base_models import Cell
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -222,8 +223,8 @@ class PyPdfiumPageBackend(PdfPageBackend):
|
|||||||
|
|
||||||
return image
|
return image
|
||||||
|
|
||||||
def get_size(self) -> PageSize:
|
def get_size(self) -> Size:
|
||||||
return PageSize(width=self._ppage.get_width(), height=self._ppage.get_height())
|
return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
|
||||||
|
|
||||||
def unload(self):
|
def unload(self):
|
||||||
self._ppage = None
|
self._ppage = None
|
||||||
|
@ -4,6 +4,7 @@ from enum import Enum, auto
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Annotated, Any, Dict, List, Optional, Tuple, Union
|
from typing import Annotated, Any, Dict, List, Optional, Tuple, Union
|
||||||
|
|
||||||
|
from docling_core.types.experimental.base import BoundingBox, Size
|
||||||
from PIL.Image import Image
|
from PIL.Image import Image
|
||||||
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
||||||
from typing_extensions import Self
|
from typing_extensions import Self
|
||||||
@ -24,11 +25,6 @@ class DocInputType(str, Enum):
|
|||||||
STREAM = auto()
|
STREAM = auto()
|
||||||
|
|
||||||
|
|
||||||
class CoordOrigin(str, Enum):
|
|
||||||
TOPLEFT = auto()
|
|
||||||
BOTTOMLEFT = auto()
|
|
||||||
|
|
||||||
|
|
||||||
class DoclingComponentType(str, Enum):
|
class DoclingComponentType(str, Enum):
|
||||||
PDF_BACKEND = auto()
|
PDF_BACKEND = auto()
|
||||||
MODEL = auto()
|
MODEL = auto()
|
||||||
@ -41,115 +37,6 @@ class ErrorItem(BaseModel):
|
|||||||
error_message: str
|
error_message: str
|
||||||
|
|
||||||
|
|
||||||
class PageSize(BaseModel):
|
|
||||||
width: float = 0.0
|
|
||||||
height: float = 0.0
|
|
||||||
|
|
||||||
|
|
||||||
class BoundingBox(BaseModel):
|
|
||||||
l: float # left
|
|
||||||
t: float # top
|
|
||||||
r: float # right
|
|
||||||
b: float # bottom
|
|
||||||
|
|
||||||
coord_origin: CoordOrigin = CoordOrigin.TOPLEFT
|
|
||||||
|
|
||||||
@property
|
|
||||||
def width(self):
|
|
||||||
return self.r - self.l
|
|
||||||
|
|
||||||
@property
|
|
||||||
def height(self):
|
|
||||||
return abs(self.t - self.b)
|
|
||||||
|
|
||||||
def scaled(self, scale: float) -> "BoundingBox":
|
|
||||||
out_bbox = copy.deepcopy(self)
|
|
||||||
out_bbox.l *= scale
|
|
||||||
out_bbox.r *= scale
|
|
||||||
out_bbox.t *= scale
|
|
||||||
out_bbox.b *= scale
|
|
||||||
|
|
||||||
return out_bbox
|
|
||||||
|
|
||||||
def normalized(self, page_size: PageSize) -> "BoundingBox":
|
|
||||||
out_bbox = copy.deepcopy(self)
|
|
||||||
out_bbox.l /= page_size.width
|
|
||||||
out_bbox.r /= page_size.width
|
|
||||||
out_bbox.t /= page_size.height
|
|
||||||
out_bbox.b /= page_size.height
|
|
||||||
|
|
||||||
return out_bbox
|
|
||||||
|
|
||||||
def as_tuple(self):
|
|
||||||
if self.coord_origin == CoordOrigin.TOPLEFT:
|
|
||||||
return (self.l, self.t, self.r, self.b)
|
|
||||||
elif self.coord_origin == CoordOrigin.BOTTOMLEFT:
|
|
||||||
return (self.l, self.b, self.r, self.t)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_tuple(cls, coord: Tuple[float, ...], origin: CoordOrigin):
|
|
||||||
if origin == CoordOrigin.TOPLEFT:
|
|
||||||
l, t, r, b = coord[0], coord[1], coord[2], coord[3]
|
|
||||||
if r < l:
|
|
||||||
l, r = r, l
|
|
||||||
if b < t:
|
|
||||||
b, t = t, b
|
|
||||||
|
|
||||||
return BoundingBox(l=l, t=t, r=r, b=b, coord_origin=origin)
|
|
||||||
elif origin == CoordOrigin.BOTTOMLEFT:
|
|
||||||
l, b, r, t = coord[0], coord[1], coord[2], coord[3]
|
|
||||||
if r < l:
|
|
||||||
l, r = r, l
|
|
||||||
if b > t:
|
|
||||||
b, t = t, b
|
|
||||||
|
|
||||||
return BoundingBox(l=l, t=t, r=r, b=b, coord_origin=origin)
|
|
||||||
|
|
||||||
def area(self) -> float:
|
|
||||||
return (self.r - self.l) * (self.b - self.t)
|
|
||||||
|
|
||||||
def intersection_area_with(self, other: "BoundingBox") -> float:
|
|
||||||
# Calculate intersection coordinates
|
|
||||||
left = max(self.l, other.l)
|
|
||||||
top = max(self.t, other.t)
|
|
||||||
right = min(self.r, other.r)
|
|
||||||
bottom = min(self.b, other.b)
|
|
||||||
|
|
||||||
# Calculate intersection dimensions
|
|
||||||
width = right - left
|
|
||||||
height = bottom - top
|
|
||||||
|
|
||||||
# If the bounding boxes do not overlap, width or height will be negative
|
|
||||||
if width <= 0 or height <= 0:
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
return width * height
|
|
||||||
|
|
||||||
def to_bottom_left_origin(self, page_height) -> "BoundingBox":
|
|
||||||
if self.coord_origin == CoordOrigin.BOTTOMLEFT:
|
|
||||||
return self
|
|
||||||
elif self.coord_origin == CoordOrigin.TOPLEFT:
|
|
||||||
return BoundingBox(
|
|
||||||
l=self.l,
|
|
||||||
r=self.r,
|
|
||||||
t=page_height - self.t,
|
|
||||||
b=page_height - self.b,
|
|
||||||
coord_origin=CoordOrigin.BOTTOMLEFT,
|
|
||||||
)
|
|
||||||
|
|
||||||
def to_top_left_origin(self, page_height):
|
|
||||||
if self.coord_origin == CoordOrigin.TOPLEFT:
|
|
||||||
return self
|
|
||||||
elif self.coord_origin == CoordOrigin.BOTTOMLEFT:
|
|
||||||
return BoundingBox(
|
|
||||||
l=self.l,
|
|
||||||
r=self.r,
|
|
||||||
t=page_height - self.t, # self.b
|
|
||||||
b=page_height - self.b, # self.t
|
|
||||||
coord_origin=CoordOrigin.TOPLEFT,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class Cell(BaseModel):
|
class Cell(BaseModel):
|
||||||
id: int
|
id: int
|
||||||
text: str
|
text: str
|
||||||
@ -266,7 +153,7 @@ class Page(BaseModel):
|
|||||||
|
|
||||||
page_no: int
|
page_no: int
|
||||||
page_hash: Optional[str] = None
|
page_hash: Optional[str] = None
|
||||||
size: Optional[PageSize] = None
|
size: Optional[Size] = None
|
||||||
cells: List[Cell] = []
|
cells: List[Cell] = []
|
||||||
predictions: PagePredictions = PagePredictions()
|
predictions: PagePredictions = PagePredictions()
|
||||||
assembled: Optional[AssembledUnit] = None
|
assembled: Optional[AssembledUnit] = None
|
||||||
|
@ -4,13 +4,13 @@ from pathlib import Path, PurePath
|
|||||||
from typing import ClassVar, Dict, Iterable, List, Optional, Tuple, Type, Union
|
from typing import ClassVar, Dict, Iterable, List, Optional, Tuple, Type, Union
|
||||||
|
|
||||||
from docling_core.types import BaseCell, BaseText
|
from docling_core.types import BaseCell, BaseText
|
||||||
from docling_core.types import BoundingBox as DsBoundingBox
|
|
||||||
from docling_core.types import Document as DsDocument
|
from docling_core.types import Document as DsDocument
|
||||||
from docling_core.types import DocumentDescription as DsDocumentDescription
|
from docling_core.types import DocumentDescription as DsDocumentDescription
|
||||||
from docling_core.types import FileInfoObject as DsFileInfoObject
|
from docling_core.types import FileInfoObject as DsFileInfoObject
|
||||||
from docling_core.types import PageDimensions, PageReference, Prov, Ref
|
from docling_core.types import PageDimensions, PageReference, Prov, Ref
|
||||||
from docling_core.types import Table as DsSchemaTable
|
from docling_core.types import Table as DsSchemaTable
|
||||||
from docling_core.types import TableCell
|
from docling_core.types import TableCell
|
||||||
|
from docling_core.types.doc.base import BoundingBox as DsBoundingBox
|
||||||
from docling_core.types.doc.base import Figure
|
from docling_core.types.doc.base import Figure
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from typing_extensions import deprecated
|
from typing_extensions import deprecated
|
||||||
|
@ -5,11 +5,12 @@ from typing import Iterable, List, Tuple
|
|||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from docling_core.types.experimental.base import BoundingBox, CoordOrigin
|
||||||
from PIL import Image, ImageDraw
|
from PIL import Image, ImageDraw
|
||||||
from rtree import index
|
from rtree import index
|
||||||
from scipy.ndimage import find_objects, label
|
from scipy.ndimage import find_objects, label
|
||||||
|
|
||||||
from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page
|
from docling.datamodel.base_models import OcrCell, Page
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -7,9 +7,10 @@ from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_mode
|
|||||||
from docling_core.types import BaseText
|
from docling_core.types import BaseText
|
||||||
from docling_core.types import Document as DsDocument
|
from docling_core.types import Document as DsDocument
|
||||||
from docling_core.types import Ref
|
from docling_core.types import Ref
|
||||||
|
from docling_core.types.experimental.base import BoundingBox, CoordOrigin
|
||||||
from PIL import ImageDraw
|
from PIL import ImageDraw
|
||||||
|
|
||||||
from docling.datamodel.base_models import BoundingBox, Cluster, CoordOrigin
|
from docling.datamodel.base_models import Cluster
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
|
|
||||||
|
|
||||||
|
@ -2,8 +2,9 @@ import logging
|
|||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
|
from docling_core.types.experimental.base import BoundingBox, CoordOrigin
|
||||||
|
|
||||||
from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page
|
from docling.datamodel.base_models import OcrCell, Page
|
||||||
from docling.models.base_ocr_model import BaseOcrModel
|
from docling.models.base_ocr_model import BaseOcrModel
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
@ -4,6 +4,7 @@ import random
|
|||||||
import time
|
import time
|
||||||
from typing import Iterable, List
|
from typing import Iterable, List
|
||||||
|
|
||||||
|
from docling_core.types.experimental.base import CoordOrigin
|
||||||
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
||||||
from PIL import ImageDraw
|
from PIL import ImageDraw
|
||||||
|
|
||||||
@ -11,7 +12,6 @@ from docling.datamodel.base_models import (
|
|||||||
BoundingBox,
|
BoundingBox,
|
||||||
Cell,
|
Cell,
|
||||||
Cluster,
|
Cluster,
|
||||||
CoordOrigin,
|
|
||||||
LayoutPrediction,
|
LayoutPrediction,
|
||||||
Page,
|
Page,
|
||||||
)
|
)
|
||||||
|
@ -2,11 +2,11 @@ import copy
|
|||||||
from typing import Iterable, List
|
from typing import Iterable, List
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
|
from docling_core.types.experimental.base import BoundingBox
|
||||||
from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
|
from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
|
||||||
from PIL import ImageDraw
|
from PIL import ImageDraw
|
||||||
|
|
||||||
from docling.datamodel.base_models import (
|
from docling.datamodel.base_models import (
|
||||||
BoundingBox,
|
|
||||||
Page,
|
Page,
|
||||||
TableCell,
|
TableCell,
|
||||||
TableElement,
|
TableElement,
|
||||||
|
@ -1,9 +1,17 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, Iterable, List, Tuple, Union
|
from typing import Any, Dict, Iterable, List, Tuple, Union
|
||||||
|
|
||||||
from docling_core.types.doc.base import BaseCell, BaseText, Ref, Table, TableCell
|
from docling_core.types.doc.base import (
|
||||||
|
BaseCell,
|
||||||
|
BaseText,
|
||||||
|
BoundingBox,
|
||||||
|
Ref,
|
||||||
|
Table,
|
||||||
|
TableCell,
|
||||||
|
)
|
||||||
|
from docling_core.types.experimental.base import CoordOrigin
|
||||||
|
|
||||||
from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell
|
from docling.datamodel.base_models import OcrCell
|
||||||
from docling.datamodel.document import ConversionResult, Page
|
from docling.datamodel.document import ConversionResult, Page
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
100
poetry.lock
generated
100
poetry.lock
generated
@ -857,50 +857,33 @@ name = "deepsearch-glm"
|
|||||||
version = "0.21.1"
|
version = "0.21.1"
|
||||||
description = "Graph Language Models"
|
description = "Graph Language Models"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "<4.0,>=3.8"
|
python-versions = "^3.9"
|
||||||
files = [
|
files = []
|
||||||
{file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:b765d371ab0a4f57dd2532c651d7dc1b4a187395153e619a77b6f0d0f6aefb32"},
|
develop = false
|
||||||
{file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:c69e055b98d0a22267a1d0b6139801aecc5b7386289b89f53f976ab723352728"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:3eaa245e5ac4ab3e9d0c95a93e23f58d61d70f11431b76b6705fae358eb31c62"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:63d195f6c5b30f4f908436589cffd4a5b9e18553c44c57fb635068a2afbd7fab"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91c9296a2e417a30bf030de0c7c2e2cce4773c58bead039d5e6fccbf7deb2269"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:166b9958d3a8a98d0671a1e3fdf8083ded9ccf12c2ab80fb9709908a2cf81784"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:13bea2b4e8c04647ec743c3feb1ee66c784db542ab9dbed8dad7eb66fca74b70"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:c5b8b8e2207615ff99e535f00548c7b0b8e4ca4593e59edd83fcad98fc318284"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:ba74868243caf5ac850fff7c45c8a372c1cac0193431e22eb41888d45ac79719"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:7815b06aa1c3953488496f191ce0265d0ee7bed5a6b96454a5f9d6f1add28f69"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1a7dd2a1e63cee47f6090ebfebc15f68d24f61d5f4f45a21f22120b2267798d"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d52bd2934a27fdc9db5f2d0713dbeec0c94e5c5843d29996e85d641a11498ad0"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:fd4d0d4ff853e566b05769c704a4ea3c050c0cfc5721e4e2035e550fb2a8fe91"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:802a59a8a3bea1801bce848d58d19fcdbbcea27d9e2c23f163419d13cdec2345"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:1ead7958bc044000a8d43cce53c9b82be0d341b0ca5cf7b39a0c09f9c4fd8ceb"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:312cf2b0b6560c8dfe5331a5a80a0ed5cb409d29ee6cc999a81696774d50f5e7"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc3d6f6ca2cffbe5e112818c8aba9a783af8ab7cffff04624bfb5bf8d185b707"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bc537d5e9d108233b7e7249c6739292dc9c36a0f39c11e7f430700df35ff884"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:4db0a700c08ff2d6285461dc5f4a68ccd36876a59b62131f847dc4be76a85989"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:f1041c44d1a4d1a43a324781795b03edfdfd8076c49a610c4dd384c86f2a6236"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:efb0e9678fe07640bd9b6dc07651eaf1f8e5d5602e379b4cf78dbcddc62b50e9"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:f8d46922d74339ec7fd7a6933220ebc36b2ff39738ad9bb74ea55a198dd31b2f"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2872de101ce6d262f57afd3f4d68452064c214c5ab001b7ac698a948e0725314"},
|
|
||||||
{file = "deepsearch_glm-0.21.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:187da7dabc11317badbf6983ee508c367299eb39ed78938623206be6b21e41bd"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
docling-core = {git = "ssh://git@github.com/DS4SD/docling-core.git", branch = "cau/new-format-dev"}
|
||||||
docutils = "!=0.21"
|
docutils = "!=0.21"
|
||||||
matplotlib = ">=3.7.1,<4.0.0"
|
matplotlib = "^3.7.1"
|
||||||
networkx = ">=3.1,<4.0"
|
networkx = "^3.1"
|
||||||
netwulf = ">=0.1.5,<0.2.0"
|
netwulf = "^0.1.5"
|
||||||
numerize = ">=0.12,<0.13"
|
numerize = "^0.12"
|
||||||
numpy = {version = ">=1.26.4,<2.0.0", markers = "python_version >= \"3.9\""}
|
numpy = {version = "^1.26.4", markers = "python_version >= \"3.9\""}
|
||||||
pandas = ">=1.5.1"
|
pandas = ">=1.5.1"
|
||||||
python-dotenv = ">=1.0.0,<2.0.0"
|
python-dotenv = "^1.0.0"
|
||||||
rich = ">=13.7.0,<14.0.0"
|
rich = "^13.7.0"
|
||||||
tabulate = ">=0.8.9"
|
tabulate = ">=0.8.9"
|
||||||
tqdm = ">=4.64.0,<5.0.0"
|
tqdm = "^4.64.0"
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
toolkit = ["deepsearch-toolkit (>=0.31.0)"]
|
toolkit = ["deepsearch-toolkit (>=0.31.0)"]
|
||||||
|
|
||||||
|
[package.source]
|
||||||
|
type = "git"
|
||||||
|
url = "ssh://git@github.com/DS4SD/deepsearch-glm.git"
|
||||||
|
reference = "cau/new-format-dev"
|
||||||
|
resolved_reference = "6d86b7ddaa8911ec57df9bbabf981a42166e53d2"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "deprecated"
|
name = "deprecated"
|
||||||
version = "1.2.14"
|
version = "1.2.14"
|
||||||
@ -957,23 +940,27 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "docling-core"
|
name = "docling-core"
|
||||||
version = "1.4.0"
|
version = "1.4.1"
|
||||||
description = "A python library to define and validate data types in Docling."
|
description = "A python library to define and validate data types in Docling."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "<4.0,>=3.9"
|
python-versions = "^3.9"
|
||||||
files = [
|
files = []
|
||||||
{file = "docling_core-1.4.0-py3-none-any.whl", hash = "sha256:11cd6228d5f321fd11427cf61f40148afd544170e82236228794300f14f8a15a"},
|
develop = false
|
||||||
{file = "docling_core-1.4.0.tar.gz", hash = "sha256:6ea151974172a87a9bca0d63787dc16bdb4170ecb73f18e61e3c2e95eb3fe3d8"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
json-schema-for-humans = ">=1.0.0,<2.0.0"
|
json-schema-for-humans = "^1.0.0"
|
||||||
jsonref = ">=1.1.0,<2.0.0"
|
jsonref = "^1.1.0"
|
||||||
jsonschema = ">=4.16.0,<5.0.0"
|
jsonschema = "^4.16.0"
|
||||||
pandas = ">=2.2.2,<3.0.0"
|
pandas = "^2.2.2"
|
||||||
pydantic = ">=2.6.0,<3.0.0"
|
pydantic = "^2.6.0"
|
||||||
pyproject-toml = ">=0.0.10,<0.0.11"
|
pyproject-toml = "^0.0.10"
|
||||||
tabulate = ">=0.9.0,<0.10.0"
|
tabulate = "^0.9.0"
|
||||||
|
|
||||||
|
[package.source]
|
||||||
|
type = "git"
|
||||||
|
url = "ssh://git@github.com/DS4SD/docling-core.git"
|
||||||
|
reference = "cau/new-format-dev"
|
||||||
|
resolved_reference = "ed087646ec9ad86c5b54eb37d7b99322d03487f0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "docling-ibm-models"
|
name = "docling-ibm-models"
|
||||||
@ -4697,6 +4684,21 @@ files = [
|
|||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
six = ">=1.5"
|
six = ">=1.5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "python-docx"
|
||||||
|
version = "1.1.2"
|
||||||
|
description = "Create, read, and update Microsoft Word .docx files."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
files = [
|
||||||
|
{file = "python_docx-1.1.2-py3-none-any.whl", hash = "sha256:08c20d6058916fb19853fcf080f7f42b6270d89eac9fa5f8c15f691c0017fabe"},
|
||||||
|
{file = "python_docx-1.1.2.tar.gz", hash = "sha256:0cf1f22e95b9002addca7948e16f2cd7acdfd498047f1941ca5d293db7762efd"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
lxml = ">=3.1.0"
|
||||||
|
typing-extensions = ">=4.9.0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "python-dotenv"
|
name = "python-dotenv"
|
||||||
version = "1.0.1"
|
version = "1.0.1"
|
||||||
@ -7257,4 +7259,4 @@ examples = ["langchain-huggingface", "langchain-milvus", "langchain-text-splitte
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.10"
|
python-versions = "^3.10"
|
||||||
content-hash = "7dc789b3c981898fdabec03f85ebb92273f2bb55b2bf1e18dad1d4c361c6b97b"
|
content-hash = "1b908180d822d74ae8033e8b6c650b8d00b4365fc7dd36cea6505305651b79b6"
|
||||||
|
@ -23,9 +23,10 @@ packages = [{include = "docling"}]
|
|||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.10"
|
python = "^3.10"
|
||||||
pydantic = "^2.0.0"
|
pydantic = "^2.0.0"
|
||||||
docling-core = "^1.4.0"
|
docling-core = {git = "ssh://git@github.com/DS4SD/docling-core.git", branch = "cau/new-format-dev"}
|
||||||
docling-ibm-models = "^1.2.0"
|
docling-ibm-models = "^1.2.0"
|
||||||
deepsearch-glm = "^0.21.1"
|
deepsearch-glm = {git = "ssh://git@github.com/DS4SD/deepsearch-glm.git", branch = "cau/new-format-dev"}
|
||||||
|
|
||||||
filetype = "^1.2.0"
|
filetype = "^1.2.0"
|
||||||
pypdfium2 = "^4.30.0"
|
pypdfium2 = "^4.30.0"
|
||||||
pydantic-settings = "^2.3.0"
|
pydantic-settings = "^2.3.0"
|
||||||
@ -61,6 +62,7 @@ torchvision = [
|
|||||||
{version = "~0.17.2", optional = true, markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"}
|
{version = "~0.17.2", optional = true, markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"}
|
||||||
]
|
]
|
||||||
typer = "^0.12.5"
|
typer = "^0.12.5"
|
||||||
|
python-docx = "^1.1.2"
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
black = {extras = ["jupyter"], version = "^24.4.2"}
|
black = {extras = ["jupyter"], version = "^24.4.2"}
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from docling_core.types.experimental.base import BoundingBox
|
||||||
|
|
||||||
from docling.backend.docling_parse_backend import (
|
from docling.backend.docling_parse_backend import (
|
||||||
DoclingParseDocumentBackend,
|
DoclingParseDocumentBackend,
|
||||||
DoclingParsePageBackend,
|
DoclingParsePageBackend,
|
||||||
)
|
)
|
||||||
from docling.datamodel.base_models import BoundingBox
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from docling_core.types.experimental.base import BoundingBox
|
||||||
|
|
||||||
from docling.backend.pypdfium2_backend import (
|
from docling.backend.pypdfium2_backend import (
|
||||||
PyPdfiumDocumentBackend,
|
PyPdfiumDocumentBackend,
|
||||||
PyPdfiumPageBackend,
|
PyPdfiumPageBackend,
|
||||||
)
|
)
|
||||||
from docling.datamodel.base_models import BoundingBox
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
Loading…
Reference in New Issue
Block a user