Refactor imports: use docling_core TableCell/BaseFigureData and rename TableElement to Table

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-09-23 12:22:49 +02:00
parent ac51a09065
commit 12477c8cac
7 changed files with 26 additions and 63 deletions

View File

@ -5,6 +5,7 @@ from io import BytesIO
from typing import Annotated, Any, Dict, List, Optional, Tuple, Union
from docling_core.types.experimental.base import BoundingBox, Size
from docling_core.types.experimental.document import BaseFigureData, TableCell
from PIL.Image import Image
from pydantic import BaseModel, ConfigDict, Field, model_validator
from typing_extensions import Self
@ -67,37 +68,7 @@ class LayoutPrediction(BaseModel):
clusters: List[Cluster] = []
class TableCell(BaseModel):
bbox: BoundingBox
row_span: int
col_span: int
start_row_offset_idx: int
end_row_offset_idx: int
start_col_offset_idx: int
end_col_offset_idx: int
text: str
column_header: bool = False
row_header: bool = False
row_section: bool = False
@model_validator(mode="before")
@classmethod
def from_dict_format(cls, data: Any) -> Any:
if isinstance(data, Dict):
text = data["bbox"].get("token", "")
if not len(text):
text_cells = data.pop("text_cell_bboxes", None)
if text_cells:
for el in text_cells:
text += el["token"] + " "
text = text.strip()
data["text"] = text
return data
class TableElement(BasePageElement):
class Table(BasePageElement):
otsl_seq: List[str]
num_rows: int = 0
num_cols: int = 0
@ -105,18 +76,14 @@ class TableElement(BasePageElement):
class TableStructurePrediction(BaseModel):
table_map: Dict[int, TableElement] = {}
table_map: Dict[int, Table] = {}
class TextElement(BasePageElement): ...
class FigureData(BaseModel):
pass
class FigureElement(BasePageElement):
data: Optional[FigureData] = None
data: Optional[BaseFigureData] = None
provenance: Optional[str] = None
predicted_class: Optional[str] = None
confidence: Optional[float] = None
@ -139,7 +106,7 @@ class PagePredictions(BaseModel):
equations_prediction: Optional[EquationPrediction] = None
PageElement = Union[TextElement, TableElement, FigureElement]
PageElement = Union[TextElement, Table, FigureElement]
class AssembledUnit(BaseModel):

View File

@ -9,9 +9,8 @@ from docling_core.types import DocumentDescription as DsDocumentDescription
from docling_core.types import FileInfoObject as DsFileInfoObject
from docling_core.types import PageDimensions, PageReference, Prov, Ref
from docling_core.types import Table as DsSchemaTable
from docling_core.types import TableCell
from docling_core.types.doc.base import BoundingBox as DsBoundingBox
from docling_core.types.doc.base import Figure
from docling_core.types.doc.base import Figure, TableCell
from pydantic import BaseModel
from typing_extensions import deprecated
@ -25,7 +24,7 @@ from docling.datamodel.base_models import (
FigureElement,
Page,
PageElement,
TableElement,
Table,
TextElement,
)
from docling.datamodel.settings import DocumentLimits
@ -186,7 +185,7 @@ class ConvertedDocument(BaseModel):
],
)
)
elif isinstance(element, TableElement):
elif isinstance(element, Table):
index = len(tables)
ref_str = f"#/tables/{index}"
main_text.append(

View File

@ -7,7 +7,7 @@ from docling.datamodel.base_models import (
FigureElement,
Page,
PageElement,
TableElement,
Table,
TextElement,
)
from docling.models.layout_model import LayoutModel
@ -84,7 +84,7 @@ class PageAssembleModel:
if (
not tbl
): # fallback: add table without structure, if it isn't present
tbl = TableElement(
tbl = Table(
label=cluster.label,
id=cluster.id,
text="",

View File

@ -3,15 +3,11 @@ from typing import Iterable, List
import numpy
from docling_core.types.experimental.base import BoundingBox
from docling_core.types.experimental.document import TableCell
from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
from PIL import ImageDraw
from docling.datamodel.base_models import (
Page,
TableCell,
TableElement,
TableStructurePrediction,
)
from docling.datamodel.base_models import Page, Table, TableStructurePrediction
class TableStructureModel:
@ -32,7 +28,7 @@ class TableStructureModel:
self.tf_predictor = TFPredictor(self.tm_config)
self.scale = 2.0 # Scale up table input images to 144 dpi
def draw_table_and_cells(self, page: Page, tbl_list: List[TableElement]):
def draw_table_and_cells(self, page: Page, tbl_list: List[Table]):
image = (
page._backend.get_page_image()
) # make new image to avoid drawing on the saved ones
@ -134,7 +130,7 @@ class TableStructureModel:
num_cols = table_out["predict_details"]["num_cols"]
otsl_seq = table_out["predict_details"]["prediction"]["rs_seq"]
tbl = TableElement(
tbl = Table(
otsl_seq=otsl_seq,
table_cells=table_cells,
num_rows=num_rows,

View File

@ -1,14 +1,7 @@
import logging
from typing import Any, Dict, Iterable, List, Tuple, Union
from docling_core.types.doc.base import (
BaseCell,
BaseText,
BoundingBox,
Ref,
Table,
TableCell,
)
from docling_core.types.doc.base import BaseCell, BaseText, BoundingBox, Ref, Table
from docling_core.types.experimental.base import CoordOrigin
from docling.datamodel.base_models import OcrCell

View File

@ -32,8 +32,16 @@ def export_documents(
with (output_dir / f"{doc_filename}.json").open("w") as fp:
fp.write(json.dumps(conv_res.render_as_dict()))
# Export Docling document format to JSON (experimental):
with (output_dir / f"{doc_filename}.experimental.json").open("w") as fp:
fp.write(
json.dumps(
conv_res.experimental.model_dump(mode="json", by_alias=True)
)
)
# Export Docling document format to YAML (experimental):
with (output_dir / f"{doc_filename}.yaml").open("w") as fp:
with (output_dir / f"{doc_filename}.experimental.yaml").open("w") as fp:
fp.write(
yaml.safe_dump(
conv_res.experimental.model_dump(mode="json", by_alias=True)

View File

@ -8,7 +8,7 @@ from docling.datamodel.base_models import (
ConversionStatus,
FigureElement,
PageElement,
TableElement,
Table,
)
from docling.datamodel.document import DocumentConversionInput
from docling.document_converter import DocumentConverter
@ -61,7 +61,7 @@ def main():
# Export figures and tables
for element, image in conv_res.render_element_images(
element_types=(FigureElement, TableElement)
element_types=(FigureElement, Table)
):
element_image_filename = (
output_dir / f"{doc_filename}-element-{element.id}.png"