From e4f8ff3980c34bf98830f4a36fa81472042cbdc1 Mon Sep 17 00:00:00 2001 From: Michele Dolfi Date: Tue, 14 Jan 2025 17:54:55 +0100 Subject: [PATCH] renaming Signed-off-by: Michele Dolfi --- docling/datamodel/base_models.py | 10 +++---- docling/models/base_model.py | 10 ++++--- docling/models/page_assemble_model.py | 4 +-- docling/pipeline/base_pipeline.py | 7 +++-- .../examples/develop_formula_understanding.py | 30 ++++++++++--------- 5 files changed, 33 insertions(+), 28 deletions(-) diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 6dac4672..99d30108 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -4,10 +4,10 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Union from docling_core.types.doc import ( BoundingBox, DocItemLabel, + NodeItem, PictureDataType, Size, TableCell, - TextItem, ) from docling_core.types.io import ( # DO ΝΟΤ REMOVE; explicitly exposed from this location DocumentStream, @@ -202,12 +202,12 @@ class AssembledUnit(BaseModel): headers: List[PageElement] = [] -class TextImageEnrichmentElement(BaseModel): - element: TextItem - image: Image - +class ItemAndImageEnrichmentElement(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) + item: NodeItem + image: Image + class Page(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) diff --git a/docling/models/base_model.py b/docling/models/base_model.py index 7cc764b6..5a98382a 100644 --- a/docling/models/base_model.py +++ b/docling/models/base_model.py @@ -4,7 +4,7 @@ from typing import Any, Generic, Iterable, Optional from docling_core.types.doc import DoclingDocument, NodeItem, TextItem from typing_extensions import TypeVar -from docling.datamodel.base_models import Page, TextImageEnrichmentElement +from docling.datamodel.base_models import ItemAndImageEnrichmentElement, Page from docling.datamodel.document import ConversionResult @@ -48,13 +48,15 @@ class BaseEnrichmentModel(GenericEnrichmentModel[NodeItem]): return None -class BaseTextImageEnrichmentModel(GenericEnrichmentModel[TextImageEnrichmentElement]): +class BaseItemAndImageEnrichmentModel( + GenericEnrichmentModel[ItemAndImageEnrichmentElement] +): images_scale: float def prepare_element( self, conv_res: ConversionResult, element: NodeItem - ) -> Optional[TextImageEnrichmentElement]: + ) -> Optional[ItemAndImageEnrichmentElement]: if not self.is_processable(doc=conv_res.document, element=element): return None @@ -64,4 +66,4 @@ class BaseTextImageEnrichmentModel(GenericEnrichmentModel[TextImageEnrichmentEle cropped_image = conv_res.pages[page_ix].get_image( scale=self.images_scale, cropbox=element_prov.bbox ) - return TextImageEnrichmentElement(element=element, image=cropped_image) + return ItemAndImageEnrichmentElement(item=element, image=cropped_image) diff --git a/docling/models/page_assemble_model.py b/docling/models/page_assemble_model.py index 812d00d2..6239dbfa 100644 --- a/docling/models/page_assemble_model.py +++ b/docling/models/page_assemble_model.py @@ -21,8 +21,8 @@ from docling.utils.profiling import TimeRecorder _log = logging.getLogger(__name__) -class PageAssembleOptions(BaseModel): # could be removed entirely or just make empty - keep_images: bool = False +class PageAssembleOptions(BaseModel): + pass class PageAssembleModel(BasePageModel): diff --git a/docling/pipeline/base_pipeline.py b/docling/pipeline/base_pipeline.py index 380c47f4..034e6d42 100644 --- a/docling/pipeline/base_pipeline.py +++ b/docling/pipeline/base_pipeline.py @@ -198,12 +198,13 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name. return conv_res def _unload(self, conv_res: ConversionResult) -> ConversionResult: - if conv_res.input._backend: - conv_res.input._backend.unload() - for page in conv_res.pages: if page._backend is not None: page._backend.unload() + + if conv_res.input._backend: + conv_res.input._backend.unload() + return conv_res def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus: diff --git a/docs/examples/develop_formula_understanding.py b/docs/examples/develop_formula_understanding.py index 82f61887..e2e508c3 100644 --- a/docs/examples/develop_formula_understanding.py +++ b/docs/examples/develop_formula_understanding.py @@ -4,19 +4,19 @@ from typing import Iterable from docling_core.types.doc import DocItemLabel, DoclingDocument, NodeItem, TextItem -from docling.datamodel.base_models import InputFormat, TextImageEnrichmentElement +from docling.datamodel.base_models import InputFormat, ItemAndImageEnrichmentElement from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.document_converter import DocumentConverter, PdfFormatOption -from docling.models.base_model import BaseTextImageEnrichmentModel +from docling.models.base_model import BaseItemAndImageEnrichmentModel from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline -class ExampleFormulaUPipelineOptions(PdfPipelineOptions): +class ExampleFormulaUnderstandingPipelineOptions(PdfPipelineOptions): do_formula_understanding: bool = True # A new enrichment model using both the document element and its image as input -class ExampleFormulaUEnrichmentModel(BaseTextImageEnrichmentModel): +class ExampleFormulaUnderstandingEnrichmentModel(BaseItemAndImageEnrichmentModel): images_scale = 2.6 def __init__(self, enabled: bool): @@ -30,7 +30,9 @@ class ExampleFormulaUEnrichmentModel(BaseTextImageEnrichmentModel): ) def __call__( - self, doc: DoclingDocument, element_batch: Iterable[TextImageEnrichmentElement] + self, + doc: DoclingDocument, + element_batch: Iterable[ItemAndImageEnrichmentElement], ) -> Iterable[NodeItem]: if not self.enabled: return @@ -38,18 +40,18 @@ class ExampleFormulaUEnrichmentModel(BaseTextImageEnrichmentModel): for enrich_element in element_batch: enrich_element.image.show() - yield enrich_element.element + yield enrich_element.item # How the pipeline can be extended. -class ExampleFormulaUPipeline(StandardPdfPipeline): +class ExampleFormulaUnderstandingPipeline(StandardPdfPipeline): - def __init__(self, pipeline_options: ExampleFormulaUPipelineOptions): + def __init__(self, pipeline_options: ExampleFormulaUnderstandingPipelineOptions): super().__init__(pipeline_options) - self.pipeline_options: ExampleFormulaUPipelineOptions + self.pipeline_options: ExampleFormulaUnderstandingPipelineOptions self.enrichment_pipe = [ - ExampleFormulaUEnrichmentModel( + ExampleFormulaUnderstandingEnrichmentModel( enabled=self.pipeline_options.do_formula_understanding ) ] @@ -58,8 +60,8 @@ class ExampleFormulaUPipeline(StandardPdfPipeline): self.keep_backend = True @classmethod - def get_default_options(cls) -> ExampleFormulaUPipelineOptions: - return ExampleFormulaUPipelineOptions() + def get_default_options(cls) -> ExampleFormulaUnderstandingPipelineOptions: + return ExampleFormulaUnderstandingPipelineOptions() # Example main. In the final version, we simply have to set do_formula_understanding to true. @@ -68,13 +70,13 @@ def main(): input_doc_path = Path("./tests/data/2203.01017v2.pdf") - pipeline_options = ExampleFormulaUPipelineOptions() + pipeline_options = ExampleFormulaUnderstandingPipelineOptions() pipeline_options.do_formula_understanding = True doc_converter = DocumentConverter( format_options={ InputFormat.PDF: PdfFormatOption( - pipeline_cls=ExampleFormulaUPipeline, + pipeline_cls=ExampleFormulaUnderstandingPipeline, pipeline_options=pipeline_options, ) }