mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 07:22:14 +00:00
move logic in BaseTextImageEnrichmentModel
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
3611335d22
commit
12b6417f51
@ -7,6 +7,7 @@ from docling_core.types.doc import (
|
||||
PictureDataType,
|
||||
Size,
|
||||
TableCell,
|
||||
TextItem,
|
||||
)
|
||||
from docling_core.types.io import ( # DO NOT REMOVE; explicitly exposed from this location
|
||||
DocumentStream,
|
||||
@ -201,6 +202,13 @@ class AssembledUnit(BaseModel):
|
||||
headers: List[PageElement] = []
|
||||
|
||||
|
||||
class TextImageEnrichmentElement(BaseModel):
    """Input unit for enrichment models that need a text item and an image together."""

    # `Image` is not a pydantic model, so arbitrary field types must be allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # The document text item to be enriched.
    element: TextItem
    # The image that accompanies `element`.
    image: Image
|
||||
|
||||
|
||||
class Page(BaseModel):
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Generic, Iterable, Optional
|
||||
|
||||
from docling_core.types.doc import DoclingDocument, NodeItem
|
||||
from docling_core.types.doc import DoclingDocument, NodeItem, TextItem
|
||||
from typing_extensions import TypeVar
|
||||
|
||||
from docling.datamodel.base_models import Page
|
||||
from docling.datamodel.base_models import Page, TextImageEnrichmentElement
|
||||
from docling.datamodel.document import ConversionResult
|
||||
|
||||
|
||||
@ -46,3 +46,22 @@ class BaseEnrichmentModel(GenericEnrichmentModel[NodeItem]):
|
||||
if self.is_processable(doc=conv_res.document, element=element):
|
||||
return element
|
||||
return None
|
||||
|
||||
|
||||
class BaseTextImageEnrichmentModel(GenericEnrichmentModel[TextImageEnrichmentElement]):
    """Base for enrichment models whose input is a text item plus its page-image crop.

    ``prepare_element`` turns a document node into a
    ``TextImageEnrichmentElement`` by cropping the page image to the bounding
    box of the node's first provenance entry.
    """

    # Scale forwarded to ``get_image`` when cropping; subclasses provide the value.
    images_scale: float

    def prepare_element(
        self, conv_res: ConversionResult, element: NodeItem
    ) -> Optional[TextImageEnrichmentElement]:
        """Build the model input for ``element``, or return ``None`` to skip it.

        Args:
            conv_res: Conversion result providing the document and its pages.
            element: Candidate document node.

        Returns:
            A ``TextImageEnrichmentElement`` holding the element and its image
            crop, or ``None`` when the element is not processable or carries
            no provenance to crop from.
        """
        if not self.is_processable(doc=conv_res.document, element=element):
            return None

        # Type narrowing for the attribute accesses below.
        # NOTE(review): assumes is_processable only accepts TextItem instances.
        assert isinstance(element, TextItem)

        if not element.prov:
            # Without provenance there is no page or bbox to crop; skip the
            # element instead of failing with IndexError on prov[0].
            return None

        element_prov = element.prov[0]
        # page_no is used as a 1-based number, pages as a 0-based sequence.
        page_ix = element_prov.page_no - 1
        cropped_image = conv_res.pages[page_ix].get_image(
            scale=self.images_scale, cropbox=element_prov.bbox
        )
        return TextImageEnrichmentElement(element=element, image=cropped_image)
|
||||
|
@ -3,14 +3,11 @@ from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
from docling_core.types.doc import DocItemLabel, DoclingDocument, NodeItem, TextItem
|
||||
from PIL import Image as PILImage
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.base_models import InputFormat, TextImageEnrichmentElement
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||
from docling.models.base_model import BaseEnrichmentModel, GenericEnrichmentModel
|
||||
from docling.models.base_model import BaseTextImageEnrichmentModel
|
||||
from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
|
||||
|
||||
|
||||
@ -18,16 +15,9 @@ class ExampleFormulaUPipelineOptions(PdfPipelineOptions):
|
||||
do_formula_understanding: bool = True
|
||||
|
||||
|
||||
class FormulaEnrichmentElement(BaseModel):
    """Input unit pairing a text item with a PIL image."""

    # PIL images are not pydantic models, so arbitrary field types must be allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # The document text item to be enriched.
    element: TextItem
    # The image that accompanies `element`.
    image: PILImage.Image
|
||||
|
||||
|
||||
class ExampleFormulaUEnrichmentModel(GenericEnrichmentModel[FormulaEnrichmentElement]):
|
||||
|
||||
images_scale: float = 2.6
|
||||
# A new enrichment model using both the document element and its image as input
|
||||
class ExampleFormulaUEnrichmentModel(BaseTextImageEnrichmentModel):
|
||||
images_scale = 2.6
|
||||
|
||||
def __init__(self, enabled: bool):
|
||||
self.enabled = enabled
|
||||
@ -39,21 +29,8 @@ class ExampleFormulaUEnrichmentModel(GenericEnrichmentModel[FormulaEnrichmentEle
|
||||
and element.label == DocItemLabel.FORMULA
|
||||
)
|
||||
|
||||
def prepare_element(
|
||||
self, conv_res: ConversionResult, element: NodeItem
|
||||
) -> FormulaEnrichmentElement:
|
||||
if self.is_processable(doc=conv_res.document, element=element):
|
||||
assert isinstance(element, TextItem)
|
||||
element_prov = element.prov[0]
|
||||
page_ix = element_prov.page_no - 1
|
||||
cropped_image = conv_res.pages[page_ix].get_image(
|
||||
scale=self.images_scale, cropbox=element_prov.bbox
|
||||
)
|
||||
|
||||
return FormulaEnrichmentElement(element=element, image=cropped_image)
|
||||
|
||||
def __call__(
|
||||
self, doc: DoclingDocument, element_batch: Iterable[FormulaEnrichmentElement]
|
||||
self, doc: DoclingDocument, element_batch: Iterable[TextImageEnrichmentElement]
|
||||
) -> Iterable[NodeItem]:
|
||||
if not self.enabled:
|
||||
return
|
||||
|
Loading…
Reference in New Issue
Block a user