mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 15:32:30 +00:00
move logic in BaseTextImageEnrichmentModel
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
3611335d22
commit
12b6417f51
@ -7,6 +7,7 @@ from docling_core.types.doc import (
|
|||||||
PictureDataType,
|
PictureDataType,
|
||||||
Size,
|
Size,
|
||||||
TableCell,
|
TableCell,
|
||||||
|
TextItem,
|
||||||
)
|
)
|
||||||
from docling_core.types.io import ( # DO NOT REMOVE; explicitly exposed from this location
|
from docling_core.types.io import ( # DO NOT REMOVE; explicitly exposed from this location
|
||||||
DocumentStream,
|
DocumentStream,
|
||||||
@ -201,6 +202,13 @@ class AssembledUnit(BaseModel):
|
|||||||
headers: List[PageElement] = []
|
headers: List[PageElement] = []
|
||||||
|
|
||||||
|
|
||||||
|
class TextImageEnrichmentElement(BaseModel):
|
||||||
|
element: TextItem
|
||||||
|
image: Image
|
||||||
|
|
||||||
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||||
|
|
||||||
|
|
||||||
class Page(BaseModel):
|
class Page(BaseModel):
|
||||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||||
|
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Any, Generic, Iterable, Optional
|
from typing import Any, Generic, Iterable, Optional
|
||||||
|
|
||||||
from docling_core.types.doc import DoclingDocument, NodeItem
|
from docling_core.types.doc import DoclingDocument, NodeItem, TextItem
|
||||||
from typing_extensions import TypeVar
|
from typing_extensions import TypeVar
|
||||||
|
|
||||||
from docling.datamodel.base_models import Page
|
from docling.datamodel.base_models import Page, TextImageEnrichmentElement
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
|
|
||||||
|
|
||||||
@ -46,3 +46,22 @@ class BaseEnrichmentModel(GenericEnrichmentModel[NodeItem]):
|
|||||||
if self.is_processable(doc=conv_res.document, element=element):
|
if self.is_processable(doc=conv_res.document, element=element):
|
||||||
return element
|
return element
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class BaseTextImageEnrichmentModel(GenericEnrichmentModel[TextImageEnrichmentElement]):
|
||||||
|
|
||||||
|
images_scale: float
|
||||||
|
|
||||||
|
def prepare_element(
|
||||||
|
self, conv_res: ConversionResult, element: NodeItem
|
||||||
|
) -> Optional[TextImageEnrichmentElement]:
|
||||||
|
if not self.is_processable(doc=conv_res.document, element=element):
|
||||||
|
return None
|
||||||
|
|
||||||
|
assert isinstance(element, TextItem)
|
||||||
|
element_prov = element.prov[0]
|
||||||
|
page_ix = element_prov.page_no - 1
|
||||||
|
cropped_image = conv_res.pages[page_ix].get_image(
|
||||||
|
scale=self.images_scale, cropbox=element_prov.bbox
|
||||||
|
)
|
||||||
|
return TextImageEnrichmentElement(element=element, image=cropped_image)
|
||||||
|
@ -3,14 +3,11 @@ from pathlib import Path
|
|||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
from docling_core.types.doc import DocItemLabel, DoclingDocument, NodeItem, TextItem
|
from docling_core.types.doc import DocItemLabel, DoclingDocument, NodeItem, TextItem
|
||||||
from PIL import Image as PILImage
|
|
||||||
from pydantic import BaseModel, ConfigDict
|
|
||||||
|
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat, TextImageEnrichmentElement
|
||||||
from docling.datamodel.document import ConversionResult
|
|
||||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
from docling.models.base_model import BaseEnrichmentModel, GenericEnrichmentModel
|
from docling.models.base_model import BaseTextImageEnrichmentModel
|
||||||
from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
|
from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
|
||||||
|
|
||||||
|
|
||||||
@ -18,16 +15,9 @@ class ExampleFormulaUPipelineOptions(PdfPipelineOptions):
|
|||||||
do_formula_understanding: bool = True
|
do_formula_understanding: bool = True
|
||||||
|
|
||||||
|
|
||||||
class FormulaEnrichmentElement(BaseModel):
|
# A new enrichment model using both the document element and its image as input
|
||||||
element: TextItem
|
class ExampleFormulaUEnrichmentModel(BaseTextImageEnrichmentModel):
|
||||||
image: PILImage.Image
|
images_scale = 2.6
|
||||||
|
|
||||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
||||||
|
|
||||||
|
|
||||||
class ExampleFormulaUEnrichmentModel(GenericEnrichmentModel[FormulaEnrichmentElement]):
|
|
||||||
|
|
||||||
images_scale: float = 2.6
|
|
||||||
|
|
||||||
def __init__(self, enabled: bool):
|
def __init__(self, enabled: bool):
|
||||||
self.enabled = enabled
|
self.enabled = enabled
|
||||||
@ -39,21 +29,8 @@ class ExampleFormulaUEnrichmentModel(GenericEnrichmentModel[FormulaEnrichmentEle
|
|||||||
and element.label == DocItemLabel.FORMULA
|
and element.label == DocItemLabel.FORMULA
|
||||||
)
|
)
|
||||||
|
|
||||||
def prepare_element(
|
|
||||||
self, conv_res: ConversionResult, element: NodeItem
|
|
||||||
) -> FormulaEnrichmentElement:
|
|
||||||
if self.is_processable(doc=conv_res.document, element=element):
|
|
||||||
assert isinstance(element, TextItem)
|
|
||||||
element_prov = element.prov[0]
|
|
||||||
page_ix = element_prov.page_no - 1
|
|
||||||
cropped_image = conv_res.pages[page_ix].get_image(
|
|
||||||
scale=self.images_scale, cropbox=element_prov.bbox
|
|
||||||
)
|
|
||||||
|
|
||||||
return FormulaEnrichmentElement(element=element, image=cropped_image)
|
|
||||||
|
|
||||||
def __call__(
|
def __call__(
|
||||||
self, doc: DoclingDocument, element_batch: Iterable[FormulaEnrichmentElement]
|
self, doc: DoclingDocument, element_batch: Iterable[TextImageEnrichmentElement]
|
||||||
) -> Iterable[NodeItem]:
|
) -> Iterable[NodeItem]:
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
return
|
return
|
||||||
|
Loading…
Reference in New Issue
Block a user