mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
use do_ flag in pipeline_options
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
7c8d7e222e
commit
ddb509628e
@ -64,6 +64,8 @@ class PipelineOptions(BaseModel):
|
||||
True # This default will be set to False in a future version of docling
|
||||
)
|
||||
|
||||
do_dummy_picture_classifer: bool = False
|
||||
|
||||
|
||||
class PdfPipelineOptions(PipelineOptions):
|
||||
artifacts_path: Optional[Union[Path, str]] = None
|
||||
|
@ -10,12 +10,19 @@ from docling.models.base_model import BaseEnrichmentModel
|
||||
|
||||
|
||||
class DummyPictureClassifierEnrichmentModel(BaseEnrichmentModel):
|
||||
|
||||
def __init__(self, enabled: bool):
|
||||
self.enabled = enabled
|
||||
|
||||
def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool:
|
||||
return isinstance(element, PictureItem)
|
||||
return self.enabled and isinstance(element, PictureItem)
|
||||
|
||||
def __call__(
|
||||
self, doc: DoclingDocument, element_batch: Iterable[NodeItem]
|
||||
) -> Iterable[Any]:
|
||||
if not self.enabled:
|
||||
return
|
||||
|
||||
for element in element_batch:
|
||||
assert isinstance(element, PictureItem)
|
||||
element.data.classification = PictureClassificationData(
|
||||
|
@ -84,7 +84,10 @@ class StandardPdfPipeline(PaginatedPipeline):
|
||||
|
||||
self.enrichment_pipe = [
|
||||
# Other models working on `NodeItem` elements in the DoclingDocument
|
||||
# DummyPictureClassifierEnrichmentModel()
|
||||
# TODO Question: should we use the enabled flag or simply not add the model to the list?
|
||||
DummyPictureClassifierEnrichmentModel(
|
||||
enabled=pipeline_options.do_dummy_picture_classifer
|
||||
)
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
|
@ -58,6 +58,7 @@ def main():
|
||||
pipeline_options.do_ocr = False
|
||||
pipeline_options.do_table_structure = True
|
||||
pipeline_options.table_structure_options.do_cell_matching = True
|
||||
pipeline_options.do_dummy_picture_classifer = True
|
||||
|
||||
doc_converter = DocumentConverter(
|
||||
format_options={
|
||||
|
Loading…
Reference in New Issue
Block a user