use do_ flag in pipeline_options

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-12-11 14:18:30 +00:00 · 2024-10-13 16:54:46 +02:00
parent 7c8d7e222e
commit ddb509628e
4 changed files with 15 additions and 2 deletions
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -64,6 +64,8 @@ class PipelineOptions(BaseModel):
        True  # This default will be set to False in a future version of docling
    )

+    do_dummy_picture_classifer: bool = False
+

 class PdfPipelineOptions(PipelineOptions):
    artifacts_path: Optional[Union[Path, str]] = None
--- a/docling/models/dummy_picture_enrichment.py
+++ b/docling/models/dummy_picture_enrichment.py
@@ -10,12 +10,19 @@ from docling.models.base_model import BaseEnrichmentModel


 class DummyPictureClassifierEnrichmentModel(BaseEnrichmentModel):
+
+    def __init__(self, enabled: bool):
+        self.enabled = enabled
+
    def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool:
-        return isinstance(element, PictureItem)
+        return self.enabled and isinstance(element, PictureItem)

    def __call__(
        self, doc: DoclingDocument, element_batch: Iterable[NodeItem]
    ) -> Iterable[Any]:
+        if not self.enabled:
+            return
+
        for element in element_batch:
            assert isinstance(element, PictureItem)
            element.data.classification = PictureClassificationData(
--- a/docling/pipeline/standard_pdf_pipeline.py
+++ b/docling/pipeline/standard_pdf_pipeline.py
@@ -84,7 +84,10 @@ class StandardPdfPipeline(PaginatedPipeline):

        self.enrichment_pipe = [
            # Other models working on `NodeItem` elements in the DoclingDocument
-            # DummyPictureClassifierEnrichmentModel()
+            # TODO (open question): should we use the enabled flag, or simply not add the model to the list?
+            DummyPictureClassifierEnrichmentModel(
+                enabled=pipeline_options.do_dummy_picture_classifer
+            )
        ]

    @staticmethod
--- a/examples/custom_convert.py
+++ b/examples/custom_convert.py
@@ -58,6 +58,7 @@ def main():
    pipeline_options.do_ocr = False
    pipeline_options.do_table_structure = True
    pipeline_options.table_structure_options.do_cell_matching = True
+    pipeline_options.do_dummy_picture_classifer = True

    doc_converter = DocumentConverter(
        format_options={