Merge pull request #2 from 0xCarbon/feat/disable_image_labeling

Force Image Content OCR
2025-08-01 15:02:21 +00:00 · 2024-12-13 22:52:52 -03:00 · 2024-12-13 22:52:52 -03:00 · 162e89e013
commit 162e89e013
parent 1e016fd776 e0aaa783c5
1 changed file with 2 additions and 1 deletion
--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@@ -48,6 +48,7 @@ class LayoutModel(BasePageModel):
    FIGURE_LABEL = DocItemLabel.PICTURE
    FORMULA_LABEL = DocItemLabel.FORMULA

+    
    def __init__(self, artifacts_path: Path, accelerator_options: AcceleratorOptions):
        device = decide_device(accelerator_options.device)

@@ -56,7 +57,7 @@ class LayoutModel(BasePageModel):
            device=device,
            num_threads=accelerator_options.num_threads,
            base_threshold=0.6,
-            blacklist_classes={"Form", "Key-Value Region"},
+            blacklist_classes={"Form", "Key-Value Region", "Picture"}, # Blacklisting "Picture" disables picture recognition, in an attempt to force the model to identify only text
        )

    def postprocess(self, clusters_in: List[Cluster], cells: List[Cell], page_height):