Blacklisted the "Picture" layout class so that the layout predictor is forced to interpret the contents of image regions and retrieve text that would otherwise be lost behind a picture tag

2025-08-01 15:02:21 +00:00 · 2024-12-13 13:46:25 -03:00 · 2024-12-13 13:46:25 -03:00 · a4dc21395d
commit a4dc21395d
parent 365a1e7b98
1 changed file with 1 addition and 0 deletions
--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@@ -48,6 +48,7 @@ class LayoutModel(BasePageModel):

    def __init__(self, artifacts_path: Path):
        self.layout_predictor = LayoutPredictor(artifacts_path)  # TODO temporary
+        self.layout_predictor._black_classes.add("Picture")  # Use this to disable picture recognition (trying to force to identify only text)

    def postprocess(self, clusters_in: List[Cluster], cells: List[Cell], page_height):
        MIN_INTERSECTION = 0.2