Add settings to turn visualization on or off

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-07-30 14:04:27 +00:00 · 2024-10-28 13:21:32 +01:00 · 2024-10-28 13:21:32 +01:00 · 747a190b3a
commit 747a190b3a
parent 77a89c3334
7 changed files with 50 additions and 21 deletions
--- a/docling/datamodel/settings.py
+++ b/docling/datamodel/settings.py
@@ -26,8 +26,16 @@ class BatchConcurrencySettings(BaseModel):
    # To force models into single core: export OMP_NUM_THREADS=1
+class DebugSettings(BaseModel):
+    visualize_cells: bool = False
+    visualize_ocr: bool = False
+    visualize_layout: bool = False
+    visualize_tables: bool = False
 class AppSettings(BaseSettings):
    perf: BatchConcurrencySettings
+    debug: DebugSettings
-settings = AppSettings(perf=BatchConcurrencySettings())
+settings = AppSettings(perf=BatchConcurrencySettings(), debug=DebugSettings())
--- a/docling/models/easyocr_model.py
+++ b/docling/models/easyocr_model.py
@@ -6,6 +6,7 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
 from docling.datamodel.base_models import OcrCell, Page
 from docling.datamodel.pipeline_options import EasyOcrOptions
+from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
 _log = logging.getLogger(__name__)
@@ -85,6 +86,7 @@ class EasyOcrModel(BaseOcrModel):
                page.cells.extend(filtered_ocr_cells)
                # DEBUG code:
-                # self.draw_ocr_rects_and_cells(page, ocr_rects)
+                if settings.debug.visualize_ocr:
+                    self.draw_ocr_rects_and_cells(page, ocr_rects)
                yield page
--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@@ -16,6 +16,7 @@ from docling.datamodel.base_models import (
    LayoutPrediction,
    Page,
 )
+from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
 from docling.utils import layout_utils as lu
@@ -314,22 +315,24 @@ class LayoutModel(BasePageModel):
                # clusters = self.sort_clusters_by_cell_order(clusters)
                # DEBUG code:
-                def draw_clusters_and_cells():
+                def draw_clusters_and_cells(show: bool = True):
                    image = copy.deepcopy(page.image)
-                    draw = ImageDraw.Draw(image)
+                    if image is not None:
-                    for c in clusters:
+                        draw = ImageDraw.Draw(image)
-                        x0, y0, x1, y1 = c.bbox.as_tuple()
+                        for c in clusters:
-                        draw.rectangle([(x0, y0), (x1, y1)], outline="green")
+                            x0, y0, x1, y1 = c.bbox.as_tuple()
+                            draw.rectangle([(x0, y0), (x1, y1)], outline="green")
-                        cell_color = (
+                            cell_color = (
-                            random.randint(30, 140),
+                                random.randint(30, 140),
-                            random.randint(30, 140),
+                                random.randint(30, 140),
-                            random.randint(30, 140),
+                                random.randint(30, 140),
-                        )
+                            )
-                        for tc in c.cells:  # [:1]:
+                            for tc in c.cells:  # [:1]:
-                            x0, y0, x1, y1 = tc.bbox.as_tuple()
+                                x0, y0, x1, y1 = tc.bbox.as_tuple()
-                            draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
+                                draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
-                    image.show()
+                        if show:
+                            image.show()
                # draw_clusters_and_cells()
@@ -337,7 +340,8 @@ class LayoutModel(BasePageModel):
                    clusters, page.cells, page.size.height
                )
-                # draw_clusters_and_cells()
+                if settings.debug.visualize_layout:
+                    draw_clusters_and_cells()
                page.predictions.layout = LayoutPrediction(clusters=clusters)
--- a/docling/models/table_structure_model.py
+++ b/docling/models/table_structure_model.py
@@ -9,6 +9,7 @@ from PIL import ImageDraw
 from docling.datamodel.base_models import Page, Table, TableStructurePrediction
 from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions
+from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
@@ -35,7 +36,7 @@ class TableStructureModel(BasePageModel):
            self.tf_predictor = TFPredictor(self.tm_config)
            self.scale = 2.0  # Scale up table input images to 144 dpi
-    def draw_table_and_cells(self, page: Page, tbl_list: List[Table]):
+    def draw_table_and_cells(self, page: Page, tbl_list: Iterable[Table]):
        assert page._backend is not None
        image = (
@@ -166,6 +167,9 @@ class TableStructureModel(BasePageModel):
                        )
                    # For debugging purposes:
-                    # self.draw_table_and_cells(page, page.predictions.tablestructure.table_map.values())
+                    if settings.debug.visualize_tables:
+                        self.draw_table_and_cells(
+                            page, page.predictions.tablestructure.table_map.values()
+                        )
                yield page
--- a/docling/models/tesseract_ocr_cli_model.py
+++ b/docling/models/tesseract_ocr_cli_model.py
@@ -9,6 +9,7 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
 from docling.datamodel.base_models import OcrCell, Page
 from docling.datamodel.pipeline_options import TesseractCliOcrOptions
+from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
 _log = logging.getLogger(__name__)
@@ -169,6 +170,7 @@ class TesseractOcrCliModel(BaseOcrModel):
                page.cells.extend(filtered_ocr_cells)
                # DEBUG code:
-                # self.draw_ocr_rects_and_cells(page, ocr_rects)
+                if settings.debug.visualize_ocr:
+                    self.draw_ocr_rects_and_cells(page, ocr_rects)
                yield page
--- a/docling/models/tesseract_ocr_model.py
+++ b/docling/models/tesseract_ocr_model.py
@@ -5,6 +5,7 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
 from docling.datamodel.base_models import OcrCell, Page
 from docling.datamodel.pipeline_options import TesseractOcrOptions
+from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
 _log = logging.getLogger(__name__)
@@ -125,6 +126,7 @@ class TesseractOcrModel(BaseOcrModel):
                page.cells.extend(filtered_ocr_cells)
                # DEBUG code:
-                # self.draw_ocr_rects_and_cells(page, ocr_rects)
+                if settings.debug.visualize_ocr:
+                    self.draw_ocr_rects_and_cells(page, ocr_rects)
                yield page
--- a/docs/examples/batch_convert.py
+++ b/docs/examples/batch_convert.py
@@ -8,6 +8,7 @@ import yaml
 from docling.datamodel.base_models import ConversionStatus
 from docling.datamodel.document import ConversionResult
+from docling.datamodel.settings import settings
 from docling.document_converter import DocumentConverter
 _log = logging.getLogger(__name__)
@@ -113,6 +114,12 @@ def main():
    # docs = [DocumentStream(name="my_doc.pdf", stream=buf)]
    # input = DocumentConversionInput.from_streams(docs)
+    # # Turn on inline debug visualizations:
+    # settings.debug.visualize_layout = True
+    # settings.debug.visualize_ocr = True
+    # settings.debug.visualize_tables = True
+    # settings.debug.visualize_cells = True
    doc_converter = DocumentConverter()
    start_time = time.time()