diff --git a/docling/datamodel/settings.py b/docling/datamodel/settings.py index 616d41dc..582d6dc2 100644 --- a/docling/datamodel/settings.py +++ b/docling/datamodel/settings.py @@ -26,8 +26,16 @@ class BatchConcurrencySettings(BaseModel): # To force models into single core: export OMP_NUM_THREADS=1 +class DebugSettings(BaseModel): + visualize_cells: bool = False + visualize_ocr: bool = False + visualize_layout: bool = False + visualize_tables: bool = False + + class AppSettings(BaseSettings): perf: BatchConcurrencySettings + debug: DebugSettings -settings = AppSettings(perf=BatchConcurrencySettings()) +settings = AppSettings(perf=BatchConcurrencySettings(), debug=DebugSettings()) diff --git a/docling/models/easyocr_model.py b/docling/models/easyocr_model.py index d535b593..dfabcca9 100644 --- a/docling/models/easyocr_model.py +++ b/docling/models/easyocr_model.py @@ -6,6 +6,7 @@ from docling_core.types.doc import BoundingBox, CoordOrigin from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.pipeline_options import EasyOcrOptions +from docling.datamodel.settings import settings from docling.models.base_ocr_model import BaseOcrModel _log = logging.getLogger(__name__) @@ -85,6 +86,7 @@ class EasyOcrModel(BaseOcrModel): page.cells.extend(filtered_ocr_cells) # DEBUG code: - # self.draw_ocr_rects_and_cells(page, ocr_rects) + if settings.debug.visualize_ocr: + self.draw_ocr_rects_and_cells(page, ocr_rects) yield page diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py index 009a5b92..81d89e4f 100644 --- a/docling/models/layout_model.py +++ b/docling/models/layout_model.py @@ -16,6 +16,7 @@ from docling.datamodel.base_models import ( LayoutPrediction, Page, ) +from docling.datamodel.settings import settings from docling.models.base_model import BasePageModel from docling.utils import layout_utils as lu @@ -314,22 +315,24 @@ class LayoutModel(BasePageModel): # clusters = self.sort_clusters_by_cell_order(clusters) # DEBUG code: - def draw_clusters_and_cells(): + def draw_clusters_and_cells(show: bool = True): image = copy.deepcopy(page.image) - draw = ImageDraw.Draw(image) - for c in clusters: - x0, y0, x1, y1 = c.bbox.as_tuple() - draw.rectangle([(x0, y0), (x1, y1)], outline="green") + if image is not None: + draw = ImageDraw.Draw(image) + for c in clusters: + x0, y0, x1, y1 = c.bbox.as_tuple() + draw.rectangle([(x0, y0), (x1, y1)], outline="green") - cell_color = ( - random.randint(30, 140), - random.randint(30, 140), - random.randint(30, 140), - ) - for tc in c.cells: # [:1]: - x0, y0, x1, y1 = tc.bbox.as_tuple() - draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color) - image.show() + cell_color = ( + random.randint(30, 140), + random.randint(30, 140), + random.randint(30, 140), + ) + for tc in c.cells: # [:1]: + x0, y0, x1, y1 = tc.bbox.as_tuple() + draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color) + if show: + image.show() # draw_clusters_and_cells() @@ -337,7 +340,8 @@ class LayoutModel(BasePageModel): clusters, page.cells, page.size.height ) - # draw_clusters_and_cells() + if settings.debug.visualize_layout: + draw_clusters_and_cells() page.predictions.layout = LayoutPrediction(clusters=clusters) diff --git a/docling/models/table_structure_model.py b/docling/models/table_structure_model.py index a3257ab3..6d75aab4 100644 --- a/docling/models/table_structure_model.py +++ b/docling/models/table_structure_model.py @@ -9,6 +9,7 @@ from PIL import ImageDraw from docling.datamodel.base_models import Page, Table, TableStructurePrediction from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions +from docling.datamodel.settings import settings from docling.models.base_model import BasePageModel @@ -35,7 +36,7 @@ class TableStructureModel(BasePageModel): self.tf_predictor = TFPredictor(self.tm_config) self.scale = 2.0 # Scale up table input images to 144 dpi - def draw_table_and_cells(self, page: Page, tbl_list: List[Table]): + def draw_table_and_cells(self, page: Page, tbl_list: Iterable[Table]): assert page._backend is not None image = ( @@ -166,6 +167,9 @@ class TableStructureModel(BasePageModel): ) # For debugging purposes: - # self.draw_table_and_cells(page, page.predictions.tablestructure.table_map.values()) + if settings.debug.visualize_tables: + self.draw_table_and_cells( + page, page.predictions.tablestructure.table_map.values() + ) yield page diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index b042653b..cb6068cc 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -9,6 +9,7 @@ from docling_core.types.doc import BoundingBox, CoordOrigin from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.pipeline_options import TesseractCliOcrOptions +from docling.datamodel.settings import settings from docling.models.base_ocr_model import BaseOcrModel _log = logging.getLogger(__name__) @@ -169,6 +170,7 @@ class TesseractOcrCliModel(BaseOcrModel): page.cells.extend(filtered_ocr_cells) # DEBUG code: - # self.draw_ocr_rects_and_cells(page, ocr_rects) + if settings.debug.visualize_ocr: + self.draw_ocr_rects_and_cells(page, ocr_rects) yield page diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/tesseract_ocr_model.py index f8a1fe57..e032dd77 100644 --- a/docling/models/tesseract_ocr_model.py +++ b/docling/models/tesseract_ocr_model.py @@ -5,6 +5,7 @@ from docling_core.types.doc import BoundingBox, CoordOrigin from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.pipeline_options import TesseractOcrOptions +from docling.datamodel.settings import settings from docling.models.base_ocr_model import BaseOcrModel _log = logging.getLogger(__name__) @@ -125,6 +126,7 @@ class TesseractOcrModel(BaseOcrModel): page.cells.extend(filtered_ocr_cells) # DEBUG code: - # self.draw_ocr_rects_and_cells(page, ocr_rects) + if settings.debug.visualize_ocr: + self.draw_ocr_rects_and_cells(page, ocr_rects) yield page diff --git a/docs/examples/batch_convert.py b/docs/examples/batch_convert.py index da1c701f..aa063030 100644 --- a/docs/examples/batch_convert.py +++ b/docs/examples/batch_convert.py @@ -8,6 +8,7 @@ import yaml from docling.datamodel.base_models import ConversionStatus from docling.datamodel.document import ConversionResult +from docling.datamodel.settings import settings from docling.document_converter import DocumentConverter _log = logging.getLogger(__name__) @@ -113,6 +114,12 @@ def main(): # docs = [DocumentStream(name="my_doc.pdf", stream=buf)] # input = DocumentConversionInput.from_streams(docs) + # # Turn on inline debug visualizations: + # settings.debug.visualize_layout = True + # settings.debug.visualize_ocr = True + # settings.debug.visualize_tables = True + # settings.debug.visualize_cells = True + doc_converter = DocumentConverter() start_time = time.time()