diff --git a/docling/datamodel/settings.py b/docling/datamodel/settings.py index 7287a499..7daf5047 100644 --- a/docling/datamodel/settings.py +++ b/docling/datamodel/settings.py @@ -36,7 +36,7 @@ class DebugSettings(BaseModel): profile_pipeline_timings: bool = False # Path used to output debug information. - debug_output_path: str = str(Path.cwd()) + debug_output_path: str = str(Path.cwd() / "debug") class AppSettings(BaseSettings): diff --git a/docling/models/base_ocr_model.py b/docling/models/base_ocr_model.py index 5799aa63..9d26a317 100644 --- a/docling/models/base_ocr_model.py +++ b/docling/models/base_ocr_model.py @@ -1,6 +1,7 @@ import copy import logging from abc import abstractmethod +from pathlib import Path from typing import Iterable, List import numpy as np @@ -12,6 +13,7 @@ from scipy.ndimage import find_objects, label from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import OcrOptions +from docling.datamodel.settings import settings from docling.models.base_model import BasePageModel _log = logging.getLogger(__name__) @@ -115,7 +117,7 @@ class BaseOcrModel(BasePageModel): ] return filtered_ocr_cells - def draw_ocr_rects_and_cells(self, page, ocr_rects): + def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False): image = copy.deepcopy(page.image) draw = ImageDraw.Draw(image, "RGBA") @@ -132,7 +134,18 @@ class BaseOcrModel(BasePageModel): if isinstance(tc, OcrCell): color = "magenta" draw.rectangle([(x0, y0), (x1, y1)], outline=color) - image.show() + + if show: + image.show() + else: + out_path: Path = ( + Path(settings.debug.debug_output_path) + / f"debug_{conv_res.input.file.stem}" + ) + out_path.mkdir(parents=True, exist_ok=True) + + out_file = out_path / f"ocr_page_{page.page_no:05}.png" + image.save(str(out_file), format="png") @abstractmethod def __call__( diff --git a/docling/models/ds_glm_model.py b/docling/models/ds_glm_model.py index dade1870..e63bad3a 100644 --- a/docling/models/ds_glm_model.py +++ b/docling/models/ds_glm_model.py @@ -1,5 +1,6 @@ import copy import random +from pathlib import Path from typing import List, Union from deepsearch_glm.nlp_utils import init_nlp_model @@ -27,6 +28,7 @@ from pydantic import BaseModel, ConfigDict from docling.datamodel.base_models import Cluster, FigureElement, Table, TextElement from docling.datamodel.document import ConversionResult, layout_label_to_ds_type +from docling.datamodel.settings import settings from docling.utils.profiling import ProfilingScope, TimeRecorder from docling.utils.utils import create_hash @@ -236,15 +238,15 @@ class GlmModel: docling_doc: DoclingDocument = to_docling_document(glm_doc) # Experimental # DEBUG code: - def draw_clusters_and_cells(ds_document, page_no): + def draw_clusters_and_cells(ds_document, page_no, show: bool = False): clusters_to_draw = [] image = copy.deepcopy(conv_res.pages[page_no].image) for ix, elem in enumerate(ds_document.main_text): if isinstance(elem, BaseText): - prov = elem.prov[0] + prov = elem.prov[0] # type: ignore elif isinstance(elem, Ref): _, arr, index = elem.ref.split("/") - index = int(index) + index = int(index) # type: ignore if arr == "tables": prov = ds_document.tables[index].prov[0] elif arr == "figures": @@ -258,7 +260,7 @@ class GlmModel: id=ix, label=elem.name, bbox=BoundingBox.from_tuple( - coord=prov.bbox, + coord=prov.bbox, # type: ignore origin=CoordOrigin.BOTTOMLEFT, ).to_top_left_origin(conv_res.pages[page_no].size.height), ) @@ -278,9 +280,21 @@ class GlmModel: for tc in c.cells: # [:1]: x0, y0, x1, y1 = tc.bbox.as_tuple() draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color) - image.show() - # draw_clusters_and_cells(ds_doc, 0) - # draw_clusters_and_cells(exported_doc, 0) + if show: + image.show() + else: + out_path: Path = ( + Path(settings.debug.debug_output_path) + / f"debug_{conv_res.input.file.stem}" + ) + out_path.mkdir(parents=True, exist_ok=True) + + out_file = out_path / f"doc_page_{page_no:05}.png" + image.save(str(out_file), format="png") + + # for item in ds_doc.page_dimensions: + # page_no = item.page + # draw_clusters_and_cells(ds_doc, page_no) return docling_doc diff --git a/docling/models/easyocr_model.py b/docling/models/easyocr_model.py index d57b5747..c767b8c9 100644 --- a/docling/models/easyocr_model.py +++ b/docling/models/easyocr_model.py @@ -96,6 +96,6 @@ class EasyOcrModel(BaseOcrModel): # DEBUG code: if settings.debug.visualize_ocr: - self.draw_ocr_rects_and_cells(page, ocr_rects) + self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects) yield page diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py index ca7f0602..91897df4 100644 --- a/docling/models/layout_model.py +++ b/docling/models/layout_model.py @@ -324,7 +324,7 @@ class LayoutModel(BasePageModel): # clusters = self.sort_clusters_by_cell_order(clusters) # DEBUG code: - def draw_clusters_and_cells(show: bool = True): + def draw_clusters_and_cells(show: bool = False): image = copy.deepcopy(page.image) if image is not None: draw = ImageDraw.Draw(image) @@ -344,6 +344,17 @@ class LayoutModel(BasePageModel): ) if show: image.show() + else: + out_path: Path = ( + Path(settings.debug.debug_output_path) + / f"debug_{conv_res.input.file.stem}" + ) + out_path.mkdir(parents=True, exist_ok=True) + + out_file = ( + out_path / f"layout_page_{page.page_no:05}.png" + ) + image.save(str(out_file), format="png") # draw_clusters_and_cells() diff --git a/docling/models/page_preprocessing_model.py b/docling/models/page_preprocessing_model.py index ac987874..63f1a4f6 100644 --- a/docling/models/page_preprocessing_model.py +++ b/docling/models/page_preprocessing_model.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import Iterable, Optional from PIL import ImageDraw @@ -5,6 +6,7 @@ from pydantic import BaseModel from docling.datamodel.base_models import Page from docling.datamodel.document import ConversionResult +from docling.datamodel.settings import settings from docling.models.base_model import BasePageModel from docling.utils.profiling import TimeRecorder @@ -27,7 +29,7 @@ class PagePreprocessingModel(BasePageModel): else: with TimeRecorder(conv_res, "page_parse"): page = self._populate_page_images(page) - page = self._parse_page_cells(page) + page = self._parse_page_cells(conv_res, page) yield page # Generate the page image and store it in the page object @@ -48,19 +50,30 @@ class PagePreprocessingModel(BasePageModel): return page # Extract and populate the page cells and store it in the page object - def _parse_page_cells(self, page: Page) -> Page: + def _parse_page_cells(self, conv_res: ConversionResult, page: Page) -> Page: assert page._backend is not None page.cells = list(page._backend.get_text_cells()) # DEBUG code: - def draw_text_boxes(image, cells): + def draw_text_boxes(image, cells, show: bool = False): draw = ImageDraw.Draw(image) for c in cells: x0, y0, x1, y1 = c.bbox.as_tuple() draw.rectangle([(x0, y0), (x1, y1)], outline="red") - image.show() + if show: + image.show() + else: + out_path: Path = ( + Path(settings.debug.debug_output_path) + / f"debug_{conv_res.input.file.stem}" + ) + out_path.mkdir(parents=True, exist_ok=True) - # draw_text_boxes(page.get_image(scale=1.0), cells) + out_file = out_path / f"cells_page_{page.page_no:05}.png" + image.save(str(out_file), format="png") + + if settings.debug.visualize_cells: + draw_text_boxes(page.get_image(scale=1.0), page.cells) return page diff --git a/docling/models/table_structure_model.py b/docling/models/table_structure_model.py index fdc56f5b..5961c2c0 100644 --- a/docling/models/table_structure_model.py +++ b/docling/models/table_structure_model.py @@ -38,7 +38,13 @@ class TableStructureModel(BasePageModel): self.tf_predictor = TFPredictor(self.tm_config) self.scale = 2.0 # Scale up table input images to 144 dpi - def draw_table_and_cells(self, page: Page, tbl_list: Iterable[Table]): + def draw_table_and_cells( + self, + conv_res: ConversionResult, + page: Page, + tbl_list: Iterable[Table], + show: bool = False, + ): assert page._backend is not None image = ( @@ -64,7 +70,17 @@ class TableStructureModel(BasePageModel): fill="black", ) - image.show() + if show: + image.show() + else: + out_path: Path = ( + Path(settings.debug.debug_output_path) + / f"debug_{conv_res.input.file.stem}" + ) + out_path.mkdir(parents=True, exist_ok=True) + + out_file = out_path / f"table_struct_page_{page.page_no:05}.png" + image.save(str(out_file), format="png") def __call__( self, conv_res: ConversionResult, page_batch: Iterable[Page] @@ -182,7 +198,9 @@ class TableStructureModel(BasePageModel): # For debugging purposes: if settings.debug.visualize_tables: self.draw_table_and_cells( - page, page.predictions.tablestructure.table_map.values() + conv_res, + page, + page.predictions.tablestructure.table_map.values(), ) yield page diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index 80ea5324..6f939351 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -179,6 +179,6 @@ class TesseractOcrCliModel(BaseOcrModel): # DEBUG code: if settings.debug.visualize_ocr: - self.draw_ocr_rects_and_cells(page, ocr_rects) + self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects) yield page diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/tesseract_ocr_model.py index 7fab2eb2..42513239 100644 --- a/docling/models/tesseract_ocr_model.py +++ b/docling/models/tesseract_ocr_model.py @@ -137,6 +137,6 @@ class TesseractOcrModel(BaseOcrModel): # DEBUG code: if settings.debug.visualize_ocr: - self.draw_ocr_rects_and_cells(page, ocr_rects) + self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects) yield page