mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-30 14:04:27 +00:00
Visualization code outputs PNG files to the debug directory
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
0cdccb3da1
commit
e1b83ec485
@ -36,7 +36,7 @@ class DebugSettings(BaseModel):
|
|||||||
profile_pipeline_timings: bool = False
|
profile_pipeline_timings: bool = False
|
||||||
|
|
||||||
# Path used to output debug information.
|
# Path used to output debug information.
|
||||||
debug_output_path: str = str(Path.cwd())
|
debug_output_path: str = str(Path.cwd() / "debug")
|
||||||
|
|
||||||
|
|
||||||
class AppSettings(BaseSettings):
|
class AppSettings(BaseSettings):
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import copy
|
import copy
|
||||||
import logging
|
import logging
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
|
from pathlib import Path
|
||||||
from typing import Iterable, List
|
from typing import Iterable, List
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -12,6 +13,7 @@ from scipy.ndimage import find_objects, label
|
|||||||
from docling.datamodel.base_models import OcrCell, Page
|
from docling.datamodel.base_models import OcrCell, Page
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import OcrOptions
|
from docling.datamodel.pipeline_options import OcrOptions
|
||||||
|
from docling.datamodel.settings import settings
|
||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_model import BasePageModel
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
@ -115,7 +117,7 @@ class BaseOcrModel(BasePageModel):
|
|||||||
]
|
]
|
||||||
return filtered_ocr_cells
|
return filtered_ocr_cells
|
||||||
|
|
||||||
def draw_ocr_rects_and_cells(self, page, ocr_rects):
|
def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False):
|
||||||
image = copy.deepcopy(page.image)
|
image = copy.deepcopy(page.image)
|
||||||
draw = ImageDraw.Draw(image, "RGBA")
|
draw = ImageDraw.Draw(image, "RGBA")
|
||||||
|
|
||||||
@ -132,7 +134,18 @@ class BaseOcrModel(BasePageModel):
|
|||||||
if isinstance(tc, OcrCell):
|
if isinstance(tc, OcrCell):
|
||||||
color = "magenta"
|
color = "magenta"
|
||||||
draw.rectangle([(x0, y0), (x1, y1)], outline=color)
|
draw.rectangle([(x0, y0), (x1, y1)], outline=color)
|
||||||
image.show()
|
|
||||||
|
if show:
|
||||||
|
image.show()
|
||||||
|
else:
|
||||||
|
out_path: Path = (
|
||||||
|
Path(settings.debug.debug_output_path)
|
||||||
|
/ f"debug_{conv_res.input.file.stem}"
|
||||||
|
)
|
||||||
|
out_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
out_file = out_path / f"ocr_page_{page.page_no:05}.png"
|
||||||
|
image.save(str(out_file), format="png")
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def __call__(
|
def __call__(
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import copy
|
import copy
|
||||||
import random
|
import random
|
||||||
|
from pathlib import Path
|
||||||
from typing import List, Union
|
from typing import List, Union
|
||||||
|
|
||||||
from deepsearch_glm.nlp_utils import init_nlp_model
|
from deepsearch_glm.nlp_utils import init_nlp_model
|
||||||
@ -27,6 +28,7 @@ from pydantic import BaseModel, ConfigDict
|
|||||||
|
|
||||||
from docling.datamodel.base_models import Cluster, FigureElement, Table, TextElement
|
from docling.datamodel.base_models import Cluster, FigureElement, Table, TextElement
|
||||||
from docling.datamodel.document import ConversionResult, layout_label_to_ds_type
|
from docling.datamodel.document import ConversionResult, layout_label_to_ds_type
|
||||||
|
from docling.datamodel.settings import settings
|
||||||
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
||||||
from docling.utils.utils import create_hash
|
from docling.utils.utils import create_hash
|
||||||
|
|
||||||
@ -236,15 +238,15 @@ class GlmModel:
|
|||||||
docling_doc: DoclingDocument = to_docling_document(glm_doc) # Experimental
|
docling_doc: DoclingDocument = to_docling_document(glm_doc) # Experimental
|
||||||
|
|
||||||
# DEBUG code:
|
# DEBUG code:
|
||||||
def draw_clusters_and_cells(ds_document, page_no):
|
def draw_clusters_and_cells(ds_document, page_no, show: bool = False):
|
||||||
clusters_to_draw = []
|
clusters_to_draw = []
|
||||||
image = copy.deepcopy(conv_res.pages[page_no].image)
|
image = copy.deepcopy(conv_res.pages[page_no].image)
|
||||||
for ix, elem in enumerate(ds_document.main_text):
|
for ix, elem in enumerate(ds_document.main_text):
|
||||||
if isinstance(elem, BaseText):
|
if isinstance(elem, BaseText):
|
||||||
prov = elem.prov[0]
|
prov = elem.prov[0] # type: ignore
|
||||||
elif isinstance(elem, Ref):
|
elif isinstance(elem, Ref):
|
||||||
_, arr, index = elem.ref.split("/")
|
_, arr, index = elem.ref.split("/")
|
||||||
index = int(index)
|
index = int(index) # type: ignore
|
||||||
if arr == "tables":
|
if arr == "tables":
|
||||||
prov = ds_document.tables[index].prov[0]
|
prov = ds_document.tables[index].prov[0]
|
||||||
elif arr == "figures":
|
elif arr == "figures":
|
||||||
@ -258,7 +260,7 @@ class GlmModel:
|
|||||||
id=ix,
|
id=ix,
|
||||||
label=elem.name,
|
label=elem.name,
|
||||||
bbox=BoundingBox.from_tuple(
|
bbox=BoundingBox.from_tuple(
|
||||||
coord=prov.bbox,
|
coord=prov.bbox, # type: ignore
|
||||||
origin=CoordOrigin.BOTTOMLEFT,
|
origin=CoordOrigin.BOTTOMLEFT,
|
||||||
).to_top_left_origin(conv_res.pages[page_no].size.height),
|
).to_top_left_origin(conv_res.pages[page_no].size.height),
|
||||||
)
|
)
|
||||||
@ -278,9 +280,21 @@ class GlmModel:
|
|||||||
for tc in c.cells: # [:1]:
|
for tc in c.cells: # [:1]:
|
||||||
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
||||||
draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
|
draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
|
||||||
image.show()
|
|
||||||
|
|
||||||
# draw_clusters_and_cells(ds_doc, 0)
|
if show:
|
||||||
# draw_clusters_and_cells(exported_doc, 0)
|
image.show()
|
||||||
|
else:
|
||||||
|
out_path: Path = (
|
||||||
|
Path(settings.debug.debug_output_path)
|
||||||
|
/ f"debug_{conv_res.input.file.stem}"
|
||||||
|
)
|
||||||
|
out_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
out_file = out_path / f"doc_page_{page_no:05}.png"
|
||||||
|
image.save(str(out_file), format="png")
|
||||||
|
|
||||||
|
# for item in ds_doc.page_dimensions:
|
||||||
|
# page_no = item.page
|
||||||
|
# draw_clusters_and_cells(ds_doc, page_no)
|
||||||
|
|
||||||
return docling_doc
|
return docling_doc
|
||||||
|
@ -96,6 +96,6 @@ class EasyOcrModel(BaseOcrModel):
|
|||||||
|
|
||||||
# DEBUG code:
|
# DEBUG code:
|
||||||
if settings.debug.visualize_ocr:
|
if settings.debug.visualize_ocr:
|
||||||
self.draw_ocr_rects_and_cells(page, ocr_rects)
|
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
|
||||||
|
|
||||||
yield page
|
yield page
|
||||||
|
@ -324,7 +324,7 @@ class LayoutModel(BasePageModel):
|
|||||||
# clusters = self.sort_clusters_by_cell_order(clusters)
|
# clusters = self.sort_clusters_by_cell_order(clusters)
|
||||||
|
|
||||||
# DEBUG code:
|
# DEBUG code:
|
||||||
def draw_clusters_and_cells(show: bool = True):
|
def draw_clusters_and_cells(show: bool = False):
|
||||||
image = copy.deepcopy(page.image)
|
image = copy.deepcopy(page.image)
|
||||||
if image is not None:
|
if image is not None:
|
||||||
draw = ImageDraw.Draw(image)
|
draw = ImageDraw.Draw(image)
|
||||||
@ -344,6 +344,17 @@ class LayoutModel(BasePageModel):
|
|||||||
)
|
)
|
||||||
if show:
|
if show:
|
||||||
image.show()
|
image.show()
|
||||||
|
else:
|
||||||
|
out_path: Path = (
|
||||||
|
Path(settings.debug.debug_output_path)
|
||||||
|
/ f"debug_{conv_res.input.file.stem}"
|
||||||
|
)
|
||||||
|
out_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
out_file = (
|
||||||
|
out_path / f"layout_page_{page.page_no:05}.png"
|
||||||
|
)
|
||||||
|
image.save(str(out_file), format="png")
|
||||||
|
|
||||||
# draw_clusters_and_cells()
|
# draw_clusters_and_cells()
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
from pathlib import Path
|
||||||
from typing import Iterable, Optional
|
from typing import Iterable, Optional
|
||||||
|
|
||||||
from PIL import ImageDraw
|
from PIL import ImageDraw
|
||||||
@ -5,6 +6,7 @@ from pydantic import BaseModel
|
|||||||
|
|
||||||
from docling.datamodel.base_models import Page
|
from docling.datamodel.base_models import Page
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
|
from docling.datamodel.settings import settings
|
||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_model import BasePageModel
|
||||||
from docling.utils.profiling import TimeRecorder
|
from docling.utils.profiling import TimeRecorder
|
||||||
|
|
||||||
@ -27,7 +29,7 @@ class PagePreprocessingModel(BasePageModel):
|
|||||||
else:
|
else:
|
||||||
with TimeRecorder(conv_res, "page_parse"):
|
with TimeRecorder(conv_res, "page_parse"):
|
||||||
page = self._populate_page_images(page)
|
page = self._populate_page_images(page)
|
||||||
page = self._parse_page_cells(page)
|
page = self._parse_page_cells(conv_res, page)
|
||||||
yield page
|
yield page
|
||||||
|
|
||||||
# Generate the page image and store it in the page object
|
# Generate the page image and store it in the page object
|
||||||
@ -48,19 +50,30 @@ class PagePreprocessingModel(BasePageModel):
|
|||||||
return page
|
return page
|
||||||
|
|
||||||
# Extract and populate the page cells and store it in the page object
|
# Extract and populate the page cells and store it in the page object
|
||||||
def _parse_page_cells(self, page: Page) -> Page:
|
def _parse_page_cells(self, conv_res: ConversionResult, page: Page) -> Page:
|
||||||
assert page._backend is not None
|
assert page._backend is not None
|
||||||
|
|
||||||
page.cells = list(page._backend.get_text_cells())
|
page.cells = list(page._backend.get_text_cells())
|
||||||
|
|
||||||
# DEBUG code:
|
# DEBUG code:
|
||||||
def draw_text_boxes(image, cells):
|
def draw_text_boxes(image, cells, show: bool = False):
|
||||||
draw = ImageDraw.Draw(image)
|
draw = ImageDraw.Draw(image)
|
||||||
for c in cells:
|
for c in cells:
|
||||||
x0, y0, x1, y1 = c.bbox.as_tuple()
|
x0, y0, x1, y1 = c.bbox.as_tuple()
|
||||||
draw.rectangle([(x0, y0), (x1, y1)], outline="red")
|
draw.rectangle([(x0, y0), (x1, y1)], outline="red")
|
||||||
image.show()
|
if show:
|
||||||
|
image.show()
|
||||||
|
else:
|
||||||
|
out_path: Path = (
|
||||||
|
Path(settings.debug.debug_output_path)
|
||||||
|
/ f"debug_{conv_res.input.file.stem}"
|
||||||
|
)
|
||||||
|
out_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# draw_text_boxes(page.get_image(scale=1.0), cells)
|
out_file = out_path / f"cells_page_{page.page_no:05}.png"
|
||||||
|
image.save(str(out_file), format="png")
|
||||||
|
|
||||||
|
if settings.debug.visualize_cells:
|
||||||
|
draw_text_boxes(page.get_image(scale=1.0), page.cells)
|
||||||
|
|
||||||
return page
|
return page
|
||||||
|
@ -38,7 +38,13 @@ class TableStructureModel(BasePageModel):
|
|||||||
self.tf_predictor = TFPredictor(self.tm_config)
|
self.tf_predictor = TFPredictor(self.tm_config)
|
||||||
self.scale = 2.0 # Scale up table input images to 144 dpi
|
self.scale = 2.0 # Scale up table input images to 144 dpi
|
||||||
|
|
||||||
def draw_table_and_cells(self, page: Page, tbl_list: Iterable[Table]):
|
def draw_table_and_cells(
|
||||||
|
self,
|
||||||
|
conv_res: ConversionResult,
|
||||||
|
page: Page,
|
||||||
|
tbl_list: Iterable[Table],
|
||||||
|
show: bool = False,
|
||||||
|
):
|
||||||
assert page._backend is not None
|
assert page._backend is not None
|
||||||
|
|
||||||
image = (
|
image = (
|
||||||
@ -64,7 +70,17 @@ class TableStructureModel(BasePageModel):
|
|||||||
fill="black",
|
fill="black",
|
||||||
)
|
)
|
||||||
|
|
||||||
image.show()
|
if show:
|
||||||
|
image.show()
|
||||||
|
else:
|
||||||
|
out_path: Path = (
|
||||||
|
Path(settings.debug.debug_output_path)
|
||||||
|
/ f"debug_{conv_res.input.file.stem}"
|
||||||
|
)
|
||||||
|
out_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
out_file = out_path / f"table_struct_page_{page.page_no:05}.png"
|
||||||
|
image.save(str(out_file), format="png")
|
||||||
|
|
||||||
def __call__(
|
def __call__(
|
||||||
self, conv_res: ConversionResult, page_batch: Iterable[Page]
|
self, conv_res: ConversionResult, page_batch: Iterable[Page]
|
||||||
@ -182,7 +198,9 @@ class TableStructureModel(BasePageModel):
|
|||||||
# For debugging purposes:
|
# For debugging purposes:
|
||||||
if settings.debug.visualize_tables:
|
if settings.debug.visualize_tables:
|
||||||
self.draw_table_and_cells(
|
self.draw_table_and_cells(
|
||||||
page, page.predictions.tablestructure.table_map.values()
|
conv_res,
|
||||||
|
page,
|
||||||
|
page.predictions.tablestructure.table_map.values(),
|
||||||
)
|
)
|
||||||
|
|
||||||
yield page
|
yield page
|
||||||
|
@ -179,6 +179,6 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
|
|
||||||
# DEBUG code:
|
# DEBUG code:
|
||||||
if settings.debug.visualize_ocr:
|
if settings.debug.visualize_ocr:
|
||||||
self.draw_ocr_rects_and_cells(page, ocr_rects)
|
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
|
||||||
|
|
||||||
yield page
|
yield page
|
||||||
|
@ -137,6 +137,6 @@ class TesseractOcrModel(BaseOcrModel):
|
|||||||
|
|
||||||
# DEBUG code:
|
# DEBUG code:
|
||||||
if settings.debug.visualize_ocr:
|
if settings.debug.visualize_ocr:
|
||||||
self.draw_ocr_rects_and_cells(page, ocr_rects)
|
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
|
||||||
|
|
||||||
yield page
|
yield page
|
||||||
|
Loading…
Reference in New Issue
Block a user