Add settings to turn visualization on or off

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-10-28 13:21:32 +01:00
parent 77a89c3334
commit 747a190b3a
7 changed files with 50 additions and 21 deletions

View File

@ -26,8 +26,16 @@ class BatchConcurrencySettings(BaseModel):
# To force models into single core: export OMP_NUM_THREADS=1 # To force models into single core: export OMP_NUM_THREADS=1
# Debug switches for the inline visualizations drawn by the page models.
# Every flag defaults to False, so no visualization runs unless a caller
# sets the flag on the shared settings object (settings.debug.<flag> = True).
class DebugSettings(BaseModel):
# NOTE(review): no reader of visualize_cells is visible in this change set;
# presumably reserved for a cell-level visualization - confirm before relying on it.
visualize_cells: bool = False
# Guards draw_ocr_rects_and_cells(page, ocr_rects) in the EasyOCR,
# Tesseract CLI and Tesseract OCR models.
visualize_ocr: bool = False
# Guards draw_clusters_and_cells() in LayoutModel.
visualize_layout: bool = False
# Guards draw_table_and_cells(page, ...) in TableStructureModel.
visualize_tables: bool = False
class AppSettings(BaseSettings): class AppSettings(BaseSettings):
perf: BatchConcurrencySettings perf: BatchConcurrencySettings
debug: DebugSettings
settings = AppSettings(perf=BatchConcurrencySettings()) settings = AppSettings(perf=BatchConcurrencySettings(), debug=DebugSettings())

View File

@ -6,6 +6,7 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.base_models import OcrCell, Page
from docling.datamodel.pipeline_options import EasyOcrOptions from docling.datamodel.pipeline_options import EasyOcrOptions
from docling.datamodel.settings import settings
from docling.models.base_ocr_model import BaseOcrModel from docling.models.base_ocr_model import BaseOcrModel
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -85,6 +86,7 @@ class EasyOcrModel(BaseOcrModel):
page.cells.extend(filtered_ocr_cells) page.cells.extend(filtered_ocr_cells)
# DEBUG code: # DEBUG code:
# self.draw_ocr_rects_and_cells(page, ocr_rects) if settings.debug.visualize_ocr:
self.draw_ocr_rects_and_cells(page, ocr_rects)
yield page yield page

View File

@ -16,6 +16,7 @@ from docling.datamodel.base_models import (
LayoutPrediction, LayoutPrediction,
Page, Page,
) )
from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel from docling.models.base_model import BasePageModel
from docling.utils import layout_utils as lu from docling.utils import layout_utils as lu
@ -314,22 +315,24 @@ class LayoutModel(BasePageModel):
# clusters = self.sort_clusters_by_cell_order(clusters) # clusters = self.sort_clusters_by_cell_order(clusters)
# DEBUG code: # DEBUG code:
def draw_clusters_and_cells(): def draw_clusters_and_cells(show: bool = True):
image = copy.deepcopy(page.image) image = copy.deepcopy(page.image)
draw = ImageDraw.Draw(image) if image is not None:
for c in clusters: draw = ImageDraw.Draw(image)
x0, y0, x1, y1 = c.bbox.as_tuple() for c in clusters:
draw.rectangle([(x0, y0), (x1, y1)], outline="green") x0, y0, x1, y1 = c.bbox.as_tuple()
draw.rectangle([(x0, y0), (x1, y1)], outline="green")
cell_color = ( cell_color = (
random.randint(30, 140), random.randint(30, 140),
random.randint(30, 140), random.randint(30, 140),
random.randint(30, 140), random.randint(30, 140),
) )
for tc in c.cells: # [:1]: for tc in c.cells: # [:1]:
x0, y0, x1, y1 = tc.bbox.as_tuple() x0, y0, x1, y1 = tc.bbox.as_tuple()
draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color) draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
image.show() if show:
image.show()
# draw_clusters_and_cells() # draw_clusters_and_cells()
@ -337,7 +340,8 @@ class LayoutModel(BasePageModel):
clusters, page.cells, page.size.height clusters, page.cells, page.size.height
) )
# draw_clusters_and_cells() if settings.debug.visualize_layout:
draw_clusters_and_cells()
page.predictions.layout = LayoutPrediction(clusters=clusters) page.predictions.layout = LayoutPrediction(clusters=clusters)

View File

@ -9,6 +9,7 @@ from PIL import ImageDraw
from docling.datamodel.base_models import Page, Table, TableStructurePrediction from docling.datamodel.base_models import Page, Table, TableStructurePrediction
from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions
from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel from docling.models.base_model import BasePageModel
@ -35,7 +36,7 @@ class TableStructureModel(BasePageModel):
self.tf_predictor = TFPredictor(self.tm_config) self.tf_predictor = TFPredictor(self.tm_config)
self.scale = 2.0 # Scale up table input images to 144 dpi self.scale = 2.0 # Scale up table input images to 144 dpi
def draw_table_and_cells(self, page: Page, tbl_list: List[Table]): def draw_table_and_cells(self, page: Page, tbl_list: Iterable[Table]):
assert page._backend is not None assert page._backend is not None
image = ( image = (
@ -166,6 +167,9 @@ class TableStructureModel(BasePageModel):
) )
# For debugging purposes: # For debugging purposes:
# self.draw_table_and_cells(page, page.predictions.tablestructure.table_map.values()) if settings.debug.visualize_tables:
self.draw_table_and_cells(
page, page.predictions.tablestructure.table_map.values()
)
yield page yield page

View File

@ -9,6 +9,7 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.base_models import OcrCell, Page
from docling.datamodel.pipeline_options import TesseractCliOcrOptions from docling.datamodel.pipeline_options import TesseractCliOcrOptions
from docling.datamodel.settings import settings
from docling.models.base_ocr_model import BaseOcrModel from docling.models.base_ocr_model import BaseOcrModel
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -169,6 +170,7 @@ class TesseractOcrCliModel(BaseOcrModel):
page.cells.extend(filtered_ocr_cells) page.cells.extend(filtered_ocr_cells)
# DEBUG code: # DEBUG code:
# self.draw_ocr_rects_and_cells(page, ocr_rects) if settings.debug.visualize_ocr:
self.draw_ocr_rects_and_cells(page, ocr_rects)
yield page yield page

View File

@ -5,6 +5,7 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.base_models import OcrCell, Page
from docling.datamodel.pipeline_options import TesseractOcrOptions from docling.datamodel.pipeline_options import TesseractOcrOptions
from docling.datamodel.settings import settings
from docling.models.base_ocr_model import BaseOcrModel from docling.models.base_ocr_model import BaseOcrModel
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -125,6 +126,7 @@ class TesseractOcrModel(BaseOcrModel):
page.cells.extend(filtered_ocr_cells) page.cells.extend(filtered_ocr_cells)
# DEBUG code: # DEBUG code:
# self.draw_ocr_rects_and_cells(page, ocr_rects) if settings.debug.visualize_ocr:
self.draw_ocr_rects_and_cells(page, ocr_rects)
yield page yield page

View File

@ -8,6 +8,7 @@ import yaml
from docling.datamodel.base_models import ConversionStatus from docling.datamodel.base_models import ConversionStatus
from docling.datamodel.document import ConversionResult from docling.datamodel.document import ConversionResult
from docling.datamodel.settings import settings
from docling.document_converter import DocumentConverter from docling.document_converter import DocumentConverter
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -113,6 +114,12 @@ def main():
# docs = [DocumentStream(name="my_doc.pdf", stream=buf)] # docs = [DocumentStream(name="my_doc.pdf", stream=buf)]
# input = DocumentConversionInput.from_streams(docs) # input = DocumentConversionInput.from_streams(docs)
# # Turn on inline debug visualizations:
# settings.debug.visualize_layout = True
# settings.debug.visualize_ocr = True
# settings.debug.visualize_tables = True
# settings.debug.visualize_cells = True
doc_converter = DocumentConverter() doc_converter = DocumentConverter()
start_time = time.time() start_time = time.time()