mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-29 21:44:32 +00:00
Add settings to turn visualization on or off
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
77a89c3334
commit
747a190b3a
@ -26,8 +26,16 @@ class BatchConcurrencySettings(BaseModel):
|
||||
# To force models to run on a single core: export OMP_NUM_THREADS=1
|
||||
|
||||
|
||||
# Debug switches that turn the inline visualizations on or off.
# Every flag defaults to False, so no visualization runs unless one is enabled explicitly.
class DebugSettings(BaseModel):
|
||||
# Not referenced by any hunk shown in this commit view -- presumably gates a
# cell-level visualization elsewhere; TODO confirm against the full repository.
visualize_cells: bool = False
|
||||
# When True, the OCR models (EasyOcrModel, TesseractOcrCliModel, TesseractOcrModel)
# call self.draw_ocr_rects_and_cells(page, ocr_rects) before yielding each page.
visualize_ocr: bool = False
|
||||
# When True, LayoutModel calls its local draw_clusters_and_cells() helper before
# storing the layout prediction on the page.
visualize_layout: bool = False
|
||||
# When True, TableStructureModel calls self.draw_table_and_cells(...) with the
# predicted table_map values before yielding each page.
visualize_tables: bool = False
|
||||
|
||||
|
||||
# Top-level settings object; the module-level `settings` instance is built from it
# and imported by the model modules to read the debug flags.
class AppSettings(BaseSettings):
|
||||
# Performance settings, of type BatchConcurrencySettings (declared earlier in this file).
perf: BatchConcurrencySettings
|
||||
# Debug visualization flags. No default is declared here; the module-level
# `settings` instance passes DebugSettings() explicitly.
debug: DebugSettings
|
||||
|
||||
|
||||
settings = AppSettings(perf=BatchConcurrencySettings())
|
||||
settings = AppSettings(perf=BatchConcurrencySettings(), debug=DebugSettings())
|
||||
|
@ -6,6 +6,7 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
|
||||
from docling.datamodel.base_models import OcrCell, Page
|
||||
from docling.datamodel.pipeline_options import EasyOcrOptions
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
@ -85,6 +86,7 @@ class EasyOcrModel(BaseOcrModel):
|
||||
page.cells.extend(filtered_ocr_cells)
|
||||
|
||||
# DEBUG code:
|
||||
# self.draw_ocr_rects_and_cells(page, ocr_rects)
|
||||
if settings.debug.visualize_ocr:
|
||||
self.draw_ocr_rects_and_cells(page, ocr_rects)
|
||||
|
||||
yield page
|
||||
|
@ -16,6 +16,7 @@ from docling.datamodel.base_models import (
|
||||
LayoutPrediction,
|
||||
Page,
|
||||
)
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_model import BasePageModel
|
||||
from docling.utils import layout_utils as lu
|
||||
|
||||
@ -314,22 +315,24 @@ class LayoutModel(BasePageModel):
|
||||
# clusters = self.sort_clusters_by_cell_order(clusters)
|
||||
|
||||
# DEBUG code:
|
||||
def draw_clusters_and_cells():
|
||||
def draw_clusters_and_cells(show: bool = True):
|
||||
image = copy.deepcopy(page.image)
|
||||
draw = ImageDraw.Draw(image)
|
||||
for c in clusters:
|
||||
x0, y0, x1, y1 = c.bbox.as_tuple()
|
||||
draw.rectangle([(x0, y0), (x1, y1)], outline="green")
|
||||
if image is not None:
|
||||
draw = ImageDraw.Draw(image)
|
||||
for c in clusters:
|
||||
x0, y0, x1, y1 = c.bbox.as_tuple()
|
||||
draw.rectangle([(x0, y0), (x1, y1)], outline="green")
|
||||
|
||||
cell_color = (
|
||||
random.randint(30, 140),
|
||||
random.randint(30, 140),
|
||||
random.randint(30, 140),
|
||||
)
|
||||
for tc in c.cells: # [:1]:
|
||||
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
||||
draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
|
||||
image.show()
|
||||
cell_color = (
|
||||
random.randint(30, 140),
|
||||
random.randint(30, 140),
|
||||
random.randint(30, 140),
|
||||
)
|
||||
for tc in c.cells: # [:1]:
|
||||
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
||||
draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
|
||||
if show:
|
||||
image.show()
|
||||
|
||||
# draw_clusters_and_cells()
|
||||
|
||||
@ -337,7 +340,8 @@ class LayoutModel(BasePageModel):
|
||||
clusters, page.cells, page.size.height
|
||||
)
|
||||
|
||||
# draw_clusters_and_cells()
|
||||
if settings.debug.visualize_layout:
|
||||
draw_clusters_and_cells()
|
||||
|
||||
page.predictions.layout = LayoutPrediction(clusters=clusters)
|
||||
|
||||
|
@ -9,6 +9,7 @@ from PIL import ImageDraw
|
||||
|
||||
from docling.datamodel.base_models import Page, Table, TableStructurePrediction
|
||||
from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_model import BasePageModel
|
||||
|
||||
|
||||
@ -35,7 +36,7 @@ class TableStructureModel(BasePageModel):
|
||||
self.tf_predictor = TFPredictor(self.tm_config)
|
||||
self.scale = 2.0 # Scale up table input images to 144 dpi
|
||||
|
||||
def draw_table_and_cells(self, page: Page, tbl_list: List[Table]):
|
||||
def draw_table_and_cells(self, page: Page, tbl_list: Iterable[Table]):
|
||||
assert page._backend is not None
|
||||
|
||||
image = (
|
||||
@ -166,6 +167,9 @@ class TableStructureModel(BasePageModel):
|
||||
)
|
||||
|
||||
# For debugging purposes:
|
||||
# self.draw_table_and_cells(page, page.predictions.tablestructure.table_map.values())
|
||||
if settings.debug.visualize_tables:
|
||||
self.draw_table_and_cells(
|
||||
page, page.predictions.tablestructure.table_map.values()
|
||||
)
|
||||
|
||||
yield page
|
||||
|
@ -9,6 +9,7 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
|
||||
from docling.datamodel.base_models import OcrCell, Page
|
||||
from docling.datamodel.pipeline_options import TesseractCliOcrOptions
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
@ -169,6 +170,7 @@ class TesseractOcrCliModel(BaseOcrModel):
|
||||
page.cells.extend(filtered_ocr_cells)
|
||||
|
||||
# DEBUG code:
|
||||
# self.draw_ocr_rects_and_cells(page, ocr_rects)
|
||||
if settings.debug.visualize_ocr:
|
||||
self.draw_ocr_rects_and_cells(page, ocr_rects)
|
||||
|
||||
yield page
|
||||
|
@ -5,6 +5,7 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
|
||||
from docling.datamodel.base_models import OcrCell, Page
|
||||
from docling.datamodel.pipeline_options import TesseractOcrOptions
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
@ -125,6 +126,7 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
page.cells.extend(filtered_ocr_cells)
|
||||
|
||||
# DEBUG code:
|
||||
# self.draw_ocr_rects_and_cells(page, ocr_rects)
|
||||
if settings.debug.visualize_ocr:
|
||||
self.draw_ocr_rects_and_cells(page, ocr_rects)
|
||||
|
||||
yield page
|
||||
|
@ -8,6 +8,7 @@ import yaml
|
||||
|
||||
from docling.datamodel.base_models import ConversionStatus
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.document_converter import DocumentConverter
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
@ -113,6 +114,12 @@ def main():
|
||||
# docs = [DocumentStream(name="my_doc.pdf", stream=buf)]
|
||||
# input = DocumentConversionInput.from_streams(docs)
|
||||
|
||||
# # Turn on inline debug visualizations:
|
||||
# settings.debug.visualize_layout = True
|
||||
# settings.debug.visualize_ocr = True
|
||||
# settings.debug.visualize_tables = True
|
||||
# settings.debug.visualize_cells = True
|
||||
|
||||
doc_converter = DocumentConverter()
|
||||
|
||||
start_time = time.time()
|
||||
|
Loading…
Reference in New Issue
Block a user