Remove prints

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-08-14 20:48:34 +02:00
parent c4de11bdb3
commit 52b54b21c3

View File

@@ -3,34 +3,37 @@ from __future__ import annotations
from typing import Protocol
from bs4 import BeautifulSoup, Tag
from docling_core.types.doc.labels import DocItemLabel
from docling_core.types.doc.page import BoundingRectangle, TextCell
from docling_core.types.doc.utils import parse_otsl_table_content
from docling.backend.html_backend import HTMLDocumentBackend
from docling.datamodel.base_models import (
Cluster,
Page,
Table,
TableStructurePrediction,
VlmPrediction,
Cluster,
)
from docling_core.types.doc.labels import DocItemLabel
from docling_core.types.doc.utils import parse_otsl_table_content
from docling_core.types.doc.page import BoundingRectangle, TextCell
class VlmTaskInterpreter(Protocol):
    """Structural interface for objects that interpret a VLM prediction.

    An implementation receives the page, the cluster the prediction refers
    to, and the prediction itself. The method returns ``None``.
    """

    def interpret(
        self, page: Page, cluster: Cluster, prediction: VlmPrediction
    ) -> None:
        """Interpret ``prediction`` for ``cluster`` on ``page``.

        Args:
            page: The page the cluster belongs to.
            cluster: The cluster the prediction was produced for.
            prediction: The VLM output to interpret.
        """
        ...
class PlainTextInterpreter(VlmTaskInterpreter):
def interpret(self, page: Page, cluster: Cluster, prediction: VlmPrediction) -> None:
def interpret(
self, page: Page, cluster: Cluster, prediction: VlmPrediction
) -> None:
text = prediction.text.strip()
if not text:
return
# Attach as a single TextCell to the corresponding cluster
cluster.cells = [
TextCell(
index=0, # TODO: add index, could break stuff.
index=0, # TODO: add index, could break stuff.
text=text,
orig=text,
from_ocr=True,
@@ -40,7 +43,9 @@ class PlainTextInterpreter(VlmTaskInterpreter):
class HtmlTableInterpreter(VlmTaskInterpreter):
def interpret(self, page: Page, cluster: Cluster, prediction: VlmPrediction) -> None:
def interpret(
self, page: Page, cluster: Cluster, prediction: VlmPrediction
) -> None:
# Only process table-like clusters; otherwise, no-op
if cluster.label != DocItemLabel.TABLE:
return
@@ -56,7 +61,6 @@ class HtmlTableInterpreter(VlmTaskInterpreter):
soup = BeautifulSoup(html, "html.parser")
table_tag: Tag | None = soup.find("table") # type: ignore[assignment]
if table_tag is None:
return
@@ -88,8 +92,10 @@ class HtmlTableInterpreter(VlmTaskInterpreter):
class OtslTableInterpreter(VlmTaskInterpreter):
"""Interprets OTSL table predictions from VLM models."""
def interpret(self, page: Page, cluster: Cluster, prediction: VlmPrediction) -> None:
def interpret(
self, page: Page, cluster: Cluster, prediction: VlmPrediction
) -> None:
# Only process table-like clusters; otherwise, no-op
if cluster.label != DocItemLabel.TABLE:
return
@@ -100,7 +106,7 @@ class OtslTableInterpreter(VlmTaskInterpreter):
try:
data = parse_otsl_table_content(otsl_content)
except Exception as e:
except Exception:
return
# Create or update the TableStructurePrediction for this page
@@ -122,5 +128,3 @@ class OtslTableInterpreter(VlmTaskInterpreter):
)
page.predictions.tablestructure.table_map[cluster.id] = tbl