mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
@@ -3,34 +3,37 @@ from __future__ import annotations
|
||||
from typing import Protocol
|
||||
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from docling_core.types.doc.labels import DocItemLabel
|
||||
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
||||
from docling_core.types.doc.utils import parse_otsl_table_content
|
||||
|
||||
from docling.backend.html_backend import HTMLDocumentBackend
|
||||
from docling.datamodel.base_models import (
|
||||
Cluster,
|
||||
Page,
|
||||
Table,
|
||||
TableStructurePrediction,
|
||||
VlmPrediction,
|
||||
Cluster,
|
||||
)
|
||||
from docling_core.types.doc.labels import DocItemLabel
|
||||
from docling_core.types.doc.utils import parse_otsl_table_content
|
||||
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
||||
|
||||
|
||||
class VlmTaskInterpreter(Protocol):
|
||||
def interpret(self, page: Page, cluster: Cluster, prediction: VlmPrediction) -> None:
|
||||
...
|
||||
def interpret(
|
||||
self, page: Page, cluster: Cluster, prediction: VlmPrediction
|
||||
) -> None: ...
|
||||
|
||||
|
||||
class PlainTextInterpreter(VlmTaskInterpreter):
|
||||
def interpret(self, page: Page, cluster: Cluster, prediction: VlmPrediction) -> None:
|
||||
def interpret(
|
||||
self, page: Page, cluster: Cluster, prediction: VlmPrediction
|
||||
) -> None:
|
||||
text = prediction.text.strip()
|
||||
if not text:
|
||||
return
|
||||
# Attach as a single TextCell to the corresponding cluster
|
||||
cluster.cells = [
|
||||
TextCell(
|
||||
index=0, # TODO: add index, could break stuff.
|
||||
index=0, # TODO: add index, could break stuff.
|
||||
text=text,
|
||||
orig=text,
|
||||
from_ocr=True,
|
||||
@@ -40,7 +43,9 @@ class PlainTextInterpreter(VlmTaskInterpreter):
|
||||
|
||||
|
||||
class HtmlTableInterpreter(VlmTaskInterpreter):
|
||||
def interpret(self, page: Page, cluster: Cluster, prediction: VlmPrediction) -> None:
|
||||
def interpret(
|
||||
self, page: Page, cluster: Cluster, prediction: VlmPrediction
|
||||
) -> None:
|
||||
# Only process table-like clusters; otherwise, no-op
|
||||
if cluster.label != DocItemLabel.TABLE:
|
||||
return
|
||||
@@ -56,7 +61,6 @@ class HtmlTableInterpreter(VlmTaskInterpreter):
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
table_tag: Tag | None = soup.find("table") # type: ignore[assignment]
|
||||
|
||||
|
||||
if table_tag is None:
|
||||
return
|
||||
|
||||
@@ -88,8 +92,10 @@ class HtmlTableInterpreter(VlmTaskInterpreter):
|
||||
|
||||
class OtslTableInterpreter(VlmTaskInterpreter):
|
||||
"""Interprets OTSL table predictions from VLM models."""
|
||||
|
||||
def interpret(self, page: Page, cluster: Cluster, prediction: VlmPrediction) -> None:
|
||||
|
||||
def interpret(
|
||||
self, page: Page, cluster: Cluster, prediction: VlmPrediction
|
||||
) -> None:
|
||||
# Only process table-like clusters; otherwise, no-op
|
||||
if cluster.label != DocItemLabel.TABLE:
|
||||
return
|
||||
@@ -100,7 +106,7 @@ class OtslTableInterpreter(VlmTaskInterpreter):
|
||||
|
||||
try:
|
||||
data = parse_otsl_table_content(otsl_content)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
return
|
||||
|
||||
# Create or update the TableStructurePrediction for this page
|
||||
@@ -122,5 +128,3 @@ class OtslTableInterpreter(VlmTaskInterpreter):
|
||||
)
|
||||
|
||||
page.predictions.tablestructure.table_map[cluster.id] = tbl
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user