Mirror of https://github.com/DS4SD/docling.git (synced 2025-07-27 04:24:45 +00:00)
Support Document Index as a layout class
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent 9b82ae3324
commit dd8a0e9e44
@@ -35,7 +35,7 @@ _log = logging.getLogger(__name__)
|
||||
|
||||
layout_label_to_ds_type = {
|
||||
"Title": "title",
|
||||
- "Document Index": "table-of-path_or_stream",
+ "Document Index": "table-of-contents",
|
||||
"Section-header": "subtitle-level-1",
|
||||
"Checkbox-Selected": "checkbox-selected",
|
||||
"Checkbox-Unselected": "checkbox-unselected",
|
||||
|
@@ -38,7 +38,7 @@ class LayoutModel:
|
||||
]
|
||||
PAGE_HEADER_LABELS = ["Page-header", "Page-footer"]
|
||||
|
||||
- TABLE_LABEL = "Table"
+ TABLE_LABELS = ["Table", "Document Index"]
|
||||
FIGURE_LABEL = "Picture"
|
||||
FORMULA_LABEL = "Formula"
|
||||
|
||||
@@ -70,7 +70,7 @@ class LayoutModel:
|
||||
"Key-Value Region": 0.45,
|
||||
}
|
||||
|
||||
- CLASS_REMAPPINGS = {"Document Index": "Table", "Title": "Section-header"}
+ CLASS_REMAPPINGS = {"Title": "Section-header"}
|
||||
|
||||
_log.debug("================= Start postprocess function ====================")
|
||||
start_time = time.time()
|
||||
|
@@ -75,7 +75,7 @@ class PageAssembleModel:
|
||||
headers.append(text_el)
|
||||
else:
|
||||
body.append(text_el)
|
||||
- elif cluster.label == LayoutModel.TABLE_LABEL:
+ elif cluster.label in LayoutModel.TABLE_LABELS:
|
||||
tbl = None
|
||||
if page.predictions.tablestructure:
|
||||
tbl = page.predictions.tablestructure.table_map.get(
|
||||
|
@@ -85,7 +85,7 @@ class TableStructureModel:
|
||||
],
|
||||
)
|
||||
for cluster in page.predictions.layout.clusters
|
||||
- if cluster.label == "Table"
+ if cluster.label in ["Table", "Document Index"]
|
||||
]
|
||||
if not len(in_tables):
|
||||
yield page
|
||||
@@ -149,7 +149,7 @@ class TableStructureModel:
|
||||
id=table_cluster.id,
|
||||
page_no=page.page_no,
|
||||
cluster=table_cluster,
|
||||
- label="Table",
+ label=table_cluster.label,
|
||||
)
|
||||
|
||||
page.predictions.tablestructure.table_map[table_cluster.id] = tbl
|
||||
|
Loading…
Reference in New Issue
Block a user