Support Document Index as a layout class

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-10-08 12:32:03 +02:00
parent 9b82ae3324
commit dd8a0e9e44
4 changed files with 6 additions and 6 deletions

View File

@ -35,7 +35,7 @@ _log = logging.getLogger(__name__)
layout_label_to_ds_type = {
"Title": "title",
-"Document Index": "table-of-path_or_stream",
+"Document Index": "table-of-contents",
"Section-header": "subtitle-level-1",
"Checkbox-Selected": "checkbox-selected",
"Checkbox-Unselected": "checkbox-unselected",

View File

@ -38,7 +38,7 @@ class LayoutModel:
]
PAGE_HEADER_LABELS = ["Page-header", "Page-footer"]
-TABLE_LABEL = "Table"
+TABLE_LABELS = ["Table", "Document Index"]
FIGURE_LABEL = "Picture"
FORMULA_LABEL = "Formula"
@ -70,7 +70,7 @@ class LayoutModel:
"Key-Value Region": 0.45,
}
-CLASS_REMAPPINGS = {"Document Index": "Table", "Title": "Section-header"}
+CLASS_REMAPPINGS = {"Title": "Section-header"}
_log.debug("================= Start postprocess function ====================")
start_time = time.time()

View File

@ -75,7 +75,7 @@ class PageAssembleModel:
headers.append(text_el)
else:
body.append(text_el)
-elif cluster.label == LayoutModel.TABLE_LABEL:
+elif cluster.label in LayoutModel.TABLE_LABELS:
tbl = None
if page.predictions.tablestructure:
tbl = page.predictions.tablestructure.table_map.get(

View File

@ -85,7 +85,7 @@ class TableStructureModel:
],
)
for cluster in page.predictions.layout.clusters
-if cluster.label == "Table"
+if cluster.label in ["Table", "Document Index"]
]
if not len(in_tables):
yield page
@ -149,7 +149,7 @@ class TableStructureModel:
id=table_cluster.id,
page_no=page.page_no,
cluster=table_cluster,
-label="Table",
+label=table_cluster.label,
)
page.predictions.tablestructure.table_map[table_cluster.id] = tbl