Support Document Index as a layout class

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-10-08 12:32:03 +02:00
parent 9b82ae3324
commit dd8a0e9e44
4 changed files with 6 additions and 6 deletions

View File

@@ -35,7 +35,7 @@ _log = logging.getLogger(__name__)
 layout_label_to_ds_type = {
 "Title": "title",
-"Document Index": "table-of-path_or_stream",
+"Document Index": "table-of-contents",
 "Section-header": "subtitle-level-1",
 "Checkbox-Selected": "checkbox-selected",
 "Checkbox-Unselected": "checkbox-unselected",

View File

@@ -38,7 +38,7 @@ class LayoutModel:
 ]
 PAGE_HEADER_LABELS = ["Page-header", "Page-footer"]
-TABLE_LABEL = "Table"
+TABLE_LABELS = ["Table", "Document Index"]
 FIGURE_LABEL = "Picture"
 FORMULA_LABEL = "Formula"
@@ -70,7 +70,7 @@ class LayoutModel:
 "Key-Value Region": 0.45,
 }
-CLASS_REMAPPINGS = {"Document Index": "Table", "Title": "Section-header"}
+CLASS_REMAPPINGS = {"Title": "Section-header"}
 _log.debug("================= Start postprocess function ====================")
 start_time = time.time()

View File

@@ -75,7 +75,7 @@ class PageAssembleModel:
 headers.append(text_el)
 else:
 body.append(text_el)
-elif cluster.label == LayoutModel.TABLE_LABEL:
+elif cluster.label in LayoutModel.TABLE_LABELS:
 tbl = None
 if page.predictions.tablestructure:
 tbl = page.predictions.tablestructure.table_map.get(

View File

@@ -85,7 +85,7 @@ class TableStructureModel:
 ],
 )
 for cluster in page.predictions.layout.clusters
-if cluster.label == "Table"
+if cluster.label in ["Table", "Document Index"]
 ]
 if not len(in_tables):
 yield page
@@ -149,7 +149,7 @@ class TableStructureModel:
 id=table_cluster.id,
 page_no=page.page_no,
 cluster=table_cluster,
-label="Table",
+label=table_cluster.label,
 )
 page.predictions.tablestructure.table_map[table_cluster.id] = tbl