mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 12:34:22 +00:00
Support Document Index as a layout class
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
9b82ae3324
commit
dd8a0e9e44
@@ -35,7 +35,7 @@ _log = logging.getLogger(__name__)
|
|||||||
|
|
||||||
layout_label_to_ds_type = {
|
layout_label_to_ds_type = {
|
||||||
"Title": "title",
|
"Title": "title",
|
||||||
"Document Index": "table-of-path_or_stream",
|
"Document Index": "table-of-contents",
|
||||||
"Section-header": "subtitle-level-1",
|
"Section-header": "subtitle-level-1",
|
||||||
"Checkbox-Selected": "checkbox-selected",
|
"Checkbox-Selected": "checkbox-selected",
|
||||||
"Checkbox-Unselected": "checkbox-unselected",
|
"Checkbox-Unselected": "checkbox-unselected",
|
||||||
|
@@ -38,7 +38,7 @@ class LayoutModel:
|
|||||||
]
|
]
|
||||||
PAGE_HEADER_LABELS = ["Page-header", "Page-footer"]
|
PAGE_HEADER_LABELS = ["Page-header", "Page-footer"]
|
||||||
|
|
||||||
TABLE_LABEL = "Table"
|
TABLE_LABELS = ["Table", "Document Index"]
|
||||||
FIGURE_LABEL = "Picture"
|
FIGURE_LABEL = "Picture"
|
||||||
FORMULA_LABEL = "Formula"
|
FORMULA_LABEL = "Formula"
|
||||||
|
|
||||||
@@ -70,7 +70,7 @@ class LayoutModel:
|
|||||||
"Key-Value Region": 0.45,
|
"Key-Value Region": 0.45,
|
||||||
}
|
}
|
||||||
|
|
||||||
CLASS_REMAPPINGS = {"Document Index": "Table", "Title": "Section-header"}
|
CLASS_REMAPPINGS = {"Title": "Section-header"}
|
||||||
|
|
||||||
_log.debug("================= Start postprocess function ====================")
|
_log.debug("================= Start postprocess function ====================")
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
@@ -75,7 +75,7 @@ class PageAssembleModel:
|
|||||||
headers.append(text_el)
|
headers.append(text_el)
|
||||||
else:
|
else:
|
||||||
body.append(text_el)
|
body.append(text_el)
|
||||||
elif cluster.label == LayoutModel.TABLE_LABEL:
|
elif cluster.label in LayoutModel.TABLE_LABELS:
|
||||||
tbl = None
|
tbl = None
|
||||||
if page.predictions.tablestructure:
|
if page.predictions.tablestructure:
|
||||||
tbl = page.predictions.tablestructure.table_map.get(
|
tbl = page.predictions.tablestructure.table_map.get(
|
||||||
|
@@ -85,7 +85,7 @@ class TableStructureModel:
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
for cluster in page.predictions.layout.clusters
|
for cluster in page.predictions.layout.clusters
|
||||||
if cluster.label == "Table"
|
if cluster.label in ["Table", "Document Index"]
|
||||||
]
|
]
|
||||||
if not len(in_tables):
|
if not len(in_tables):
|
||||||
yield page
|
yield page
|
||||||
@@ -149,7 +149,7 @@ class TableStructureModel:
|
|||||||
id=table_cluster.id,
|
id=table_cluster.id,
|
||||||
page_no=page.page_no,
|
page_no=page.page_no,
|
||||||
cluster=table_cluster,
|
cluster=table_cluster,
|
||||||
label="Table",
|
label=table_cluster.label,
|
||||||
)
|
)
|
||||||
|
|
||||||
page.predictions.tablestructure.table_map[table_cluster.id] = tbl
|
page.predictions.tablestructure.table_map[table_cluster.id] = tbl
|
||||||
|
Loading…
Reference in New Issue
Block a user