diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 9dd71e4a..97c2d71f 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -35,7 +35,7 @@ _log = logging.getLogger(__name__) layout_label_to_ds_type = { "Title": "title", - "Document Index": "table-of-path_or_stream", + "Document Index": "table-of-contents", "Section-header": "subtitle-level-1", "Checkbox-Selected": "checkbox-selected", "Checkbox-Unselected": "checkbox-unselected", diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py index 2ca20a42..3c681a85 100644 --- a/docling/models/layout_model.py +++ b/docling/models/layout_model.py @@ -38,7 +38,7 @@ class LayoutModel: ] PAGE_HEADER_LABELS = ["Page-header", "Page-footer"] - TABLE_LABEL = "Table" + TABLE_LABELS = ["Table", "Document Index"] FIGURE_LABEL = "Picture" FORMULA_LABEL = "Formula" @@ -70,7 +70,7 @@ class LayoutModel: "Key-Value Region": 0.45, } - CLASS_REMAPPINGS = {"Document Index": "Table", "Title": "Section-header"} + CLASS_REMAPPINGS = {"Title": "Section-header"} _log.debug("================= Start postprocess function ====================") start_time = time.time() diff --git a/docling/models/page_assemble_model.py b/docling/models/page_assemble_model.py index 2b9db544..8f84ebb7 100644 --- a/docling/models/page_assemble_model.py +++ b/docling/models/page_assemble_model.py @@ -75,7 +75,7 @@ class PageAssembleModel: headers.append(text_el) else: body.append(text_el) - elif cluster.label == LayoutModel.TABLE_LABEL: + elif cluster.label in LayoutModel.TABLE_LABELS: tbl = None if page.predictions.tablestructure: tbl = page.predictions.tablestructure.table_map.get( diff --git a/docling/models/table_structure_model.py b/docling/models/table_structure_model.py index f722c13a..b0431024 100644 --- a/docling/models/table_structure_model.py +++ b/docling/models/table_structure_model.py @@ -85,7 +85,7 @@ class TableStructureModel: ], ) for cluster in page.predictions.layout.clusters - if cluster.label == "Table" + if cluster.label in ["Table", "Document Index"] ] if not len(in_tables): yield page @@ -149,7 +149,7 @@ class TableStructureModel: id=table_cluster.id, page_no=page.page_no, cluster=table_cluster, - label="Table", + label=table_cluster.label, ) page.predictions.tablestructure.table_map[table_cluster.id] = tbl