From 6a02ec0f024ff3e8c88eda76c04d2d6f027e25de Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Sat, 7 Jun 2025 07:31:54 +0200 Subject: [PATCH] fix: prov for merged-elems Signed-off-by: Peter Staar --- docling/models/readingorder_model.py | 6 +++--- pyproject.toml | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docling/models/readingorder_model.py b/docling/models/readingorder_model.py index 8ec92785..b9e154a9 100644 --- a/docling/models/readingorder_model.py +++ b/docling/models/readingorder_model.py @@ -334,17 +334,17 @@ class ReadingOrderModel: "Labels of merged elements must match." ) prov = ProvenanceItem( - page_no=element.page_no + 1, + page_no=merged_elem.page_no + 1, charspan=( len(new_item.text) + 1, len(new_item.text) + 1 + len(merged_elem.text), ), - bbox=element.cluster.bbox.to_bottom_left_origin(page_height), + bbox=merged_elem.cluster.bbox.to_bottom_left_origin(page_height), ) new_item.text += f" {merged_elem.text}" new_item.orig += f" {merged_elem.text}" # TODO: This is incomplete, we don't have the `orig` field of the merged element. new_item.prov.append(prov) - + def __call__(self, conv_res: ConversionResult) -> DoclingDocument: with TimeRecorder(conv_res, "reading_order", scope=ProfilingScope.DOCUMENT): page_elements = self._assembled_to_readingorder_elements(conv_res) diff --git a/pyproject.toml b/pyproject.toml index ce9b9c90..d880ea29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -143,7 +143,8 @@ constraints = [ [tool.uv] package = true -default-groups = "all" +# default-groups = ["all"] +default-groups = ["dev", "docs", "examples"] [tool.setuptools.packages.find] include = ["docling*"]