fix: prov for merged-elems

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter Staar 2025-06-07 07:31:54 +02:00
parent 9dbcb3d7d4
commit 6a02ec0f02
2 changed files with 5 additions and 4 deletions

View File

@@ -334,17 +334,17 @@ class ReadingOrderModel:
"Labels of merged elements must match." "Labels of merged elements must match."
) )
prov = ProvenanceItem( prov = ProvenanceItem(
page_no=element.page_no + 1, page_no=merged_elem.page_no + 1,
charspan=( charspan=(
len(new_item.text) + 1, len(new_item.text) + 1,
len(new_item.text) + 1 + len(merged_elem.text), len(new_item.text) + 1 + len(merged_elem.text),
), ),
bbox=element.cluster.bbox.to_bottom_left_origin(page_height), bbox=merged_elem.cluster.bbox.to_bottom_left_origin(page_height),
) )
new_item.text += f" {merged_elem.text}" new_item.text += f" {merged_elem.text}"
new_item.orig += f" {merged_elem.text}" # TODO: This is incomplete, we don't have the `orig` field of the merged element. new_item.orig += f" {merged_elem.text}" # TODO: This is incomplete, we don't have the `orig` field of the merged element.
new_item.prov.append(prov) new_item.prov.append(prov)
def __call__(self, conv_res: ConversionResult) -> DoclingDocument: def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
with TimeRecorder(conv_res, "reading_order", scope=ProfilingScope.DOCUMENT): with TimeRecorder(conv_res, "reading_order", scope=ProfilingScope.DOCUMENT):
page_elements = self._assembled_to_readingorder_elements(conv_res) page_elements = self._assembled_to_readingorder_elements(conv_res)

View File

@@ -143,7 +143,8 @@ constraints = [
[tool.uv] [tool.uv]
package = true package = true
default-groups = "all" # default-groups = ["all"]
default-groups = ["dev", "docs", "examples"]
[tool.setuptools.packages.find] [tool.setuptools.packages.find]
include = ["docling*"] include = ["docling*"]