mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-10 13:48:13 +00:00
fix: prov for merged-elems (#1728)
* fix: prov for merged-elems
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* reformatted the code
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* Reset pyproject.toml
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Fix tests
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
---------
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
e979750ce9
commit
6613b9e98b
@@ -334,12 +334,12 @@ class ReadingOrderModel:
|
||||
"Labels of merged elements must match."
|
||||
)
|
||||
prov = ProvenanceItem(
|
||||
page_no=element.page_no + 1,
|
||||
page_no=merged_elem.page_no + 1,
|
||||
charspan=(
|
||||
len(new_item.text) + 1,
|
||||
len(new_item.text) + 1 + len(merged_elem.text),
|
||||
),
|
||||
bbox=element.cluster.bbox.to_bottom_left_origin(page_height),
|
||||
bbox=merged_elem.cluster.bbox.to_bottom_left_origin(page_height),
|
||||
)
|
||||
new_item.text += f" {merged_elem.text}"
|
||||
new_item.orig += f" {merged_elem.text}" # TODO: This is incomplete, we don't have the `orig` field of the merged element.
|
||||
|
||||
Reference in New Issue
Block a user