mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
fix: prov for merged-elems
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
parent
9dbcb3d7d4
commit
6a02ec0f02
@ -334,17 +334,17 @@ class ReadingOrderModel:
|
|||||||
"Labels of merged elements must match."
|
"Labels of merged elements must match."
|
||||||
)
|
)
|
||||||
prov = ProvenanceItem(
|
prov = ProvenanceItem(
|
||||||
page_no=element.page_no + 1,
|
page_no=merged_elem.page_no + 1,
|
||||||
charspan=(
|
charspan=(
|
||||||
len(new_item.text) + 1,
|
len(new_item.text) + 1,
|
||||||
len(new_item.text) + 1 + len(merged_elem.text),
|
len(new_item.text) + 1 + len(merged_elem.text),
|
||||||
),
|
),
|
||||||
bbox=element.cluster.bbox.to_bottom_left_origin(page_height),
|
bbox=merged_elem.cluster.bbox.to_bottom_left_origin(page_height),
|
||||||
)
|
)
|
||||||
new_item.text += f" {merged_elem.text}"
|
new_item.text += f" {merged_elem.text}"
|
||||||
new_item.orig += f" {merged_elem.text}" # TODO: This is incomplete, we don't have the `orig` field of the merged element.
|
new_item.orig += f" {merged_elem.text}" # TODO: This is incomplete, we don't have the `orig` field of the merged element.
|
||||||
new_item.prov.append(prov)
|
new_item.prov.append(prov)
|
||||||
|
|
||||||
def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
|
def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
|
||||||
with TimeRecorder(conv_res, "reading_order", scope=ProfilingScope.DOCUMENT):
|
with TimeRecorder(conv_res, "reading_order", scope=ProfilingScope.DOCUMENT):
|
||||||
page_elements = self._assembled_to_readingorder_elements(conv_res)
|
page_elements = self._assembled_to_readingorder_elements(conv_res)
|
||||||
|
@ -143,7 +143,8 @@ constraints = [
|
|||||||
|
|
||||||
[tool.uv]
|
[tool.uv]
|
||||||
package = true
|
package = true
|
||||||
default-groups = "all"
|
# default-groups = ["all"]
|
||||||
|
default-groups = ["dev", "docs", "examples"]
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[tool.setuptools.packages.find]
|
||||||
include = ["docling*"]
|
include = ["docling*"]
|
||||||
|
Loading…
Reference in New Issue
Block a user