From 3a09ca50bbc0bb27fa52dddc7fd11260b1a6c5e4 Mon Sep 17 00:00:00 2001 From: Yusik Kim Date: Thu, 20 Mar 2025 16:59:35 +0100 Subject: [PATCH] fix: make sure page_items are sorted by page_no Signed-off-by: Yusik Kim --- docling/utils/doctags_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docling/utils/doctags_utils.py b/docling/utils/doctags_utils.py index e3e61866..7a5bffdf 100644 --- a/docling/utils/doctags_utils.py +++ b/docling/utils/doctags_utils.py @@ -38,6 +38,7 @@ def remove_doctags_content(doctags: str, images: list[PILImage.Image]) -> str: page_items[page_no].append(ser.serialize(item=item)) else: page_items[page_no] = [ser.serialize(item=item)] - pages = [ser.serialize_page(parts=parts) for parts in page_items.values()] + sorted_items = [page_items[key] for key in sorted(page_items.keys())] + pages = [ser.serialize_page(parts=parts) for parts in sorted_items] return ser.serialize_doc(pages=pages).text