diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py
index ae373012..c607a237 100644
--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@@ -16,6 +16,7 @@ from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel
from docling.utils.accelerator_utils import decide_device
from docling.utils.layout_postprocessor import LayoutPostprocessor
+from docling.utils.orientation import detect_orientation
from docling.utils.profiling import TimeRecorder
from docling.utils.visualization import draw_clusters
@@ -152,7 +153,9 @@ class LayoutModel(BasePageModel):
assert page.size is not None
page_image = page.get_image(scale=1.0)
assert page_image is not None
-
+ page_orientation = detect_orientation(page.cells)
+ if page_orientation:
+ page_image = page_image.rotate(-page_orientation, expand=True)
clusters = []
for ix, pred_item in enumerate(
self.layout_predictor.predict(page_image)
diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py
index 48d6da4a..28d197ad 100644
--- a/docling/models/tesseract_ocr_cli_model.py
+++ b/docling/models/tesseract_ocr_cli_model.py
@@ -266,7 +266,6 @@ class TesseractOcrCliModel(BaseOcrModel):
orientation=doc_orientation,
rotated_image_size=high_res_image.size,
)
-
cell = TextCell(
index=ix,
text=str(text),
diff --git a/docling/utils/ocr_utils.py b/docling/utils/ocr_utils.py
index 66e96c5f..48d1755d 100644
--- a/docling/utils/ocr_utils.py
+++ b/docling/utils/ocr_utils.py
@@ -24,22 +24,27 @@ def map_tesseract_script(script: str) -> str:
def reverse_tesseract_preprocessing_rotation(
box: Box, orientation: int, rotated_im_size: Size
) -> tuple[Point, Point, Point, Point]:
+ # `box` is (left, top, width, height) in TOPLEFT coordinates of the rotated image.
+ # The returned rectangle starts with r_0 at the box's bottom-left corner; the other
+ # three corners follow counterclockwise, each mapped back per orientation below.
l, t, w, h = box
- rotated_w, rotated_h = rotated_im_size
+ rotated_im_w, rotated_im_h = rotated_im_size
if orientation == 0:
- return (l, t), (l + w, t), (l + w, t + h), (l, t + h)
+ r0_x = l
+ r0_y = t + h
+ return (r0_x, r0_y), (r0_x + w, r0_y), (r0_x + w, r0_y - h), (r0_x, r0_y - h)
if orientation == 90:
- x0 = rotated_h - t
- y0 = l
- return (x0, y0), (x0, y0 + w), (x0 - h, y0 + w), (x0 - h, y0)
+ r0_x = rotated_im_h - (t + h)
+ r0_y = l
+ return (r0_x, r0_y), (r0_x, r0_y + w), (r0_x + h, r0_y + w), (r0_x + h, r0_y)
if orientation == 180:
- x0 = rotated_w - l
- y0 = rotated_h - t
- return (x0, y0), (x0 - w, y0), (x0 - w, y0 - h), (x0, y0 - h)
+ r0_x = rotated_im_w - l
+ r0_y = rotated_im_h - (t + h)
+ return (r0_x, r0_y), (r0_x - w, r0_y), (r0_x - w, r0_y + h), (r0_x, r0_y + h)
if orientation == 270:
- x0 = t
- y0 = rotated_w - l
- return (x0, y0), (x0, y0 - w), (x0 + h, y0 - w), (x0 + h, y0)
+ r0_x = t + h
+ r0_y = rotated_im_w - l
+ return (r0_x, r0_y), (r0_x, r0_y - w), (r0_x - h, r0_y - w), (r0_x - h, r0_y)
msg = (
f"invalid tesseract document orientation {orientation}, "
f"expected orientation: {sorted(_TESSERACT_ORIENTATIONS)}"
diff --git a/docling/utils/orientation.py b/docling/utils/orientation.py
new file mode 100644
index 00000000..da076378
--- /dev/null
+++ b/docling/utils/orientation.py
@@ -0,0 +1,33 @@
+from collections import Counter
+from operator import itemgetter
+
+from docling_core.types.doc.page import TextCell
+
+_ORIENTATIONS = [0, 90, 180, 270]
+
+
+def _clipped_orientation(angle: float) -> int:
+    """Return the entry of ``_ORIENTATIONS`` closest to ``angle`` on the circle.
+
+    The distance is circular, so 359 degrees snaps to 0 rather than 270.
+    Exact ties (e.g. 45) resolve to the smaller orientation.
+    """
+
+    def _circular_distance(orientation: int) -> float:
+        delta = abs(angle - orientation) % 360
+        # Going the other way around the circle may be shorter.
+        return min(delta, 360 - delta)
+
+    return min((_circular_distance(o), o) for o in _ORIENTATIONS)[1]
+
+
+def detect_orientation(cells: list[TextCell]) -> int:
+    """Return the most frequent clipped orientation among ``cells``.
+
+    Each cell's ``rect.angle_360`` is snapped to 0, 90, 180 or 270 and the
+    value with the highest count wins; ``0`` is returned for an empty list.
+    """
+    if not cells:
+        return 0
+    orientation_counter = Counter(_clipped_orientation(c.rect.angle_360) for c in cells)
+    return max(orientation_counter.items(), key=itemgetter(1))[0]
diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.doctags.txt b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.doctags.txt
index 029be08d..50f50834 100644
--- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.doctags.txt
+++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.doctags.txt
@@ -1,4 +1,4 @@
-package
+package
Docling bundles PDF document conversion to JSON and Markdown in an easy self contained
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.json
index 982320c3..2babec15 100644
--- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.json
+++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.json
@@ -1 +1 @@
-{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test_rotated_180.pdf", "filename-prov": null, "document-hash": "a9cbfe0f2a71171face9ee31d2347ca4195649670ad75680520d67d4a863f982", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "baca27070f05dd84cf0903ded39bcf0fc1fa6ef0ac390e79cf8ba90c8c33ba49", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [441.304584329099, 132.09610360960653, 521.9863114205704, 151.67751306395223], "page": 1, "span": [0, 7], "__ref_s3_data": null}], "text": "package", "type": "paragraph", "payload": null, "name": "Text", "font": null}, {"prov": [{"bbox": [89.12133215549848, 77.02339849621205, 523.3501733013318, 124.86176457554109], "page": 1, "span": [0, 86], "__ref_s3_data": null}], "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 841.9216918945312, "page": 1, "width": 595.201171875}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}
\ No newline at end of file
+{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test_rotated_180.pdf", "filename-prov": null, "document-hash": "a9cbfe0f2a71171face9ee31d2347ca4195649670ad75680520d67d4a863f982", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "baca27070f05dd84cf0903ded39bcf0fc1fa6ef0ac390e79cf8ba90c8c33ba49", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [444.6666666666667, 131.58835856119788, 521.6666666666666, 150.25502522786462], "page": 1, "span": [0, 7], "__ref_s3_data": null}], "text": "package", "type": "paragraph", "payload": null, "name": "Text", "font": null}, {"prov": [{"bbox": [92.0, 77.92169189453125, 523.0, 123.25502522786462], "page": 1, "span": [0, 86], "__ref_s3_data": null}], "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 841.9216918945312, "page": 1, "width": 595.201171875}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.pages.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.pages.json
index a57c3401..8baebf53 100644
--- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.pages.json
+++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.pages.json
@@ -1 +1 @@
-[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 90.46133071208328, "r_y0": 764.8982933983192, "r_x1": 520.7638616365624, "r_y1": 764.8982933983192, "r_x2": 520.7638616365624, "r_y2": 744.0929853742306, "r_x3": 90.46133071208328, "r_y3": 744.0929853742306, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 89.12133215549848, "r_y0": 741.5247710689902, "r_x1": 523.3501733013318, "r_y1": 741.5247710689902, "r_x2": 523.3501733013318, "r_y2": 717.0599273189902, "r_x3": 89.12133215549848, "r_y3": 717.0599273189902, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 441.304584329099, "r_y0": 709.8255882849247, "r_x1": 521.9863114205704, "r_y1": 709.8255882849247, "r_x2": 521.9863114205704, "r_y2": 690.244178830579, "r_x3": 441.304584329099, "r_y3": 690.244178830579, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 89.12133215549848, "t": 717.0599273189902, "r": 523.3501733013318, "b": 764.8982933983192, "coord_origin": "TOPLEFT"}, "confidence": 0.7318570613861084, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 90.46133071208328, "r_y0": 764.8982933983192, "r_x1": 520.7638616365624, "r_y1": 764.8982933983192, "r_x2": 520.7638616365624, "r_y2": 744.0929853742306, "r_x3": 
90.46133071208328, "r_y3": 744.0929853742306, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 89.12133215549848, "r_y0": 741.5247710689902, "r_x1": 523.3501733013318, "r_y1": 741.5247710689902, "r_x2": 523.3501733013318, "r_y2": 717.0599273189902, "r_x3": 89.12133215549848, "r_y3": 717.0599273189902, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, {"id": 2, "label": "text", "bbox": {"l": 441.304584329099, "t": 690.244178830579, "r": 521.9863114205704, "b": 709.8255882849247, "coord_origin": "TOPLEFT"}, "confidence": 0.5982133150100708, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 441.304584329099, "r_y0": 709.8255882849247, "r_x1": 521.9863114205704, "r_y1": 709.8255882849247, "r_x2": 521.9863114205704, "r_y2": 690.244178830579, "r_x3": 441.304584329099, "r_y3": 690.244178830579, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 89.12133215549848, "t": 717.0599273189902, "r": 523.3501733013318, "b": 764.8982933983192, "coord_origin": "TOPLEFT"}, "confidence": 0.7318570613861084, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 90.46133071208328, "r_y0": 764.8982933983192, "r_x1": 520.7638616365624, "r_y1": 764.8982933983192, "r_x2": 
520.7638616365624, "r_y2": 744.0929853742306, "r_x3": 90.46133071208328, "r_y3": 744.0929853742306, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 89.12133215549848, "r_y0": 741.5247710689902, "r_x1": 523.3501733013318, "r_y1": 741.5247710689902, "r_x2": 523.3501733013318, "r_y2": 717.0599273189902, "r_x3": 89.12133215549848, "r_y3": 717.0599273189902, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "text", "bbox": {"l": 441.304584329099, "t": 690.244178830579, "r": 521.9863114205704, "b": 709.8255882849247, "coord_origin": "TOPLEFT"}, "confidence": 0.5982133150100708, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 441.304584329099, "r_y0": 709.8255882849247, "r_x1": 521.9863114205704, "r_y1": 709.8255882849247, "r_x2": 521.9863114205704, "r_y2": 690.244178830579, "r_x3": 441.304584329099, "r_y3": 690.244178830579, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 89.12133215549848, "t": 717.0599273189902, "r": 523.3501733013318, "b": 764.8982933983192, "coord_origin": "TOPLEFT"}, "confidence": 0.7318570613861084, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 90.46133071208328, "r_y0": 
764.8982933983192, "r_x1": 520.7638616365624, "r_y1": 764.8982933983192, "r_x2": 520.7638616365624, "r_y2": 744.0929853742306, "r_x3": 90.46133071208328, "r_y3": 744.0929853742306, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 89.12133215549848, "r_y0": 741.5247710689902, "r_x1": 523.3501733013318, "r_y1": 741.5247710689902, "r_x2": 523.3501733013318, "r_y2": 717.0599273189902, "r_x3": 89.12133215549848, "r_y3": 717.0599273189902, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "text", "bbox": {"l": 441.304584329099, "t": 690.244178830579, "r": 521.9863114205704, "b": 709.8255882849247, "coord_origin": "TOPLEFT"}, "confidence": 0.5982133150100708, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 441.304584329099, "r_y0": 709.8255882849247, "r_x1": 521.9863114205704, "r_y1": 709.8255882849247, "r_x2": 521.9863114205704, "r_y2": 690.244178830579, "r_x3": 441.304584329099, "r_y3": 690.244178830579, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "package"}], "headers": []}}]
\ No newline at end of file
+[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.3333333333334, "r_y0": 745.3333333333334, "r_x1": 92.0, "r_y1": 745.3333333333334, "r_x2": 92.0, "r_y2": 764.0, "r_x3": 521.3333333333334, "r_y3": 764.0, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 523.0, "r_y0": 718.6666666666666, "r_x1": 92.0, "r_y1": 718.6666666666666, "r_x2": 92.0, "r_y2": 737.3333333333334, "r_x3": 523.0, "r_y3": 737.3333333333334, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.6666666666666, "r_y0": 691.6666666666666, "r_x1": 444.6666666666667, "r_y1": 691.6666666666666, "r_x2": 444.6666666666667, "r_y2": 710.3333333333334, "r_x3": 521.6666666666666, "r_y3": 710.3333333333334, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 90.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 92.0, "t": 718.6666666666666, "r": 523.0, "b": 764.0, "coord_origin": "TOPLEFT"}, "confidence": 0.7318570613861084, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.3333333333334, "r_y0": 745.3333333333334, "r_x1": 92.0, "r_y1": 745.3333333333334, "r_x2": 92.0, "r_y2": 764.0, "r_x3": 521.3333333333334, "r_y3": 764.0, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", 
"text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 523.0, "r_y0": 718.6666666666666, "r_x1": 92.0, "r_y1": 718.6666666666666, "r_x2": 92.0, "r_y2": 737.3333333333334, "r_x3": 523.0, "r_y3": 737.3333333333334, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, {"id": 2, "label": "text", "bbox": {"l": 444.6666666666667, "t": 691.6666666666666, "r": 521.6666666666666, "b": 710.3333333333334, "coord_origin": "TOPLEFT"}, "confidence": 0.5982133150100708, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.6666666666666, "r_y0": 691.6666666666666, "r_x1": 444.6666666666667, "r_y1": 691.6666666666666, "r_x2": 444.6666666666667, "r_y2": 710.3333333333334, "r_x3": 521.6666666666666, "r_y3": 710.3333333333334, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 90.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 92.0, "t": 718.6666666666666, "r": 523.0, "b": 764.0, "coord_origin": "TOPLEFT"}, "confidence": 0.7318570613861084, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.3333333333334, "r_y0": 745.3333333333334, "r_x1": 92.0, "r_y1": 745.3333333333334, "r_x2": 92.0, "r_y2": 764.0, "r_x3": 521.3333333333334, "r_y3": 764.0, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, 
{"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 523.0, "r_y0": 718.6666666666666, "r_x1": 92.0, "r_y1": 718.6666666666666, "r_x2": 92.0, "r_y2": 737.3333333333334, "r_x3": 523.0, "r_y3": 737.3333333333334, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "text", "bbox": {"l": 444.6666666666667, "t": 691.6666666666666, "r": 521.6666666666666, "b": 710.3333333333334, "coord_origin": "TOPLEFT"}, "confidence": 0.5982133150100708, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.6666666666666, "r_y0": 691.6666666666666, "r_x1": 444.6666666666667, "r_y1": 691.6666666666666, "r_x2": 444.6666666666667, "r_y2": 710.3333333333334, "r_x3": 521.6666666666666, "r_y3": 710.3333333333334, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 90.0, "from_ocr": true}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 92.0, "t": 718.6666666666666, "r": 523.0, "b": 764.0, "coord_origin": "TOPLEFT"}, "confidence": 0.7318570613861084, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.3333333333334, "r_y0": 745.3333333333334, "r_x1": 92.0, "r_y1": 745.3333333333334, "r_x2": 92.0, "r_y2": 764.0, "r_x3": 521.3333333333334, "r_y3": 764.0, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 
255}, "rect": {"r_x0": 523.0, "r_y0": 718.6666666666666, "r_x1": 92.0, "r_y1": 718.6666666666666, "r_x2": 92.0, "r_y2": 737.3333333333334, "r_x3": 523.0, "r_y3": 737.3333333333334, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "text", "bbox": {"l": 444.6666666666667, "t": 691.6666666666666, "r": 521.6666666666666, "b": 710.3333333333334, "coord_origin": "TOPLEFT"}, "confidence": 0.5982133150100708, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.6666666666666, "r_y0": 691.6666666666666, "r_x1": 444.6666666666667, "r_y1": 691.6666666666666, "r_x2": 444.6666666666667, "r_y2": 710.3333333333334, "r_x3": 521.6666666666666, "r_y3": 710.3333333333334, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 90.0, "from_ocr": true}], "children": []}, "text": "package"}], "headers": []}}]
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.doctags.txt b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.doctags.txt
index d5c2972a..8350737b 100644
--- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.doctags.txt
+++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.doctags.txt
@@ -1,3 +1,3 @@
-package
+package
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.json
index 42e30bf7..6b843dca 100644
--- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.json
+++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.json
@@ -1 +1 @@
-{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test_rotated_270.pdf", "filename-prov": null, "document-hash": "52f54e7183bdb73aa3713c7b169baca93e276963a138418c26e7d6a1ea128f14", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "59bc9ddba89e7b008185dd16d384493beb034686e5670546786390c5d237a304", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [691.4680194659409, 442.3948768148814, 709.8255850278712, 523.0765988200898], "page": 1, "span": [0, 7], "__ref_s3_data": null}], "text": "package", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 595.201171875, "page": 1, "width": 841.9216918945312}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}
\ No newline at end of file
+{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test_rotated_270.pdf", "filename-prov": null, "document-hash": "52f54e7183bdb73aa3713c7b169baca93e276963a138418c26e7d6a1ea128f14", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "59bc9ddba89e7b008185dd16d384493beb034686e5670546786390c5d237a304", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [691.6666666666666, 444.53450520833337, 710.3333333333334, 521.5345052083334], "page": 1, "span": [0, 7], "__ref_s3_data": null}], "text": "package", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 595.201171875, "page": 1, "width": 841.9216918945312}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.pages.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.pages.json
index 5f76e79a..c4416b3b 100644
--- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.pages.json
+++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.pages.json
@@ -1 +1 @@
-[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 744.0930045534915, "r_y0": 504.87200373583954, "r_x1": 764.8982839673505, "r_y1": 504.87200373583954, "r_x2": 764.8982839673505, "r_y2": 73.34702001188118, "r_x3": 744.0930045534915, "r_y3": 73.34702001188118, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 717.1685859527342, "r_y0": 504.8720063438988, "r_x1": 737.9738558298501, "r_y1": 504.8720063438988, "r_x2": 737.9738558298501, "r_y2": 70.90211702098213, "r_x3": 717.1685859527342, "r_y3": 70.90211702098213, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.4680194659409, "r_y0": 152.80629506011857, "r_x1": 709.8255850278712, "r_y1": 152.80629506011857, "r_x2": 709.8255850278712, "r_y2": 72.12457305491027, "r_x3": 691.4680194659409, "r_y3": 72.12457305491027, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 0, "label": "page_header", "bbox": {"l": 717.1685859527342, "t": 70.90211702098213, "r": 764.8982839673505, "b": 504.8720063438988, "coord_origin": "TOPLEFT"}, "confidence": 0.6915205121040344, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 744.0930045534915, "r_y0": 504.87200373583954, "r_x1": 764.8982839673505, "r_y1": 504.87200373583954, "r_x2": 764.8982839673505, "r_y2": 
73.34702001188118, "r_x3": 744.0930045534915, "r_y3": 73.34702001188118, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 717.1685859527342, "r_y0": 504.8720063438988, "r_x1": 737.9738558298501, "r_y1": 504.8720063438988, "r_x2": 737.9738558298501, "r_y2": 70.90211702098213, "r_x3": 717.1685859527342, "r_y3": 70.90211702098213, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, {"id": 8, "label": "text", "bbox": {"l": 691.4680194659409, "t": 72.12457305491027, "r": 709.8255850278712, "b": 152.80629506011857, "coord_origin": "TOPLEFT"}, "confidence": 1.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.4680194659409, "r_y0": 152.80629506011857, "r_x1": 709.8255850278712, "r_y1": 152.80629506011857, "r_x2": 709.8255850278712, "r_y2": 72.12457305491027, "r_x3": 691.4680194659409, "r_y3": 72.12457305491027, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 717.1685859527342, "t": 70.90211702098213, "r": 764.8982839673505, "b": 504.8720063438988, "coord_origin": "TOPLEFT"}, "confidence": 0.6915205121040344, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 744.0930045534915, "r_y0": 504.87200373583954, "r_x1": 764.8982839673505, 
"r_y1": 504.87200373583954, "r_x2": 764.8982839673505, "r_y2": 73.34702001188118, "r_x3": 744.0930045534915, "r_y3": 73.34702001188118, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 717.1685859527342, "r_y0": 504.8720063438988, "r_x1": 737.9738558298501, "r_y1": 504.8720063438988, "r_x2": 737.9738558298501, "r_y2": 70.90211702098213, "r_x3": 717.1685859527342, "r_y3": 70.90211702098213, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 8, "page_no": 0, "cluster": {"id": 8, "label": "text", "bbox": {"l": 691.4680194659409, "t": 72.12457305491027, "r": 709.8255850278712, "b": 152.80629506011857, "coord_origin": "TOPLEFT"}, "confidence": 1.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.4680194659409, "r_y0": 152.80629506011857, "r_x1": 709.8255850278712, "r_y1": 152.80629506011857, "r_x2": 709.8255850278712, "r_y2": 72.12457305491027, "r_x3": 691.4680194659409, "r_y3": 72.12457305491027, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 8, "page_no": 0, "cluster": {"id": 8, "label": "text", "bbox": {"l": 691.4680194659409, "t": 72.12457305491027, "r": 709.8255850278712, "b": 152.80629506011857, "coord_origin": "TOPLEFT"}, "confidence": 1.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 
691.4680194659409, "r_y0": 152.80629506011857, "r_x1": 709.8255850278712, "r_y1": 152.80629506011857, "r_x2": 709.8255850278712, "r_y2": 72.12457305491027, "r_x3": 691.4680194659409, "r_y3": 72.12457305491027, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "package"}], "headers": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 717.1685859527342, "t": 70.90211702098213, "r": 764.8982839673505, "b": 504.8720063438988, "coord_origin": "TOPLEFT"}, "confidence": 0.6915205121040344, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 744.0930045534915, "r_y0": 504.87200373583954, "r_x1": 764.8982839673505, "r_y1": 504.87200373583954, "r_x2": 764.8982839673505, "r_y2": 73.34702001188118, "r_x3": 744.0930045534915, "r_y3": 73.34702001188118, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 717.1685859527342, "r_y0": 504.8720063438988, "r_x1": 737.9738558298501, "r_y1": 504.8720063438988, "r_x2": 737.9738558298501, "r_y2": 70.90211702098213, "r_x3": 717.1685859527342, "r_y3": 70.90211702098213, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}]}}]
\ No newline at end of file
+[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 745.3333333333334, "r_y0": 74.0, "r_x1": 745.3333333333334, "r_y1": 503.3333333333333, "r_x2": 764.0, "r_y2": 503.3333333333333, "r_x3": 745.3333333333334, "r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 718.6666666666666, "r_y0": 72.33333333333333, "r_x1": 718.6666666666666, "r_y1": 503.3333333333333, "r_x2": 737.3333333333334, "r_y2": 503.3333333333333, "r_x3": 718.6666666666666, "r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.6666666666666, "r_y0": 73.66666666666667, "r_x1": 691.6666666666666, "r_y1": 150.66666666666666, "r_x2": 710.3333333333334, "r_y2": 150.66666666666666, "r_x3": 691.6666666666666, "r_y3": 150.66666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 0, "label": "page_header", "bbox": {"l": 718.6666666666666, "t": 72.33333333333333, "r": 764.0, "b": 503.3333333333333, "coord_origin": "TOPLEFT"}, "confidence": 0.6915205121040344, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 745.3333333333334, "r_y0": 74.0, "r_x1": 745.3333333333334, "r_y1": 503.3333333333333, "r_x2": 764.0, "r_y2": 503.3333333333333, "r_x3": 745.3333333333334, "r_y3": 503.3333333333333, 
"coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 718.6666666666666, "r_y0": 72.33333333333333, "r_x1": 718.6666666666666, "r_y1": 503.3333333333333, "r_x2": 737.3333333333334, "r_y2": 503.3333333333333, "r_x3": 718.6666666666666, "r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, {"id": 8, "label": "text", "bbox": {"l": 691.6666666666666, "t": 73.66666666666667, "r": 710.3333333333334, "b": 150.66666666666666, "coord_origin": "TOPLEFT"}, "confidence": 89.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.6666666666666, "r_y0": 73.66666666666667, "r_x1": 691.6666666666666, "r_y1": 150.66666666666666, "r_x2": 710.3333333333334, "r_y2": 150.66666666666666, "r_x3": 691.6666666666666, "r_y3": 150.66666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 718.6666666666666, "t": 72.33333333333333, "r": 764.0, "b": 503.3333333333333, "coord_origin": "TOPLEFT"}, "confidence": 0.6915205121040344, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 745.3333333333334, "r_y0": 74.0, "r_x1": 745.3333333333334, "r_y1": 503.3333333333333, "r_x2": 764.0, "r_y2": 503.3333333333333, "r_x3": 745.3333333333334, 
"r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 718.6666666666666, "r_y0": 72.33333333333333, "r_x1": 718.6666666666666, "r_y1": 503.3333333333333, "r_x2": 737.3333333333334, "r_y2": 503.3333333333333, "r_x3": 718.6666666666666, "r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 8, "page_no": 0, "cluster": {"id": 8, "label": "text", "bbox": {"l": 691.6666666666666, "t": 73.66666666666667, "r": 710.3333333333334, "b": 150.66666666666666, "coord_origin": "TOPLEFT"}, "confidence": 89.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.6666666666666, "r_y0": 73.66666666666667, "r_x1": 691.6666666666666, "r_y1": 150.66666666666666, "r_x2": 710.3333333333334, "r_y2": 150.66666666666666, "r_x3": 691.6666666666666, "r_y3": 150.66666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 8, "page_no": 0, "cluster": {"id": 8, "label": "text", "bbox": {"l": 691.6666666666666, "t": 73.66666666666667, "r": 710.3333333333334, "b": 150.66666666666666, "coord_origin": "TOPLEFT"}, "confidence": 89.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.6666666666666, "r_y0": 73.66666666666667, "r_x1": 691.6666666666666, "r_y1": 150.66666666666666, "r_x2": 
710.3333333333334, "r_y2": 150.66666666666666, "r_x3": 691.6666666666666, "r_y3": 150.66666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "children": []}, "text": "package"}], "headers": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 718.6666666666666, "t": 72.33333333333333, "r": 764.0, "b": 503.3333333333333, "coord_origin": "TOPLEFT"}, "confidence": 0.6915205121040344, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 745.3333333333334, "r_y0": 74.0, "r_x1": 745.3333333333334, "r_y1": 503.3333333333333, "r_x2": 764.0, "r_y2": 503.3333333333333, "r_x3": 745.3333333333334, "r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 718.6666666666666, "r_y0": 72.33333333333333, "r_x1": 718.6666666666666, "r_y1": 503.3333333333333, "r_x2": 737.3333333333334, "r_y2": 503.3333333333333, "r_x3": 718.6666666666666, "r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}]}}]
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.doctags.txt b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.doctags.txt
index 0b7a3a14..c1068b56 100644
--- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.doctags.txt
+++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.doctags.txt
@@ -1,3 +1,4 @@
-package
+Docling bundles PDF document conversion to
+JSON and Markdown in an easy self contained package
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.json
index cd086df8..b8076e9e 100644
--- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.json
+++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.json
@@ -1 +1 @@
-{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test_rotated_90.pdf", "filename-prov": null, "document-hash": "4a282813d93824eaa9bc2a0b2a0d6d626ecc8f5f380bd1320e2dd3e8e53c2ba6", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "f8a4dc72d8b159f69d0bc968b97f3fb9e0ac59dcb3113492432755835935d9b3", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [131.21306574279092, 74.12495603322407, 152.19606490864376, 154.19400205373182], "page": 1, "span": [0, 7], "__ref_s3_data": null}], "text": "package", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 595.201171875, "page": 1, "width": 841.9216918945312}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}
\ No newline at end of file
+{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test_rotated_90.pdf", "filename-prov": null, "document-hash": "4a282813d93824eaa9bc2a0b2a0d6d626ecc8f5f380bd1320e2dd3e8e53c2ba6", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "f8a4dc72d8b159f69d0bc968b97f3fb9e0ac59dcb3113492432755835935d9b3", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [78.0, 73.86783854166663, 96.66666666666667, 503.201171875], "page": 1, "span": [0, 42], "__ref_s3_data": null}], "text": "Docling bundles PDF document conversion to", "type": "paragraph", "payload": null, "name": "Text", "font": null}, {"prov": [{"bbox": [104.66666666666667, 72.201171875, 123.33333333333333, 503.201171875], "page": 1, "span": [0, 51], "__ref_s3_data": null}, {"bbox": [104.66666666666667, 72.201171875, 123.33333333333333, 503.201171875], "page": 1, "span": [0, 51], "__ref_s3_data": null}], "text": "JSON and Markdown in an easy self contained package", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 595.201171875, "page": 1, "width": 841.9216918945312}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.md b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.md
index 597acc76..8d77a437 100644
--- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.md
+++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.md
@@ -1 +1,3 @@
-package
\ No newline at end of file
+Docling bundles PDF document conversion to
+
+JSON and Markdown in an easy self contained package
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.pages.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.pages.json
index 89e716e1..71c1e2bd 100644
--- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.pages.json
+++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.pages.json
@@ -1 +1 @@
-[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 77.10171546422428, "r_y0": 520.7638577050515, "r_x1": 96.6831586150625, "r_y1": 520.7638577050515, "r_x2": 96.6831586150625, "r_y2": 89.23887398109309, "r_x3": 77.10171546422428, "r_y3": 89.23887398109309, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 100.55299576256091, "r_y0": 523.3155494272656, "r_x1": 124.91101654503161, "r_y1": 523.3155494272656, "r_x2": 124.91101654503161, "r_y2": 89.12381765643227, "r_x3": 100.55299576256091, "r_y3": 89.12381765643227, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 131.21306574279092, "r_y0": 521.0762158417759, "r_x1": 152.19606490864376, "r_y1": 521.0762158417759, "r_x2": 152.19606490864376, "r_y2": 441.0071698212682, "r_x3": 131.21306574279092, "r_y3": 441.0071698212682, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 77.10171546422428, "r_y0": 520.7638577050515, "r_x1": 96.6831586150625, "r_y1": 520.7638577050515, "r_x2": 96.6831586150625, "r_y2": 
89.23887398109309, "r_x3": 77.10171546422428, "r_y3": 89.23887398109309, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 100.55299576256091, "r_y0": 523.3155494272656, "r_x1": 124.91101654503161, "r_y1": 523.3155494272656, "r_x2": 124.91101654503161, "r_y2": 89.12381765643227, "r_x3": 100.55299576256091, "r_y3": 89.12381765643227, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 131.21306574279092, "r_y0": 521.0762158417759, "r_x1": 152.19606490864376, "r_y1": 521.0762158417759, "r_x2": 152.19606490864376, "r_y2": 441.0071698212682, "r_x3": 131.21306574279092, "r_y3": 441.0071698212682, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 77.10171546422428, "r_y0": 520.7638577050515, "r_x1": 
96.6831586150625, "r_y1": 520.7638577050515, "r_x2": 96.6831586150625, "r_y2": 89.23887398109309, "r_x3": 77.10171546422428, "r_y3": 89.23887398109309, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 100.55299576256091, "r_y0": 523.3155494272656, "r_x1": 124.91101654503161, "r_y1": 523.3155494272656, "r_x2": 124.91101654503161, "r_y2": 89.12381765643227, "r_x3": 100.55299576256091, "r_y3": 89.12381765643227, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 131.21306574279092, "r_y0": 521.0762158417759, "r_x1": 152.19606490864376, "r_y1": 521.0762158417759, "r_x2": 152.19606490864376, "r_y2": 441.0071698212682, "r_x3": 131.21306574279092, "r_y3": 441.0071698212682, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"index": 2, "rgba": {"r": 0, "g": 
0, "b": 0, "a": 255}, "rect": {"r_x0": 131.21306574279092, "r_y0": 521.0762158417759, "r_x1": 152.19606490864376, "r_y1": 521.0762158417759, "r_x2": 152.19606490864376, "r_y2": 441.0071698212682, "r_x3": 131.21306574279092, "r_y3": 441.0071698212682, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "package"}], "headers": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 77.10171546422428, "r_y0": 520.7638577050515, "r_x1": 96.6831586150625, "r_y1": 520.7638577050515, "r_x2": 96.6831586150625, "r_y2": 89.23887398109309, "r_x3": 77.10171546422428, "r_y3": 89.23887398109309, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 100.55299576256091, "r_y0": 523.3155494272656, "r_x1": 124.91101654503161, "r_y1": 523.3155494272656, "r_x2": 124.91101654503161, "r_y2": 89.12381765643227, "r_x3": 100.55299576256091, "r_y3": 89.12381765643227, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}]}}]
\ No newline at end of file
+[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 96.66666666666667, "r_y0": 521.3333333333334, "r_x1": 96.66666666666667, "r_y1": 92.0, "r_x2": 78.0, "r_y2": 92.0, "r_x3": 78.0, "r_y3": 521.3333333333334, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 123.33333333333333, "r_y0": 523.0, "r_x1": 123.33333333333333, "r_y1": 92.0, "r_x2": 104.66666666666667, "r_y2": 92.0, "r_x3": 104.66666666666667, "r_y3": 523.0, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 150.33333333333334, "r_y0": 521.6666666666666, "r_x1": 150.33333333333334, "r_y1": 444.6666666666667, "r_x2": 131.66666666666666, "r_y2": 444.6666666666667, "r_x3": 131.66666666666666, "r_y3": 521.6666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 1, "label": "text", "bbox": {"l": 78.0, "t": 92.0, "r": 96.66666666666667, "b": 521.3333333333334, "coord_origin": "TOPLEFT"}, "confidence": 94.0, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 96.66666666666667, "r_y0": 521.3333333333334, "r_x1": 96.66666666666667, "r_y1": 92.0, "r_x2": 78.0, "r_y2": 92.0, "r_x3": 78.0, "r_y3": 521.3333333333334, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", 
"text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}], "children": []}, {"id": 2, "label": "text", "bbox": {"l": 104.66666666666667, "t": 92.0, "r": 123.33333333333333, "b": 523.0, "coord_origin": "TOPLEFT"}, "confidence": 92.0, "cells": [{"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 123.33333333333333, "r_y0": 523.0, "r_x1": 123.33333333333333, "r_y1": 92.0, "r_x2": 104.66666666666667, "r_y2": 92.0, "r_x3": 104.66666666666667, "r_y3": 523.0, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, {"id": 3, "label": "text", "bbox": {"l": 131.66666666666666, "t": 444.6666666666667, "r": 150.33333333333334, "b": 521.6666666666666, "coord_origin": "TOPLEFT"}, "confidence": 89.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 150.33333333333334, "r_y0": 521.6666666666666, "r_x1": 150.33333333333334, "r_y1": 444.6666666666667, "r_x2": 131.66666666666666, "r_y2": 444.6666666666667, "r_x3": 131.66666666666666, "r_y3": 521.6666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 78.0, "t": 92.0, "r": 96.66666666666667, "b": 521.3333333333334, "coord_origin": "TOPLEFT"}, "confidence": 94.0, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 96.66666666666667, "r_y0": 521.3333333333334, "r_x1": 96.66666666666667, "r_y1": 92.0, "r_x2": 78.0, "r_y2": 92.0, "r_x3": 78.0, "r_y3": 521.3333333333334, "coord_origin": "TOPLEFT"}, "text": 
"Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to"}, {"label": "text", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "text", "bbox": {"l": 104.66666666666667, "t": 92.0, "r": 123.33333333333333, "b": 523.0, "coord_origin": "TOPLEFT"}, "confidence": 92.0, "cells": [{"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 123.33333333333333, "r_y0": 523.0, "r_x1": 123.33333333333333, "r_y1": 92.0, "r_x2": 104.66666666666667, "r_y2": 92.0, "r_x3": 104.66666666666667, "r_y3": 523.0, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, "text": "JSON and Markdown in an easy self contained"}, {"label": "text", "id": 3, "page_no": 0, "cluster": {"id": 3, "label": "text", "bbox": {"l": 131.66666666666666, "t": 444.6666666666667, "r": 150.33333333333334, "b": 521.6666666666666, "coord_origin": "TOPLEFT"}, "confidence": 89.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 150.33333333333334, "r_y0": 521.6666666666666, "r_x1": 150.33333333333334, "r_y1": 444.6666666666667, "r_x2": 131.66666666666666, "r_y2": 444.6666666666667, "r_x3": 131.66666666666666, "r_y3": 521.6666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 78.0, "t": 92.0, "r": 96.66666666666667, "b": 521.3333333333334, "coord_origin": "TOPLEFT"}, "confidence": 94.0, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 
96.66666666666667, "r_y0": 521.3333333333334, "r_x1": 96.66666666666667, "r_y1": 92.0, "r_x2": 78.0, "r_y2": 92.0, "r_x3": 78.0, "r_y3": 521.3333333333334, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to"}, {"label": "text", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "text", "bbox": {"l": 104.66666666666667, "t": 92.0, "r": 123.33333333333333, "b": 523.0, "coord_origin": "TOPLEFT"}, "confidence": 92.0, "cells": [{"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 123.33333333333333, "r_y0": 523.0, "r_x1": 123.33333333333333, "r_y1": 92.0, "r_x2": 104.66666666666667, "r_y2": 92.0, "r_x3": 104.66666666666667, "r_y3": 523.0, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, "text": "JSON and Markdown in an easy self contained"}, {"label": "text", "id": 3, "page_no": 0, "cluster": {"id": 3, "label": "text", "bbox": {"l": 131.66666666666666, "t": 444.6666666666667, "r": 150.33333333333334, "b": 521.6666666666666, "coord_origin": "TOPLEFT"}, "confidence": 89.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 150.33333333333334, "r_y0": 521.6666666666666, "r_x1": 150.33333333333334, "r_y1": 444.6666666666667, "r_x2": 131.66666666666666, "r_y2": 444.6666666666667, "r_x3": 131.66666666666666, "r_y3": 521.6666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "children": []}, "text": "package"}], "headers": []}}]
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.doctags.txt b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.doctags.txt
index da0deb0b..f270ebec 100644
--- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.doctags.txt
+++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.doctags.txt
@@ -1,3 +1,3 @@
-package
-Docling bundles PDF document conversion to JSON and Markdown in an easy self contained
+package
+Docling bundles PDF document conversion to JSON and Markdown in an easy self contained
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json
index 1cb0a4f6..bfb728c2 100644
--- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json
+++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json
@@ -1 +1 @@
-{"schema_name": "DoclingDocument", "version": "1.3.0", "name": "ocr_test_rotated_180", "origin": {"mimetype": "application/pdf", "binary_hash": 2530576989861832966, "filename": "ocr_test_rotated_180.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}, {"cref": "#/texts/1"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 441.304584329099, "t": 151.67751306395223, "r": 521.9863114205704, "b": 132.09610360960653, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 7]}], "orig": "package", "text": "package", "formatting": null, "hyperlink": null}, {"self_ref": "#/texts/1", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 89.12133215549848, "t": 124.86176457554109, "r": 523.3501733013318, "b": 77.02339849621205, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 86]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", "formatting": null, "hyperlink": null}], "pictures": [], "tables": [], "key_value_items": [], "form_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}}
\ No newline at end of file
+{"schema_name": "DoclingDocument", "version": "1.3.0", "name": "ocr_test_rotated_180", "origin": {"mimetype": "application/pdf", "binary_hash": 2530576989861832966, "filename": "ocr_test_rotated_180.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}, {"cref": "#/texts/1"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 444.6666666666667, "t": 150.25502522786462, "r": 521.6666666666666, "b": 131.58835856119788, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 7]}], "orig": "package", "text": "package", "formatting": null, "hyperlink": null}, {"self_ref": "#/texts/1", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 92.0, "t": 123.25502522786462, "r": 523.0, "b": 77.92169189453125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 86]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", "formatting": null, "hyperlink": null}], "pictures": [], "tables": [], "key_value_items": [], "form_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}}
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.json
index a57c3401..8baebf53 100644
--- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.json
+++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.json
@@ -1 +1 @@
-[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 90.46133071208328, "r_y0": 764.8982933983192, "r_x1": 520.7638616365624, "r_y1": 764.8982933983192, "r_x2": 520.7638616365624, "r_y2": 744.0929853742306, "r_x3": 90.46133071208328, "r_y3": 744.0929853742306, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 89.12133215549848, "r_y0": 741.5247710689902, "r_x1": 523.3501733013318, "r_y1": 741.5247710689902, "r_x2": 523.3501733013318, "r_y2": 717.0599273189902, "r_x3": 89.12133215549848, "r_y3": 717.0599273189902, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 441.304584329099, "r_y0": 709.8255882849247, "r_x1": 521.9863114205704, "r_y1": 709.8255882849247, "r_x2": 521.9863114205704, "r_y2": 690.244178830579, "r_x3": 441.304584329099, "r_y3": 690.244178830579, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 89.12133215549848, "t": 717.0599273189902, "r": 523.3501733013318, "b": 764.8982933983192, "coord_origin": "TOPLEFT"}, "confidence": 0.7318570613861084, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 90.46133071208328, "r_y0": 764.8982933983192, "r_x1": 520.7638616365624, "r_y1": 764.8982933983192, "r_x2": 520.7638616365624, "r_y2": 744.0929853742306, "r_x3": 
90.46133071208328, "r_y3": 744.0929853742306, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 89.12133215549848, "r_y0": 741.5247710689902, "r_x1": 523.3501733013318, "r_y1": 741.5247710689902, "r_x2": 523.3501733013318, "r_y2": 717.0599273189902, "r_x3": 89.12133215549848, "r_y3": 717.0599273189902, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, {"id": 2, "label": "text", "bbox": {"l": 441.304584329099, "t": 690.244178830579, "r": 521.9863114205704, "b": 709.8255882849247, "coord_origin": "TOPLEFT"}, "confidence": 0.5982133150100708, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 441.304584329099, "r_y0": 709.8255882849247, "r_x1": 521.9863114205704, "r_y1": 709.8255882849247, "r_x2": 521.9863114205704, "r_y2": 690.244178830579, "r_x3": 441.304584329099, "r_y3": 690.244178830579, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 89.12133215549848, "t": 717.0599273189902, "r": 523.3501733013318, "b": 764.8982933983192, "coord_origin": "TOPLEFT"}, "confidence": 0.7318570613861084, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 90.46133071208328, "r_y0": 764.8982933983192, "r_x1": 520.7638616365624, "r_y1": 764.8982933983192, "r_x2": 
520.7638616365624, "r_y2": 744.0929853742306, "r_x3": 90.46133071208328, "r_y3": 744.0929853742306, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 89.12133215549848, "r_y0": 741.5247710689902, "r_x1": 523.3501733013318, "r_y1": 741.5247710689902, "r_x2": 523.3501733013318, "r_y2": 717.0599273189902, "r_x3": 89.12133215549848, "r_y3": 717.0599273189902, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "text", "bbox": {"l": 441.304584329099, "t": 690.244178830579, "r": 521.9863114205704, "b": 709.8255882849247, "coord_origin": "TOPLEFT"}, "confidence": 0.5982133150100708, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 441.304584329099, "r_y0": 709.8255882849247, "r_x1": 521.9863114205704, "r_y1": 709.8255882849247, "r_x2": 521.9863114205704, "r_y2": 690.244178830579, "r_x3": 441.304584329099, "r_y3": 690.244178830579, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 89.12133215549848, "t": 717.0599273189902, "r": 523.3501733013318, "b": 764.8982933983192, "coord_origin": "TOPLEFT"}, "confidence": 0.7318570613861084, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 90.46133071208328, "r_y0": 
764.8982933983192, "r_x1": 520.7638616365624, "r_y1": 764.8982933983192, "r_x2": 520.7638616365624, "r_y2": 744.0929853742306, "r_x3": 90.46133071208328, "r_y3": 744.0929853742306, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 89.12133215549848, "r_y0": 741.5247710689902, "r_x1": 523.3501733013318, "r_y1": 741.5247710689902, "r_x2": 523.3501733013318, "r_y2": 717.0599273189902, "r_x3": 89.12133215549848, "r_y3": 717.0599273189902, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "text", "bbox": {"l": 441.304584329099, "t": 690.244178830579, "r": 521.9863114205704, "b": 709.8255882849247, "coord_origin": "TOPLEFT"}, "confidence": 0.5982133150100708, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 441.304584329099, "r_y0": 709.8255882849247, "r_x1": 521.9863114205704, "r_y1": 709.8255882849247, "r_x2": 521.9863114205704, "r_y2": 690.244178830579, "r_x3": 441.304584329099, "r_y3": 690.244178830579, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "package"}], "headers": []}}]
\ No newline at end of file
+[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.3333333333334, "r_y0": 745.3333333333334, "r_x1": 92.0, "r_y1": 745.3333333333334, "r_x2": 92.0, "r_y2": 764.0, "r_x3": 521.3333333333334, "r_y3": 764.0, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 523.0, "r_y0": 718.6666666666666, "r_x1": 92.0, "r_y1": 718.6666666666666, "r_x2": 92.0, "r_y2": 737.3333333333334, "r_x3": 523.0, "r_y3": 737.3333333333334, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.6666666666666, "r_y0": 691.6666666666666, "r_x1": 444.6666666666667, "r_y1": 691.6666666666666, "r_x2": 444.6666666666667, "r_y2": 710.3333333333334, "r_x3": 521.6666666666666, "r_y3": 710.3333333333334, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 90.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 92.0, "t": 718.6666666666666, "r": 523.0, "b": 764.0, "coord_origin": "TOPLEFT"}, "confidence": 0.7318570613861084, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.3333333333334, "r_y0": 745.3333333333334, "r_x1": 92.0, "r_y1": 745.3333333333334, "r_x2": 92.0, "r_y2": 764.0, "r_x3": 521.3333333333334, "r_y3": 764.0, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", 
"text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 523.0, "r_y0": 718.6666666666666, "r_x1": 92.0, "r_y1": 718.6666666666666, "r_x2": 92.0, "r_y2": 737.3333333333334, "r_x3": 523.0, "r_y3": 737.3333333333334, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, {"id": 2, "label": "text", "bbox": {"l": 444.6666666666667, "t": 691.6666666666666, "r": 521.6666666666666, "b": 710.3333333333334, "coord_origin": "TOPLEFT"}, "confidence": 0.5982133150100708, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.6666666666666, "r_y0": 691.6666666666666, "r_x1": 444.6666666666667, "r_y1": 691.6666666666666, "r_x2": 444.6666666666667, "r_y2": 710.3333333333334, "r_x3": 521.6666666666666, "r_y3": 710.3333333333334, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 90.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 92.0, "t": 718.6666666666666, "r": 523.0, "b": 764.0, "coord_origin": "TOPLEFT"}, "confidence": 0.7318570613861084, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.3333333333334, "r_y0": 745.3333333333334, "r_x1": 92.0, "r_y1": 745.3333333333334, "r_x2": 92.0, "r_y2": 764.0, "r_x3": 521.3333333333334, "r_y3": 764.0, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, 
{"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 523.0, "r_y0": 718.6666666666666, "r_x1": 92.0, "r_y1": 718.6666666666666, "r_x2": 92.0, "r_y2": 737.3333333333334, "r_x3": 523.0, "r_y3": 737.3333333333334, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "text", "bbox": {"l": 444.6666666666667, "t": 691.6666666666666, "r": 521.6666666666666, "b": 710.3333333333334, "coord_origin": "TOPLEFT"}, "confidence": 0.5982133150100708, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.6666666666666, "r_y0": 691.6666666666666, "r_x1": 444.6666666666667, "r_y1": 691.6666666666666, "r_x2": 444.6666666666667, "r_y2": 710.3333333333334, "r_x3": 521.6666666666666, "r_y3": 710.3333333333334, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 90.0, "from_ocr": true}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 92.0, "t": 718.6666666666666, "r": 523.0, "b": 764.0, "coord_origin": "TOPLEFT"}, "confidence": 0.7318570613861084, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.3333333333334, "r_y0": 745.3333333333334, "r_x1": 92.0, "r_y1": 745.3333333333334, "r_x2": 92.0, "r_y2": 764.0, "r_x3": 521.3333333333334, "r_y3": 764.0, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 
255}, "rect": {"r_x0": 523.0, "r_y0": 718.6666666666666, "r_x1": 92.0, "r_y1": 718.6666666666666, "r_x2": 92.0, "r_y2": 737.3333333333334, "r_x3": 523.0, "r_y3": 737.3333333333334, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "text", "bbox": {"l": 444.6666666666667, "t": 691.6666666666666, "r": 521.6666666666666, "b": 710.3333333333334, "coord_origin": "TOPLEFT"}, "confidence": 0.5982133150100708, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 521.6666666666666, "r_y0": 691.6666666666666, "r_x1": 444.6666666666667, "r_y1": 691.6666666666666, "r_x2": 444.6666666666667, "r_y2": 710.3333333333334, "r_x3": 521.6666666666666, "r_y3": 710.3333333333334, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 90.0, "from_ocr": true}], "children": []}, "text": "package"}], "headers": []}}]
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.doctags.txt b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.doctags.txt
index 95999c0c..2c343d7b 100644
--- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.doctags.txt
+++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.doctags.txt
@@ -1,3 +1,3 @@
-Docling bundles PDF document conversion to JSON and Markdown in an easy self contained
-package
+Docling bundles PDF document conversion to JSON and Markdown in an easy self contained
+package
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json
index 9a2e18bb..580ed117 100644
--- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json
+++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json
@@ -1 +1 @@
-{"schema_name": "DoclingDocument", "version": "1.3.0", "name": "ocr_test_rotated_270", "origin": {"mimetype": "application/pdf", "binary_hash": 10890858393843077593, "filename": "ocr_test_rotated_270.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}, {"cref": "#/texts/1"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "furniture", "label": "page_header", "prov": [{"page_no": 1, "bbox": {"l": 717.1685859527342, "t": 524.2990548540179, "r": 764.8982839673505, "b": 90.32916553110118, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 86]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", "formatting": null, "hyperlink": null}, {"self_ref": "#/texts/1", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 691.4680194659409, "t": 523.0765988200898, "r": 709.8255850278712, "b": 442.3948768148814, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 7]}], "orig": "package", "text": "package", "formatting": null, "hyperlink": null}], "pictures": [], "tables": [], "key_value_items": [], "form_items": [], "pages": {"1": {"size": {"width": 841.9216918945312, "height": 595.201171875}, "image": null, "page_no": 1}}}
\ No newline at end of file
+{"schema_name": "DoclingDocument", "version": "1.3.0", "name": "ocr_test_rotated_270", "origin": {"mimetype": "application/pdf", "binary_hash": 10890858393843077593, "filename": "ocr_test_rotated_270.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}, {"cref": "#/texts/1"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "furniture", "label": "page_header", "prov": [{"page_no": 1, "bbox": {"l": 718.6666666666666, "t": 522.8678385416666, "r": 764.0, "b": 91.86783854166669, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 86]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", "formatting": null, "hyperlink": null}, {"self_ref": "#/texts/1", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 691.6666666666666, "t": 521.5345052083334, "r": 710.3333333333334, "b": 444.53450520833337, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 7]}], "orig": "package", "text": "package", "formatting": null, "hyperlink": null}], "pictures": [], "tables": [], "key_value_items": [], "form_items": [], "pages": {"1": {"size": {"width": 841.9216918945312, "height": 595.201171875}, "image": null, "page_no": 1}}}
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.json
index 5f76e79a..c4416b3b 100644
--- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.json
+++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.json
@@ -1 +1 @@
-[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 744.0930045534915, "r_y0": 504.87200373583954, "r_x1": 764.8982839673505, "r_y1": 504.87200373583954, "r_x2": 764.8982839673505, "r_y2": 73.34702001188118, "r_x3": 744.0930045534915, "r_y3": 73.34702001188118, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 717.1685859527342, "r_y0": 504.8720063438988, "r_x1": 737.9738558298501, "r_y1": 504.8720063438988, "r_x2": 737.9738558298501, "r_y2": 70.90211702098213, "r_x3": 717.1685859527342, "r_y3": 70.90211702098213, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.4680194659409, "r_y0": 152.80629506011857, "r_x1": 709.8255850278712, "r_y1": 152.80629506011857, "r_x2": 709.8255850278712, "r_y2": 72.12457305491027, "r_x3": 691.4680194659409, "r_y3": 72.12457305491027, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 0, "label": "page_header", "bbox": {"l": 717.1685859527342, "t": 70.90211702098213, "r": 764.8982839673505, "b": 504.8720063438988, "coord_origin": "TOPLEFT"}, "confidence": 0.6915205121040344, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 744.0930045534915, "r_y0": 504.87200373583954, "r_x1": 764.8982839673505, "r_y1": 504.87200373583954, "r_x2": 764.8982839673505, "r_y2": 
73.34702001188118, "r_x3": 744.0930045534915, "r_y3": 73.34702001188118, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 717.1685859527342, "r_y0": 504.8720063438988, "r_x1": 737.9738558298501, "r_y1": 504.8720063438988, "r_x2": 737.9738558298501, "r_y2": 70.90211702098213, "r_x3": 717.1685859527342, "r_y3": 70.90211702098213, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, {"id": 8, "label": "text", "bbox": {"l": 691.4680194659409, "t": 72.12457305491027, "r": 709.8255850278712, "b": 152.80629506011857, "coord_origin": "TOPLEFT"}, "confidence": 1.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.4680194659409, "r_y0": 152.80629506011857, "r_x1": 709.8255850278712, "r_y1": 152.80629506011857, "r_x2": 709.8255850278712, "r_y2": 72.12457305491027, "r_x3": 691.4680194659409, "r_y3": 72.12457305491027, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 717.1685859527342, "t": 70.90211702098213, "r": 764.8982839673505, "b": 504.8720063438988, "coord_origin": "TOPLEFT"}, "confidence": 0.6915205121040344, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 744.0930045534915, "r_y0": 504.87200373583954, "r_x1": 764.8982839673505, 
"r_y1": 504.87200373583954, "r_x2": 764.8982839673505, "r_y2": 73.34702001188118, "r_x3": 744.0930045534915, "r_y3": 73.34702001188118, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 717.1685859527342, "r_y0": 504.8720063438988, "r_x1": 737.9738558298501, "r_y1": 504.8720063438988, "r_x2": 737.9738558298501, "r_y2": 70.90211702098213, "r_x3": 717.1685859527342, "r_y3": 70.90211702098213, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 8, "page_no": 0, "cluster": {"id": 8, "label": "text", "bbox": {"l": 691.4680194659409, "t": 72.12457305491027, "r": 709.8255850278712, "b": 152.80629506011857, "coord_origin": "TOPLEFT"}, "confidence": 1.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.4680194659409, "r_y0": 152.80629506011857, "r_x1": 709.8255850278712, "r_y1": 152.80629506011857, "r_x2": 709.8255850278712, "r_y2": 72.12457305491027, "r_x3": 691.4680194659409, "r_y3": 72.12457305491027, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 8, "page_no": 0, "cluster": {"id": 8, "label": "text", "bbox": {"l": 691.4680194659409, "t": 72.12457305491027, "r": 709.8255850278712, "b": 152.80629506011857, "coord_origin": "TOPLEFT"}, "confidence": 1.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 
691.4680194659409, "r_y0": 152.80629506011857, "r_x1": 709.8255850278712, "r_y1": 152.80629506011857, "r_x2": 709.8255850278712, "r_y2": 72.12457305491027, "r_x3": 691.4680194659409, "r_y3": 72.12457305491027, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "package"}], "headers": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 717.1685859527342, "t": 70.90211702098213, "r": 764.8982839673505, "b": 504.8720063438988, "coord_origin": "TOPLEFT"}, "confidence": 0.6915205121040344, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 744.0930045534915, "r_y0": 504.87200373583954, "r_x1": 764.8982839673505, "r_y1": 504.87200373583954, "r_x2": 764.8982839673505, "r_y2": 73.34702001188118, "r_x3": 744.0930045534915, "r_y3": 73.34702001188118, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 717.1685859527342, "r_y0": 504.8720063438988, "r_x1": 737.9738558298501, "r_y1": 504.8720063438988, "r_x2": 737.9738558298501, "r_y2": 70.90211702098213, "r_x3": 717.1685859527342, "r_y3": 70.90211702098213, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}]}}]
\ No newline at end of file
+[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 745.3333333333334, "r_y0": 74.0, "r_x1": 745.3333333333334, "r_y1": 503.3333333333333, "r_x2": 764.0, "r_y2": 503.3333333333333, "r_x3": 745.3333333333334, "r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 718.6666666666666, "r_y0": 72.33333333333333, "r_x1": 718.6666666666666, "r_y1": 503.3333333333333, "r_x2": 737.3333333333334, "r_y2": 503.3333333333333, "r_x3": 718.6666666666666, "r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.6666666666666, "r_y0": 73.66666666666667, "r_x1": 691.6666666666666, "r_y1": 150.66666666666666, "r_x2": 710.3333333333334, "r_y2": 150.66666666666666, "r_x3": 691.6666666666666, "r_y3": 150.66666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 0, "label": "page_header", "bbox": {"l": 718.6666666666666, "t": 72.33333333333333, "r": 764.0, "b": 503.3333333333333, "coord_origin": "TOPLEFT"}, "confidence": 0.6915205121040344, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 745.3333333333334, "r_y0": 74.0, "r_x1": 745.3333333333334, "r_y1": 503.3333333333333, "r_x2": 764.0, "r_y2": 503.3333333333333, "r_x3": 745.3333333333334, "r_y3": 503.3333333333333, 
"coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 718.6666666666666, "r_y0": 72.33333333333333, "r_x1": 718.6666666666666, "r_y1": 503.3333333333333, "r_x2": 737.3333333333334, "r_y2": 503.3333333333333, "r_x3": 718.6666666666666, "r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, {"id": 8, "label": "text", "bbox": {"l": 691.6666666666666, "t": 73.66666666666667, "r": 710.3333333333334, "b": 150.66666666666666, "coord_origin": "TOPLEFT"}, "confidence": 89.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.6666666666666, "r_y0": 73.66666666666667, "r_x1": 691.6666666666666, "r_y1": 150.66666666666666, "r_x2": 710.3333333333334, "r_y2": 150.66666666666666, "r_x3": 691.6666666666666, "r_y3": 150.66666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 718.6666666666666, "t": 72.33333333333333, "r": 764.0, "b": 503.3333333333333, "coord_origin": "TOPLEFT"}, "confidence": 0.6915205121040344, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 745.3333333333334, "r_y0": 74.0, "r_x1": 745.3333333333334, "r_y1": 503.3333333333333, "r_x2": 764.0, "r_y2": 503.3333333333333, "r_x3": 745.3333333333334, 
"r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 718.6666666666666, "r_y0": 72.33333333333333, "r_x1": 718.6666666666666, "r_y1": 503.3333333333333, "r_x2": 737.3333333333334, "r_y2": 503.3333333333333, "r_x3": 718.6666666666666, "r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 8, "page_no": 0, "cluster": {"id": 8, "label": "text", "bbox": {"l": 691.6666666666666, "t": 73.66666666666667, "r": 710.3333333333334, "b": 150.66666666666666, "coord_origin": "TOPLEFT"}, "confidence": 89.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.6666666666666, "r_y0": 73.66666666666667, "r_x1": 691.6666666666666, "r_y1": 150.66666666666666, "r_x2": 710.3333333333334, "r_y2": 150.66666666666666, "r_x3": 691.6666666666666, "r_y3": 150.66666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 8, "page_no": 0, "cluster": {"id": 8, "label": "text", "bbox": {"l": 691.6666666666666, "t": 73.66666666666667, "r": 710.3333333333334, "b": 150.66666666666666, "coord_origin": "TOPLEFT"}, "confidence": 89.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 691.6666666666666, "r_y0": 73.66666666666667, "r_x1": 691.6666666666666, "r_y1": 150.66666666666666, "r_x2": 
710.3333333333334, "r_y2": 150.66666666666666, "r_x3": 691.6666666666666, "r_y3": 150.66666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "children": []}, "text": "package"}], "headers": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 718.6666666666666, "t": 72.33333333333333, "r": 764.0, "b": 503.3333333333333, "coord_origin": "TOPLEFT"}, "confidence": 0.6915205121040344, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 745.3333333333334, "r_y0": 74.0, "r_x1": 745.3333333333334, "r_y1": 503.3333333333333, "r_x2": 764.0, "r_y2": 503.3333333333333, "r_x3": 745.3333333333334, "r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 718.6666666666666, "r_y0": 72.33333333333333, "r_x1": 718.6666666666666, "r_y1": 503.3333333333333, "r_x2": 737.3333333333334, "r_y2": 503.3333333333333, "r_x3": 718.6666666666666, "r_y3": 503.3333333333333, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}]}}]
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.doctags.txt b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.doctags.txt
index c99f4b1f..3d262655 100644
--- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.doctags.txt
+++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.doctags.txt
@@ -1,3 +1,3 @@
-Docling bundles PDF document conversion to JSON and Markdown in an easy self contained
-package
+Docling bundles PDF document conversion to
+JSON and Markdown in an easy self contained package
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json
index d4bca1a6..3022d1ca 100644
--- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json
+++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json
@@ -1 +1 @@
-{"schema_name": "DoclingDocument", "version": "1.3.0", "name": "ocr_test_rotated_90", "origin": {"mimetype": "application/pdf", "binary_hash": 6989291015361162334, "filename": "ocr_test_rotated_90.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}, {"cref": "#/texts/1"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "furniture", "label": "page_header", "prov": [{"page_no": 1, "bbox": {"l": 77.10171546422428, "t": 506.07735421856773, "r": 124.91101654503161, "b": 71.88562244773436, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 86]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", "formatting": null, "hyperlink": null}, {"self_ref": "#/texts/1", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 131.21306574279092, "t": 154.19400205373182, "r": 152.19606490864376, "b": 74.12495603322407, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 7]}], "orig": "package", "text": "package", "formatting": null, "hyperlink": null}], "pictures": [], "tables": [], "key_value_items": [], "form_items": [], "pages": {"1": {"size": {"width": 841.9216918945312, "height": 595.201171875}, "image": null, "page_no": 1}}}
\ No newline at end of file
+{"schema_name": "DoclingDocument", "version": "1.3.0", "name": "ocr_test_rotated_90", "origin": {"mimetype": "application/pdf", "binary_hash": 6989291015361162334, "filename": "ocr_test_rotated_90.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}, {"cref": "#/texts/1"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 78.0, "t": 503.201171875, "r": 96.66666666666667, "b": 73.86783854166663, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 42]}], "orig": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to", "formatting": null, "hyperlink": null}, {"self_ref": "#/texts/1", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 104.66666666666667, "t": 503.201171875, "r": 123.33333333333333, "b": 72.201171875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 43]}, {"page_no": 1, "bbox": {"l": 104.66666666666667, "t": 503.201171875, "r": 123.33333333333333, "b": 72.201171875, "coord_origin": "BOTTOMLEFT"}, "charspan": [44, 51]}], "orig": "JSON and Markdown in an easy self contained package", "text": "JSON and Markdown in an easy self contained package", "formatting": null, "hyperlink": null}], "pictures": [], "tables": [], "key_value_items": [], "form_items": [], "pages": {"1": {"size": {"width": 841.9216918945312, "height": 595.201171875}, "image": null, "page_no": 1}}}
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.md b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.md
index 597acc76..8d77a437 100644
--- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.md
+++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.md
@@ -1 +1,3 @@
-package
\ No newline at end of file
+Docling bundles PDF document conversion to
+
+JSON and Markdown in an easy self contained package
\ No newline at end of file
diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.json
index 89e716e1..71c1e2bd 100644
--- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.json
+++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.json
@@ -1 +1 @@
-[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 77.10171546422428, "r_y0": 520.7638577050515, "r_x1": 96.6831586150625, "r_y1": 520.7638577050515, "r_x2": 96.6831586150625, "r_y2": 89.23887398109309, "r_x3": 77.10171546422428, "r_y3": 89.23887398109309, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 100.55299576256091, "r_y0": 523.3155494272656, "r_x1": 124.91101654503161, "r_y1": 523.3155494272656, "r_x2": 124.91101654503161, "r_y2": 89.12381765643227, "r_x3": 100.55299576256091, "r_y3": 89.12381765643227, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 131.21306574279092, "r_y0": 521.0762158417759, "r_x1": 152.19606490864376, "r_y1": 521.0762158417759, "r_x2": 152.19606490864376, "r_y2": 441.0071698212682, "r_x3": 131.21306574279092, "r_y3": 441.0071698212682, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 77.10171546422428, "r_y0": 520.7638577050515, "r_x1": 96.6831586150625, "r_y1": 520.7638577050515, "r_x2": 96.6831586150625, "r_y2": 
89.23887398109309, "r_x3": 77.10171546422428, "r_y3": 89.23887398109309, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 100.55299576256091, "r_y0": 523.3155494272656, "r_x1": 124.91101654503161, "r_y1": 523.3155494272656, "r_x2": 124.91101654503161, "r_y2": 89.12381765643227, "r_x3": 100.55299576256091, "r_y3": 89.12381765643227, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 131.21306574279092, "r_y0": 521.0762158417759, "r_x1": 152.19606490864376, "r_y1": 521.0762158417759, "r_x2": 152.19606490864376, "r_y2": 441.0071698212682, "r_x3": 131.21306574279092, "r_y3": 441.0071698212682, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 77.10171546422428, "r_y0": 520.7638577050515, "r_x1": 
96.6831586150625, "r_y1": 520.7638577050515, "r_x2": 96.6831586150625, "r_y2": 89.23887398109309, "r_x3": 77.10171546422428, "r_y3": 89.23887398109309, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 100.55299576256091, "r_y0": 523.3155494272656, "r_x1": 124.91101654503161, "r_y1": 523.3155494272656, "r_x2": 124.91101654503161, "r_y2": 89.12381765643227, "r_x3": 100.55299576256091, "r_y3": 89.12381765643227, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 131.21306574279092, "r_y0": 521.0762158417759, "r_x1": 152.19606490864376, "r_y1": 521.0762158417759, "r_x2": 152.19606490864376, "r_y2": 441.0071698212682, "r_x3": 131.21306574279092, "r_y3": 441.0071698212682, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"index": 2, "rgba": {"r": 0, "g": 
0, "b": 0, "a": 255}, "rect": {"r_x0": 131.21306574279092, "r_y0": 521.0762158417759, "r_x1": 152.19606490864376, "r_y1": 521.0762158417759, "r_x2": 152.19606490864376, "r_y2": 441.0071698212682, "r_x3": 131.21306574279092, "r_y3": 441.0071698212682, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "package"}], "headers": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 77.10171546422428, "r_y0": 520.7638577050515, "r_x1": 96.6831586150625, "r_y1": 520.7638577050515, "r_x2": 96.6831586150625, "r_y2": 89.23887398109309, "r_x3": 77.10171546422428, "r_y3": 89.23887398109309, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 100.55299576256091, "r_y0": 523.3155494272656, "r_x1": 124.91101654503161, "r_y1": 523.3155494272656, "r_x2": 124.91101654503161, "r_y2": 89.12381765643227, "r_x3": 100.55299576256091, "r_y3": 89.12381765643227, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}]}}]
\ No newline at end of file
+[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 96.66666666666667, "r_y0": 521.3333333333334, "r_x1": 96.66666666666667, "r_y1": 92.0, "r_x2": 78.0, "r_y2": 92.0, "r_x3": 78.0, "r_y3": 521.3333333333334, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 123.33333333333333, "r_y0": 523.0, "r_x1": 123.33333333333333, "r_y1": 92.0, "r_x2": 104.66666666666667, "r_y2": 92.0, "r_x3": 104.66666666666667, "r_y3": 523.0, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 150.33333333333334, "r_y0": 521.6666666666666, "r_x1": 150.33333333333334, "r_y1": 444.6666666666667, "r_x2": 131.66666666666666, "r_y2": 444.6666666666667, "r_x3": 131.66666666666666, "r_y3": 521.6666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 1, "label": "text", "bbox": {"l": 78.0, "t": 92.0, "r": 96.66666666666667, "b": 521.3333333333334, "coord_origin": "TOPLEFT"}, "confidence": 94.0, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 96.66666666666667, "r_y0": 521.3333333333334, "r_x1": 96.66666666666667, "r_y1": 92.0, "r_x2": 78.0, "r_y2": 92.0, "r_x3": 78.0, "r_y3": 521.3333333333334, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", 
"text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}], "children": []}, {"id": 2, "label": "text", "bbox": {"l": 104.66666666666667, "t": 92.0, "r": 123.33333333333333, "b": 523.0, "coord_origin": "TOPLEFT"}, "confidence": 92.0, "cells": [{"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 123.33333333333333, "r_y0": 523.0, "r_x1": 123.33333333333333, "r_y1": 92.0, "r_x2": 104.66666666666667, "r_y2": 92.0, "r_x3": 104.66666666666667, "r_y3": 523.0, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, {"id": 3, "label": "text", "bbox": {"l": 131.66666666666666, "t": 444.6666666666667, "r": 150.33333333333334, "b": 521.6666666666666, "coord_origin": "TOPLEFT"}, "confidence": 89.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 150.33333333333334, "r_y0": 521.6666666666666, "r_x1": 150.33333333333334, "r_y1": 444.6666666666667, "r_x2": 131.66666666666666, "r_y2": 444.6666666666667, "r_x3": 131.66666666666666, "r_y3": 521.6666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 78.0, "t": 92.0, "r": 96.66666666666667, "b": 521.3333333333334, "coord_origin": "TOPLEFT"}, "confidence": 94.0, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 96.66666666666667, "r_y0": 521.3333333333334, "r_x1": 96.66666666666667, "r_y1": 92.0, "r_x2": 78.0, "r_y2": 92.0, "r_x3": 78.0, "r_y3": 521.3333333333334, "coord_origin": "TOPLEFT"}, "text": 
"Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to"}, {"label": "text", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "text", "bbox": {"l": 104.66666666666667, "t": 92.0, "r": 123.33333333333333, "b": 523.0, "coord_origin": "TOPLEFT"}, "confidence": 92.0, "cells": [{"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 123.33333333333333, "r_y0": 523.0, "r_x1": 123.33333333333333, "r_y1": 92.0, "r_x2": 104.66666666666667, "r_y2": 92.0, "r_x3": 104.66666666666667, "r_y3": 523.0, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, "text": "JSON and Markdown in an easy self contained"}, {"label": "text", "id": 3, "page_no": 0, "cluster": {"id": 3, "label": "text", "bbox": {"l": 131.66666666666666, "t": 444.6666666666667, "r": 150.33333333333334, "b": 521.6666666666666, "coord_origin": "TOPLEFT"}, "confidence": 89.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 150.33333333333334, "r_y0": 521.6666666666666, "r_x1": 150.33333333333334, "r_y1": 444.6666666666667, "r_x2": 131.66666666666666, "r_y2": 444.6666666666667, "r_x3": 131.66666666666666, "r_y3": 521.6666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 78.0, "t": 92.0, "r": 96.66666666666667, "b": 521.3333333333334, "coord_origin": "TOPLEFT"}, "confidence": 94.0, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 
96.66666666666667, "r_y0": 521.3333333333334, "r_x1": 96.66666666666667, "r_y1": 92.0, "r_x2": 78.0, "r_y2": 92.0, "r_x3": 78.0, "r_y3": 521.3333333333334, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 94.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to"}, {"label": "text", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "text", "bbox": {"l": 104.66666666666667, "t": 92.0, "r": 123.33333333333333, "b": 523.0, "coord_origin": "TOPLEFT"}, "confidence": 92.0, "cells": [{"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 123.33333333333333, "r_y0": 523.0, "r_x1": 123.33333333333333, "r_y1": 92.0, "r_x2": 104.66666666666667, "r_y2": 92.0, "r_x3": 104.66666666666667, "r_y3": 523.0, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 92.0, "from_ocr": true}], "children": []}, "text": "JSON and Markdown in an easy self contained"}, {"label": "text", "id": 3, "page_no": 0, "cluster": {"id": 3, "label": "text", "bbox": {"l": 131.66666666666666, "t": 444.6666666666667, "r": 150.33333333333334, "b": 521.6666666666666, "coord_origin": "TOPLEFT"}, "confidence": 89.0, "cells": [{"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 150.33333333333334, "r_y0": 521.6666666666666, "r_x1": 150.33333333333334, "r_y1": 444.6666666666667, "r_x2": 131.66666666666666, "r_y2": 444.6666666666667, "r_x3": 131.66666666666666, "r_y3": 521.6666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 89.0, "from_ocr": true}], "children": []}, "text": "package"}], "headers": []}}]
\ No newline at end of file