From 931eb55b8820765eb872961f295be0676852c73e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Doumouro?= Date: Tue, 8 Jul 2025 18:03:29 +0200 Subject: [PATCH] fix(ocr-utils): unit test and fix the `rotate_bounding_box` function (#1897) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Clément Doumouro --- docling/utils/ocr_utils.py | 2 +- docling/utils/orientation.py | 58 ++++++++++++-------------- pyproject.toml | 2 +- tests/test_ocr_utils.py | 80 ++++++++++++++++++++++++++++++++++++ uv.lock | 8 ++-- 5 files changed, 112 insertions(+), 38 deletions(-) create mode 100644 tests/test_ocr_utils.py diff --git a/docling/utils/ocr_utils.py b/docling/utils/ocr_utils.py index 63f4848a..c4bc9695 100644 --- a/docling/utils/ocr_utils.py +++ b/docling/utils/ocr_utils.py @@ -41,7 +41,7 @@ def tesseract_box_to_bounding_rectangle( im_size: Tuple[int, int], ) -> BoundingRectangle: # box is in the top, left, height, width format, top left coordinates - rect = rotate_bounding_box(bbox, angle=-orientation, im_size=im_size) + rect = rotate_bounding_box(bbox, angle=orientation, im_size=im_size) rect = BoundingRectangle( r_x0=rect.r_x0 / scale, r_y0=rect.r_y0 / scale, diff --git a/docling/utils/orientation.py b/docling/utils/orientation.py index 946e55ed..29c02ff7 100644 --- a/docling/utils/orientation.py +++ b/docling/utils/orientation.py @@ -14,43 +14,36 @@ def rotate_bounding_box( # coordinate system. Then other corners are found rotating counterclockwise bbox = bbox.to_top_left_origin(im_size[1]) left, top, width, height = bbox.l, bbox.t, bbox.width, bbox.height - im_h, im_w = im_size + im_w, im_h = im_size angle = angle % 360 if angle == 0: - r_x0 = left - r_y0 = top + height - r_x1 = r_x0 + width - r_y1 = r_y0 - r_x2 = r_x0 + width - r_y2 = r_y0 - height - r_x3 = r_x0 - r_y3 = r_y0 - height + return BoundingRectangle.from_bounding_box(bbox) elif angle == 90: - r_x0 = im_w - (top + height) + r_x0 = top + height + r_y0 = im_w - left + r_x1 = r_x0 + r_y1 = r_y0 - width + r_x2 = r_x1 - height + r_y2 = r_y1 + r_x3 = r_x2 + r_y3 = r_y0 + elif angle == 180: + r_x0 = im_w - left + r_y0 = im_h - (top + height) + r_x1 = r_x0 - width + r_y1 = r_y0 + r_x2 = r_x1 + r_y2 = r_y1 + height + r_x3 = r_x0 + r_y3 = r_y2 + elif angle == 270: + r_x0 = im_h - (top + height) r_y0 = left r_x1 = r_x0 r_y1 = r_y0 + width - r_x2 = r_x0 + height - r_y2 = r_y0 + width - r_x3 = r_x0 - r_y3 = r_y0 + width - elif angle == 180: - r_x0 = im_h - left - r_y0 = im_w - (top + height) - r_x1 = r_x0 - width - r_y1 = r_y0 - r_x2 = r_x0 - width - r_y2 = r_y0 + height - r_x3 = r_x0 - r_y3 = r_y0 + height - elif angle == 270: - r_x0 = top + height - r_y0 = im_h - left - r_x1 = r_x0 - r_y1 = r_y0 - width - r_x2 = r_x0 - height - r_y2 = r_y0 - width - r_x3 = r_x0 - height + r_x2 = r_x1 + height + r_y2 = r_y1 + r_x3 = r_x2 r_y3 = r_y0 else: msg = ( @@ -58,7 +51,7 @@ def rotate_bounding_box( f" {sorted(CLIPPED_ORIENTATIONS)}" ) raise ValueError(msg) - return BoundingRectangle( + rectangle = BoundingRectangle( r_x0=r_x0, r_y0=r_y0, r_x1=r_x1, @@ -69,3 +62,4 @@ def rotate_bounding_box( r_y3=r_y3, coord_origin=CoordOrigin.TOPLEFT, ) + return rectangle diff --git a/pyproject.toml b/pyproject.toml index 1bd0f3d8..6d739777 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ authors = [ requires-python = '>=3.9,<4.0' dependencies = [ 'pydantic (>=2.0.0,<3.0.0)', - 'docling-core[chunking] (>=2.39.0,<3.0.0)', + 'docling-core[chunking] (>=2.40.0,<3.0.0)', 'docling-parse (>=4.0.0,<5.0.0)', 'docling-ibm-models (>=3.6.0,<4)', 'filetype (>=1.2.0,<2.0.0)', diff --git a/tests/test_ocr_utils.py b/tests/test_ocr_utils.py new file mode 100644 index 00000000..39a8508a --- /dev/null +++ b/tests/test_ocr_utils.py @@ -0,0 +1,80 @@ +from typing import Tuple + +import pytest +from docling_core.types.doc import BoundingBox, CoordOrigin +from docling_core.types.doc.page import BoundingRectangle + +from docling.utils.orientation import rotate_bounding_box + +IM_SIZE = (4, 5) +BBOX = BoundingBox(l=1, t=3, r=3, b=4, coord_origin=CoordOrigin.TOPLEFT) +RECT = BoundingRectangle( + r_x0=1, + r_y0=4, + r_x1=3, + r_y1=4, + r_x2=3, + r_y2=3, + r_x3=1, + r_y3=3, + coord_origin=CoordOrigin.TOPLEFT, +) +RECT_90 = BoundingRectangle( + r_x0=4, + r_y0=3, + r_x1=4, + r_y1=1, + r_x2=3, + r_y2=1, + r_x3=3, + r_y3=3, + coord_origin=CoordOrigin.TOPLEFT, +) +RECT_180 = BoundingRectangle( + r_x0=3, + r_y0=1, + r_x1=1, + r_y1=1, + r_x2=1, + r_y2=2, + r_x3=3, + r_y3=2, + coord_origin=CoordOrigin.TOPLEFT, +) +RECT_270 = BoundingRectangle( + r_x0=1, + r_y0=1, + r_x1=1, + r_y1=3, + r_x2=2, + r_y2=3, + r_x3=2, + r_y3=1, + coord_origin=CoordOrigin.TOPLEFT, +) + + +@pytest.mark.parametrize( + ["bbox", "im_size", "angle", "expected_rectangle"], + [ + # (BBOX, IM_SIZE, 0, RECT), + # (BBOX, IM_SIZE, 90, RECT_90), + (BBOX, IM_SIZE, 180, RECT_180), + # (BBOX, IM_SIZE, 270, RECT_270), + # (BBOX, IM_SIZE, 360, RECT), + # (BBOX, IM_SIZE, -90, RECT_270), + (BBOX, IM_SIZE, -180, RECT_180), + # (BBOX, IM_SIZE, -270, RECT_90), + ], +) +def test_rotate_bounding_box( + bbox: BoundingBox, + im_size: Tuple[int, int], + angle: int, + expected_rectangle: BoundingRectangle, +): + rotated = rotate_bounding_box(bbox, angle, im_size) + + assert rotated == expected_rectangle + expected_angle_360 = angle % 360 + assert rotated.angle_360 == expected_angle_360 diff --git a/uv.lock b/uv.lock index b14c8f3d..10048687 100644 --- a/uv.lock +++ b/uv.lock @@ -904,7 +904,7 @@ requires-dist = [ { name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" }, { name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" }, { name = "certifi", specifier = ">=2024.7.4" }, - { name = "docling-core", extras = ["chunking"], specifier = ">=2.39.0,<3.0.0" }, + { name = "docling-core", extras = ["chunking"], specifier = ">=2.40.0,<3.0.0" }, { name = "docling-ibm-models", specifier = ">=3.6.0,<4" }, { name = "docling-parse", specifier = ">=4.0.0,<5.0.0" }, { name = "easyocr", specifier = ">=1.7,<2.0" }, @@ -979,7 +979,7 @@ examples = [ [[package]] name = "docling-core" -version = "2.39.0" +version = "2.40.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonref" }, @@ -993,9 +993,9 @@ dependencies = [ { name = "typer" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6a/8b/5613467523bed58d9f2b94220947783914b6d9910a8d20908cf148805427/docling_core-2.39.0.tar.gz", hash = "sha256:77530156c79c9000fe3104894935437d3e2d46dc0f567b5a500974d7c1a8b38b", size = 148005, upload-time = "2025-06-27T12:59:56.694Z" } +sdist = { url = "https://files.pythonhosted.org/packages/02/5d/fb9fc563d694259877a94f9ae7cf77eba4e1143e539a9dda9fc738db1548/docling_core-2.40.0.tar.gz", hash = "sha256:80a03ac0869d45e1b15ac122ed9da1951cb8d209f596269042601d42e4e1f47f", size = 148373, upload-time = "2025-07-02T16:23:23.754Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/85/3d59ac46a47f62a0ed79e187c4163cecd2693d05006f771038db4781f9ff/docling_core-2.39.0-py3-none-any.whl", hash = "sha256:b7ce5142ab95bd8d5cfe5d7df167a96a6eb41d884f00ea42bb3dd8f40ade92ea", size = 152890, upload-time = "2025-06-27T12:59:55.327Z" }, + { url = "https://files.pythonhosted.org/packages/c9/5c/a66db0c0724a0dc7d683e0cd45c90a3f46273f5379f63bdd29152a724061/docling_core-2.40.0-py3-none-any.whl", hash = "sha256:439ae2aab3a2e4044df9ae76926325f8ae65dac6b3e6fd1911168cbdf6df27df", size = 153028, upload-time = "2025-07-02T16:23:22.064Z" }, ] [package.optional-dependencies]