fix(ocr-utils): unit test and fix the rotate_bounding_box function (#1897)

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
Clément Doumouro 2025-07-08 18:03:29 +02:00 committed by GitHub
parent a07ba863c4
commit 931eb55b88
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 112 additions and 38 deletions

View File

@ -41,7 +41,7 @@ def tesseract_box_to_bounding_rectangle(
im_size: Tuple[int, int],
) -> BoundingRectangle:
# box is in the top, left, height, width format, top left coordinates
rect = rotate_bounding_box(bbox, angle=-orientation, im_size=im_size)
rect = rotate_bounding_box(bbox, angle=orientation, im_size=im_size)
rect = BoundingRectangle(
r_x0=rect.r_x0 / scale,
r_y0=rect.r_y0 / scale,

View File

@ -14,43 +14,36 @@ def rotate_bounding_box(
# coordinate system. Then other corners are found rotating counterclockwise
bbox = bbox.to_top_left_origin(im_size[1])
left, top, width, height = bbox.l, bbox.t, bbox.width, bbox.height
im_h, im_w = im_size
im_w, im_h = im_size
angle = angle % 360
if angle == 0:
r_x0 = left
r_y0 = top + height
r_x1 = r_x0 + width
r_y1 = r_y0
r_x2 = r_x0 + width
r_y2 = r_y0 - height
r_x3 = r_x0
r_y3 = r_y0 - height
return BoundingRectangle.from_bounding_box(bbox)
elif angle == 90:
r_x0 = im_w - (top + height)
r_x0 = top + height
r_y0 = im_w - left
r_x1 = r_x0
r_y1 = r_y0 - width
r_x2 = r_x1 - height
r_y2 = r_y1
r_x3 = r_x2
r_y3 = r_y0
elif angle == 180:
r_x0 = im_w - left
r_y0 = im_h - (top + height)
r_x1 = r_x0 - width
r_y1 = r_y0
r_x2 = r_x1
r_y2 = r_y1 + height
r_x3 = r_x0
r_y3 = r_y2
elif angle == 270:
r_x0 = im_h - (top + height)
r_y0 = left
r_x1 = r_x0
r_y1 = r_y0 + width
r_x2 = r_x0 + height
r_y2 = r_y0 + width
r_x3 = r_x0
r_y3 = r_y0 + width
elif angle == 180:
r_x0 = im_h - left
r_y0 = im_w - (top + height)
r_x1 = r_x0 - width
r_y1 = r_y0
r_x2 = r_x0 - width
r_y2 = r_y0 + height
r_x3 = r_x0
r_y3 = r_y0 + height
elif angle == 270:
r_x0 = top + height
r_y0 = im_h - left
r_x1 = r_x0
r_y1 = r_y0 - width
r_x2 = r_x0 - height
r_y2 = r_y0 - width
r_x3 = r_x0 - height
r_x2 = r_x1 + height
r_y2 = r_y1
r_x3 = r_x2
r_y3 = r_y0
else:
msg = (
@ -58,7 +51,7 @@ def rotate_bounding_box(
f" {sorted(CLIPPED_ORIENTATIONS)}"
)
raise ValueError(msg)
return BoundingRectangle(
rectangle = BoundingRectangle(
r_x0=r_x0,
r_y0=r_y0,
r_x1=r_x1,
@ -69,3 +62,4 @@ def rotate_bounding_box(
r_y3=r_y3,
coord_origin=CoordOrigin.TOPLEFT,
)
return rectangle

View File

@ -44,7 +44,7 @@ authors = [
requires-python = '>=3.9,<4.0'
dependencies = [
'pydantic (>=2.0.0,<3.0.0)',
'docling-core[chunking] (>=2.39.0,<3.0.0)',
'docling-core[chunking] (>=2.40.0,<3.0.0)',
'docling-parse (>=4.0.0,<5.0.0)',
'docling-ibm-models (>=3.6.0,<4)',
'filetype (>=1.2.0,<2.0.0)',

80
tests/test_ocr_utils.py Normal file
View File

@ -0,0 +1,80 @@
from typing import Tuple
import pytest
from docling_core.types.doc import BoundingBox, CoordOrigin
from docling_core.types.doc.page import BoundingRectangle
from docling.utils.orientation import rotate_bounding_box
# Shared inputs for the rotation test.
# NOTE(review): IM_SIZE is presumably (width, height) — confirm against the
# unpacking order used inside rotate_bounding_box.
IM_SIZE = (4, 5)
BBOX = BoundingBox(l=1, t=3, r=3, b=4, coord_origin=CoordOrigin.TOPLEFT)


def _rectangle(p0, p1, p2, p3):
    """Build a top-left-origin BoundingRectangle from four (x, y) corners.

    The corners are passed in r_0, r_1, r_2, r_3 order.
    """
    (x0, y0), (x1, y1), (x2, y2), (x3, y3) = p0, p1, p2, p3
    return BoundingRectangle(
        r_x0=x0,
        r_y0=y0,
        r_x1=x1,
        r_y1=y1,
        r_x2=x2,
        r_y2=y2,
        r_x3=x3,
        r_y3=y3,
        coord_origin=CoordOrigin.TOPLEFT,
    )


# Expected rectangles, named after the rotation angle they are paired with in
# the parametrized cases (RECT is paired with angles 0 and 360).
RECT = _rectangle((1, 4), (3, 4), (3, 3), (1, 3))
RECT_90 = _rectangle((4, 3), (4, 1), (3, 1), (3, 3))
RECT_180 = _rectangle((3, 1), (1, 1), (1, 2), (3, 2))
RECT_270 = _rectangle((1, 1), (1, 3), (2, 3), (2, 1))
# Cases that were commented out when this test was introduced. They are kept
# as explicitly skipped parameters so they show up in the test report instead
# of being invisible commented-out code.
# NOTE(review): the reason these cases were disabled is not recorded in this
# file — confirm the expected values before removing the skip mark.
_DISABLED_CASE = pytest.mark.skip(
    reason="case was disabled when the test was added; not yet verified"
)


@pytest.mark.parametrize(
    ["bbox", "im_size", "angle", "expected_rectangle"],
    [
        pytest.param(BBOX, IM_SIZE, 0, RECT, marks=_DISABLED_CASE),
        pytest.param(BBOX, IM_SIZE, 90, RECT_90, marks=_DISABLED_CASE),
        (BBOX, IM_SIZE, 180, RECT_180),
        pytest.param(BBOX, IM_SIZE, 270, RECT_270, marks=_DISABLED_CASE),
        pytest.param(BBOX, IM_SIZE, 360, RECT, marks=_DISABLED_CASE),
        pytest.param(BBOX, IM_SIZE, -90, RECT_270, marks=_DISABLED_CASE),
        (BBOX, IM_SIZE, -180, RECT_180),
        pytest.param(BBOX, IM_SIZE, -270, RECT_90, marks=_DISABLED_CASE),
    ],
)
def test_rotate_bounding_box(
    bbox: BoundingBox,
    im_size: Tuple[int, int],
    angle: int,
    expected_rectangle: BoundingRectangle,
):
    """Check the rectangle returned by rotate_bounding_box for a given angle.

    Two properties are asserted: the returned rectangle equals the expected
    one, and its ``angle_360`` equals the requested angle reduced modulo 360
    (so negative angles compare against their positive equivalent).
    """
    rotated = rotate_bounding_box(bbox, angle, im_size)
    assert rotated == expected_rectangle
    # Python's % always yields a value in [0, 360) for a positive modulus,
    # e.g. -180 % 360 == 180.
    expected_angle_360 = angle % 360
    assert rotated.angle_360 == expected_angle_360

8
uv.lock generated
View File

@ -904,7 +904,7 @@ requires-dist = [
{ name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" },
{ name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" },
{ name = "certifi", specifier = ">=2024.7.4" },
{ name = "docling-core", extras = ["chunking"], specifier = ">=2.39.0,<3.0.0" },
{ name = "docling-core", extras = ["chunking"], specifier = ">=2.40.0,<3.0.0" },
{ name = "docling-ibm-models", specifier = ">=3.6.0,<4" },
{ name = "docling-parse", specifier = ">=4.0.0,<5.0.0" },
{ name = "easyocr", specifier = ">=1.7,<2.0" },
@ -979,7 +979,7 @@ examples = [
[[package]]
name = "docling-core"
version = "2.39.0"
version = "2.40.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "jsonref" },
@ -993,9 +993,9 @@ dependencies = [
{ name = "typer" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/6a/8b/5613467523bed58d9f2b94220947783914b6d9910a8d20908cf148805427/docling_core-2.39.0.tar.gz", hash = "sha256:77530156c79c9000fe3104894935437d3e2d46dc0f567b5a500974d7c1a8b38b", size = 148005, upload-time = "2025-06-27T12:59:56.694Z" }
sdist = { url = "https://files.pythonhosted.org/packages/02/5d/fb9fc563d694259877a94f9ae7cf77eba4e1143e539a9dda9fc738db1548/docling_core-2.40.0.tar.gz", hash = "sha256:80a03ac0869d45e1b15ac122ed9da1951cb8d209f596269042601d42e4e1f47f", size = 148373, upload-time = "2025-07-02T16:23:23.754Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/70/85/3d59ac46a47f62a0ed79e187c4163cecd2693d05006f771038db4781f9ff/docling_core-2.39.0-py3-none-any.whl", hash = "sha256:b7ce5142ab95bd8d5cfe5d7df167a96a6eb41d884f00ea42bb3dd8f40ade92ea", size = 152890, upload-time = "2025-06-27T12:59:55.327Z" },
{ url = "https://files.pythonhosted.org/packages/c9/5c/a66db0c0724a0dc7d683e0cd45c90a3f46273f5379f63bdd29152a724061/docling_core-2.40.0-py3-none-any.whl", hash = "sha256:439ae2aab3a2e4044df9ae76926325f8ae65dac6b3e6fd1911168cbdf6df27df", size = 153028, upload-time = "2025-07-02T16:23:22.064Z" },
]
[package.optional-dependencies]