fix(ocr): move bounding bow rotation util to orientation.py

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
Clément Doumouro 2025-04-08 15:08:24 +02:00
parent 6c88365c66
commit 0b39bb58bf
3 changed files with 54 additions and 45 deletions

View File

@ -21,11 +21,11 @@ from docling.datamodel.pipeline_options import (
from docling.datamodel.settings import settings
from docling.models.base_ocr_model import BaseOcrModel
from docling.utils.ocr_utils import (
Box,
map_tesseract_script,
parse_tesseract_orientation,
tesseract_box_to_bounding_rectangle,
)
from docling.utils.orientation import Box
from docling.utils.profiling import TimeRecorder
_log = logging.getLogger(__name__)

View File

@ -1,13 +1,14 @@
from typing import Optional, Tuple
from typing import Optional
from docling_core.types.doc import BoundingBox, CoordOrigin
from docling_core.types.doc.page import BoundingRectangle
_TESSERACT_ORIENTATIONS = {0, 90, 180, 270}
Point = Tuple[float, float]
Box = Tuple[float, float, float, float]
Size = Tuple[int, int]
from docling.utils.orientation import (
Box,
Size,
CLIPPED_ORIENTATIONS,
rotate_ltwh_bounding_box,
)
def map_tesseract_script(script: str) -> str:
@ -21,45 +22,18 @@ def map_tesseract_script(script: str) -> str:
return script
def reverse_tesseract_preprocessing_rotation(
box: Box, orientation: int, rotated_im_size: Size
) -> tuple[Point, Point, Point, Point]:
# The box is left top width height in TOPLEFT coordinates
# Bounding rectangle start with r_0 at the bottom left whatever the
# coordinate system. Then other corners are found rotating counterclockwise
l, t, w, h = box
rotated_im_w, rotated_im_h = rotated_im_size
if orientation == 0:
r0_x = l
r0_y = t + h
return (r0_x, r0_y), (r0_x + w, r0_y), (r0_x + w, r0_y - h), (r0_x, r0_y - h)
if orientation == 90:
r0_x = rotated_im_h - (t + h)
r0_y = l
return (r0_x, r0_y), (r0_x, r0_y + w), (r0_x + h, r0_y + w), (r0_x, r0_y + w)
if orientation == 180:
r0_x = rotated_im_w - l
r0_y = rotated_im_h - (t + h)
return (r0_x, r0_y), (r0_x - w, r0_y), (r0_x - w, r0_y + h), (r0_x, r0_y + h)
if orientation == 270:
r0_x = t + h
r0_y = rotated_im_w - l
return (r0_x, r0_y), (r0_x, r0_y - w), (r0_x - h, r0_y - w), (r0_x - h, r0_y)
msg = (
f"invalid tesseract document orientation {orientation}, "
f"expected orientation: {sorted(_TESSERACT_ORIENTATIONS)}"
)
raise ValueError(msg)
def parse_tesseract_orientation(orientation: str) -> int:
# Tesseract orientation is [0, 90, 180, 270] clockwise, bounding rectangle angles
# are [0, 360[ counterclockwise
parsed = int(orientation)
if parsed not in _TESSERACT_ORIENTATIONS:
if parsed not in CLIPPED_ORIENTATIONS:
msg = (
f"invalid tesseract document orientation {orientation}, "
f"expected orientation: {sorted(_TESSERACT_ORIENTATIONS)}"
f"expected orientation: {sorted(CLIPPED_ORIENTATIONS)}"
)
raise ValueError(msg)
parsed = -parsed
parsed %= 360
return parsed
@ -72,9 +46,7 @@ def tesseract_box_to_bounding_rectangle(
rotated_image_size: Size,
) -> BoundingRectangle:
# box is in the top, left, height, width format + top left orientation
r_0, r_1, r_2, r_3 = reverse_tesseract_preprocessing_rotation(
box, orientation, rotated_image_size
)
r_0, r_1, r_2, r_3 = rotate_ltwh_bounding_box(box, orientation, rotated_image_size)
rect = BoundingRectangle(
r_x0=r_0[0] / scale,
r_y0=r_0[1] / scale,

View File

@ -1,13 +1,19 @@
from collections import Counter
from operator import itemgetter
from typing import Tuple
from docling_core.types.doc.page import TextCell
_ORIENTATIONS = [0, 90, 180, 270]
Point = Tuple[float, float]
Box = Tuple[float, float, float, float]
Size = Tuple[int, int]
CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
def _clipped_orientation(angle: float) -> int:
return min((abs(angle - o) % 360, o) for o in _ORIENTATIONS)[1]
return min((abs(angle - o) % 360, o) for o in CLIPPED_ORIENTATIONS)[1]
def detect_orientation(cells: list[TextCell]) -> int:
@ -15,3 +21,34 @@ def detect_orientation(cells: list[TextCell]) -> int:
return 0
orientation_counter = Counter(_clipped_orientation(c.rect.angle_360) for c in cells)
return max(orientation_counter.items(), key=itemgetter(1))[0]
def rotate_ltwh_bounding_box(
box: Box, orientation: int, rotated_im_size: Size
) -> tuple[Point, Point, Point, Point]:
# The box is left top width height in TOPLEFT coordinates
# Bounding rectangle start with r_0 at the bottom left whatever the
# coordinate system. Then other corners are found rotating counterclockwise
l, t, w, h = box
rotated_im_w, rotated_im_h = rotated_im_size
if orientation == 0:
r0_x = l
r0_y = t + h
return (r0_x, r0_y), (r0_x + w, r0_y), (r0_x + w, r0_y - h), (r0_x, r0_y - h)
if orientation == 90:
r0_x = t + h
r0_y = rotated_im_w - l
return (r0_x, r0_y), (r0_x, r0_y - w), (r0_x - h, r0_y - w), (r0_x - h, r0_y)
if orientation == 180:
r0_x = rotated_im_w - l
r0_y = rotated_im_h - (t + h)
return (r0_x, r0_y), (r0_x - w, r0_y), (r0_x - w, r0_y + h), (r0_x, r0_y + h)
if orientation == 270:
r0_x = rotated_im_h - (t + h)
r0_y = l
return (r0_x, r0_y), (r0_x, r0_y + w), (r0_x + h, r0_y + w), (r0_x, r0_y + w)
msg = (
f"orientation {orientation}, expected values in:"
f" {sorted(CLIPPED_ORIENTATIONS)}"
)
raise ValueError(msg)