mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
fix(ocr): move bounding box rotation util to orientation.py
Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
parent
6c88365c66
commit
0b39bb58bf
@ -21,11 +21,11 @@ from docling.datamodel.pipeline_options import (
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
from docling.utils.ocr_utils import (
|
||||
Box,
|
||||
map_tesseract_script,
|
||||
parse_tesseract_orientation,
|
||||
tesseract_box_to_bounding_rectangle,
|
||||
)
|
||||
from docling.utils.orientation import Box
|
||||
from docling.utils.profiling import TimeRecorder
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
@ -1,13 +1,14 @@
|
||||
from typing import Optional, Tuple
|
||||
from typing import Optional
|
||||
|
||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
from docling_core.types.doc.page import BoundingRectangle
|
||||
|
||||
_TESSERACT_ORIENTATIONS = {0, 90, 180, 270}
|
||||
|
||||
Point = Tuple[float, float]
|
||||
Box = Tuple[float, float, float, float]
|
||||
Size = Tuple[int, int]
|
||||
from docling.utils.orientation import (
|
||||
Box,
|
||||
Size,
|
||||
CLIPPED_ORIENTATIONS,
|
||||
rotate_ltwh_bounding_box,
|
||||
)
|
||||
|
||||
|
||||
def map_tesseract_script(script: str) -> str:
|
||||
@ -21,45 +22,18 @@ def map_tesseract_script(script: str) -> str:
|
||||
return script
|
||||
|
||||
|
||||
def reverse_tesseract_preprocessing_rotation(
    box: Box, orientation: int, rotated_im_size: Size
) -> tuple[Point, Point, Point, Point]:
    """Return the four corners of a left/top/width/height box for an orientation.

    The box is ``(left, top, width, height)`` in TOPLEFT coordinates and
    ``rotated_im_size`` is unpacked as ``(width, height)``.

    The bounding rectangle starts with ``r_0`` at the bottom left whatever the
    coordinate system; the other corners are found rotating counterclockwise.

    Args:
        box: ``(left, top, width, height)`` of the box.
        orientation: one of 0, 90, 180 or 270.
        rotated_im_size: ``(width, height)`` of the rotated image.
            NOTE(review): presumably the image tesseract ran on after its
            preprocessing rotation -- confirm against the caller.

    Returns:
        The corners ``(r_0, r_1, r_2, r_3)`` as ``(x, y)`` tuples.

    Raises:
        ValueError: if ``orientation`` is not one of the handled values.
    """
    left, top, width, height = box
    rotated_im_w, rotated_im_h = rotated_im_size
    if orientation == 0:
        r0_x = left
        r0_y = top + height
        return (
            (r0_x, r0_y),
            (r0_x + width, r0_y),
            (r0_x + width, r0_y - height),
            (r0_x, r0_y - height),
        )
    if orientation == 90:
        r0_x = rotated_im_h - (top + height)
        r0_y = left
        # The last corner must close the rectangle at (r0_x + height, r0_y);
        # it previously repeated the second corner, giving a degenerate shape.
        return (
            (r0_x, r0_y),
            (r0_x, r0_y + width),
            (r0_x + height, r0_y + width),
            (r0_x + height, r0_y),
        )
    if orientation == 180:
        r0_x = rotated_im_w - left
        r0_y = rotated_im_h - (top + height)
        return (
            (r0_x, r0_y),
            (r0_x - width, r0_y),
            (r0_x - width, r0_y + height),
            (r0_x, r0_y + height),
        )
    if orientation == 270:
        r0_x = top + height
        r0_y = rotated_im_w - left
        return (
            (r0_x, r0_y),
            (r0_x, r0_y - width),
            (r0_x - height, r0_y - width),
            (r0_x - height, r0_y),
        )
    msg = (
        f"invalid tesseract document orientation {orientation}, "
        f"expected orientation: {sorted(_TESSERACT_ORIENTATIONS)}"
    )
    raise ValueError(msg)
|
||||
|
||||
|
||||
def parse_tesseract_orientation(orientation: str) -> int:
    """Convert a tesseract orientation string to a counterclockwise angle.

    Tesseract orientation is [0, 90, 180, 270] clockwise, while bounding
    rectangle angles are [0, 360[ counterclockwise, so the parsed value is
    negated and wrapped modulo 360.

    Args:
        orientation: the orientation reported by tesseract, as a string
            holding an integer.

    Returns:
        The equivalent counterclockwise angle in [0, 360[.

    Raises:
        ValueError: if the string is not an integer or the parsed value is
            not in ``CLIPPED_ORIENTATIONS``.
    """
    parsed = int(orientation)
    if parsed not in CLIPPED_ORIENTATIONS:
        msg = (
            f"invalid tesseract document orientation {orientation}, "
            f"expected orientation: {sorted(CLIPPED_ORIENTATIONS)}"
        )
        raise ValueError(msg)
    # Clockwise -> counterclockwise, kept inside [0, 360[.
    return -parsed % 360
|
||||
|
||||
|
||||
@ -72,9 +46,7 @@ def tesseract_box_to_bounding_rectangle(
|
||||
rotated_image_size: Size,
|
||||
) -> BoundingRectangle:
|
||||
# box is in the top, left, height, width format + top left orientation
|
||||
r_0, r_1, r_2, r_3 = reverse_tesseract_preprocessing_rotation(
|
||||
box, orientation, rotated_image_size
|
||||
)
|
||||
r_0, r_1, r_2, r_3 = rotate_ltwh_bounding_box(box, orientation, rotated_image_size)
|
||||
rect = BoundingRectangle(
|
||||
r_x0=r_0[0] / scale,
|
||||
r_y0=r_0[1] / scale,
|
||||
|
@ -1,13 +1,19 @@
|
||||
from collections import Counter
|
||||
from operator import itemgetter
|
||||
from typing import Tuple
|
||||
|
||||
from docling_core.types.doc.page import TextCell
|
||||
|
||||
_ORIENTATIONS = [0, 90, 180, 270]
|
||||
|
||||
Point = Tuple[float, float]
|
||||
Box = Tuple[float, float, float, float]
|
||||
Size = Tuple[int, int]
|
||||
|
||||
CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
|
||||
|
||||
|
||||
def _clipped_orientation(angle: float) -> int:
    """Return the entry of ``CLIPPED_ORIENTATIONS`` closest to ``angle``.

    Closeness is measured on the circle, so an angle just below 360 degrees
    clips to 0 rather than to 270. Ties resolve to the smaller orientation
    because the candidates are compared as ``(distance, orientation)`` tuples.

    Args:
        angle: an angle in degrees.

    Returns:
        The nearest value among ``CLIPPED_ORIENTATIONS``.
    """

    def circular_distance(candidate: int) -> float:
        # abs(...) % 360 alone treats 350 degrees as 350 away from 0; take the
        # shorter way around the circle instead.
        delta = abs(angle - candidate) % 360
        return min(delta, 360 - delta)

    return min((circular_distance(o), o) for o in CLIPPED_ORIENTATIONS)[1]
|
||||
|
||||
|
||||
def detect_orientation(cells: list[TextCell]) -> int:
|
||||
@ -15,3 +21,34 @@ def detect_orientation(cells: list[TextCell]) -> int:
|
||||
return 0
|
||||
orientation_counter = Counter(_clipped_orientation(c.rect.angle_360) for c in cells)
|
||||
return max(orientation_counter.items(), key=itemgetter(1))[0]
|
||||
|
||||
|
||||
def rotate_ltwh_bounding_box(
    box: Box, orientation: int, rotated_im_size: Size
) -> tuple[Point, Point, Point, Point]:
    """Return the four corners of a left/top/width/height box for an orientation.

    The box is ``(left, top, width, height)`` in TOPLEFT coordinates and
    ``rotated_im_size`` is unpacked as ``(width, height)``.

    The bounding rectangle starts with ``r_0`` at the bottom left whatever the
    coordinate system; the other corners are found rotating counterclockwise.

    Args:
        box: ``(left, top, width, height)`` of the box.
        orientation: one of 0, 90, 180 or 270.
        rotated_im_size: ``(width, height)`` of the image the box was
            measured in. NOTE(review): presumably the rotated image -- confirm
            against the caller.

    Returns:
        The corners ``(r_0, r_1, r_2, r_3)`` as ``(x, y)`` tuples.

    Raises:
        ValueError: if ``orientation`` is not one of the handled values.
    """
    left, top, width, height = box
    rotated_im_w, rotated_im_h = rotated_im_size
    if orientation == 0:
        r0_x = left
        r0_y = top + height
        return (
            (r0_x, r0_y),
            (r0_x + width, r0_y),
            (r0_x + width, r0_y - height),
            (r0_x, r0_y - height),
        )
    if orientation == 90:
        r0_x = top + height
        r0_y = rotated_im_w - left
        return (
            (r0_x, r0_y),
            (r0_x, r0_y - width),
            (r0_x - height, r0_y - width),
            (r0_x - height, r0_y),
        )
    if orientation == 180:
        r0_x = rotated_im_w - left
        r0_y = rotated_im_h - (top + height)
        return (
            (r0_x, r0_y),
            (r0_x - width, r0_y),
            (r0_x - width, r0_y + height),
            (r0_x, r0_y + height),
        )
    if orientation == 270:
        r0_x = rotated_im_h - (top + height)
        r0_y = left
        # The last corner must close the rectangle at (r0_x + height, r0_y);
        # it previously repeated the second corner, giving a degenerate shape.
        return (
            (r0_x, r0_y),
            (r0_x, r0_y + width),
            (r0_x + height, r0_y + width),
            (r0_x + height, r0_y),
        )
    msg = (
        f"orientation {orientation}, expected values in:"
        f" {sorted(CLIPPED_ORIENTATIONS)}"
    )
    raise ValueError(msg)
|
||||
|
Loading…
Reference in New Issue
Block a user