mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
fix(ocr): move bounding box rotation util to orientation.py
Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
parent
6c88365c66
commit
0b39bb58bf
@ -21,11 +21,11 @@ from docling.datamodel.pipeline_options import (
|
|||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
from docling.models.base_ocr_model import BaseOcrModel
|
from docling.models.base_ocr_model import BaseOcrModel
|
||||||
from docling.utils.ocr_utils import (
|
from docling.utils.ocr_utils import (
|
||||||
Box,
|
|
||||||
map_tesseract_script,
|
map_tesseract_script,
|
||||||
parse_tesseract_orientation,
|
parse_tesseract_orientation,
|
||||||
tesseract_box_to_bounding_rectangle,
|
tesseract_box_to_bounding_rectangle,
|
||||||
)
|
)
|
||||||
|
from docling.utils.orientation import Box
|
||||||
from docling.utils.profiling import TimeRecorder
|
from docling.utils.profiling import TimeRecorder
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
@ -1,13 +1,14 @@
|
|||||||
from typing import Optional, Tuple
|
from typing import Optional
|
||||||
|
|
||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
from docling_core.types.doc.page import BoundingRectangle
|
from docling_core.types.doc.page import BoundingRectangle
|
||||||
|
|
||||||
_TESSERACT_ORIENTATIONS = {0, 90, 180, 270}
|
from docling.utils.orientation import (
|
||||||
|
Box,
|
||||||
Point = Tuple[float, float]
|
Size,
|
||||||
Box = Tuple[float, float, float, float]
|
CLIPPED_ORIENTATIONS,
|
||||||
Size = Tuple[int, int]
|
rotate_ltwh_bounding_box,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def map_tesseract_script(script: str) -> str:
|
def map_tesseract_script(script: str) -> str:
|
||||||
@ -21,45 +22,18 @@ def map_tesseract_script(script: str) -> str:
|
|||||||
return script
|
return script
|
||||||
|
|
||||||
|
|
||||||
def reverse_tesseract_preprocessing_rotation(
|
|
||||||
box: Box, orientation: int, rotated_im_size: Size
|
|
||||||
) -> tuple[Point, Point, Point, Point]:
|
|
||||||
# The box is left top width height in TOPLEFT coordinates
|
|
||||||
# Bounding rectangle start with r_0 at the bottom left whatever the
|
|
||||||
# coordinate system. Then other corners are found rotating counterclockwise
|
|
||||||
l, t, w, h = box
|
|
||||||
rotated_im_w, rotated_im_h = rotated_im_size
|
|
||||||
if orientation == 0:
|
|
||||||
r0_x = l
|
|
||||||
r0_y = t + h
|
|
||||||
return (r0_x, r0_y), (r0_x + w, r0_y), (r0_x + w, r0_y - h), (r0_x, r0_y - h)
|
|
||||||
if orientation == 90:
|
|
||||||
r0_x = rotated_im_h - (t + h)
|
|
||||||
r0_y = l
|
|
||||||
return (r0_x, r0_y), (r0_x, r0_y + w), (r0_x + h, r0_y + w), (r0_x, r0_y + w)
|
|
||||||
if orientation == 180:
|
|
||||||
r0_x = rotated_im_w - l
|
|
||||||
r0_y = rotated_im_h - (t + h)
|
|
||||||
return (r0_x, r0_y), (r0_x - w, r0_y), (r0_x - w, r0_y + h), (r0_x, r0_y + h)
|
|
||||||
if orientation == 270:
|
|
||||||
r0_x = t + h
|
|
||||||
r0_y = rotated_im_w - l
|
|
||||||
return (r0_x, r0_y), (r0_x, r0_y - w), (r0_x - h, r0_y - w), (r0_x - h, r0_y)
|
|
||||||
msg = (
|
|
||||||
f"invalid tesseract document orientation {orientation}, "
|
|
||||||
f"expected orientation: {sorted(_TESSERACT_ORIENTATIONS)}"
|
|
||||||
)
|
|
||||||
raise ValueError(msg)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_tesseract_orientation(orientation: str) -> int:
|
def parse_tesseract_orientation(orientation: str) -> int:
|
||||||
|
# Tesseract orientation is [0, 90, 180, 270] clockwise, bounding rectangle angles
|
||||||
|
# are [0, 360[ counterclockwise
|
||||||
parsed = int(orientation)
|
parsed = int(orientation)
|
||||||
if parsed not in _TESSERACT_ORIENTATIONS:
|
if parsed not in CLIPPED_ORIENTATIONS:
|
||||||
msg = (
|
msg = (
|
||||||
f"invalid tesseract document orientation {orientation}, "
|
f"invalid tesseract document orientation {orientation}, "
|
||||||
f"expected orientation: {sorted(_TESSERACT_ORIENTATIONS)}"
|
f"expected orientation: {sorted(CLIPPED_ORIENTATIONS)}"
|
||||||
)
|
)
|
||||||
raise ValueError(msg)
|
raise ValueError(msg)
|
||||||
|
parsed = -parsed
|
||||||
|
parsed %= 360
|
||||||
return parsed
|
return parsed
|
||||||
|
|
||||||
|
|
||||||
@ -72,9 +46,7 @@ def tesseract_box_to_bounding_rectangle(
|
|||||||
rotated_image_size: Size,
|
rotated_image_size: Size,
|
||||||
) -> BoundingRectangle:
|
) -> BoundingRectangle:
|
||||||
# box is in the top, left, height, width format + top left orientation
|
# box is in the top, left, height, width format + top left orientation
|
||||||
r_0, r_1, r_2, r_3 = reverse_tesseract_preprocessing_rotation(
|
r_0, r_1, r_2, r_3 = rotate_ltwh_bounding_box(box, orientation, rotated_image_size)
|
||||||
box, orientation, rotated_image_size
|
|
||||||
)
|
|
||||||
rect = BoundingRectangle(
|
rect = BoundingRectangle(
|
||||||
r_x0=r_0[0] / scale,
|
r_x0=r_0[0] / scale,
|
||||||
r_y0=r_0[1] / scale,
|
r_y0=r_0[1] / scale,
|
||||||
|
@ -1,13 +1,19 @@
|
|||||||
from collections import Counter
|
from collections import Counter
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
from docling_core.types.doc.page import TextCell
|
from docling_core.types.doc.page import TextCell
|
||||||
|
|
||||||
_ORIENTATIONS = [0, 90, 180, 270]
|
|
||||||
|
Point = Tuple[float, float]
|
||||||
|
Box = Tuple[float, float, float, float]
|
||||||
|
Size = Tuple[int, int]
|
||||||
|
|
||||||
|
CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
|
||||||
|
|
||||||
|
|
||||||
def _clipped_orientation(angle: float) -> int:
|
def _clipped_orientation(angle: float) -> int:
|
||||||
return min((abs(angle - o) % 360, o) for o in _ORIENTATIONS)[1]
|
return min((abs(angle - o) % 360, o) for o in CLIPPED_ORIENTATIONS)[1]
|
||||||
|
|
||||||
|
|
||||||
def detect_orientation(cells: list[TextCell]) -> int:
|
def detect_orientation(cells: list[TextCell]) -> int:
|
||||||
@ -15,3 +21,34 @@ def detect_orientation(cells: list[TextCell]) -> int:
|
|||||||
return 0
|
return 0
|
||||||
orientation_counter = Counter(_clipped_orientation(c.rect.angle_360) for c in cells)
|
orientation_counter = Counter(_clipped_orientation(c.rect.angle_360) for c in cells)
|
||||||
return max(orientation_counter.items(), key=itemgetter(1))[0]
|
return max(orientation_counter.items(), key=itemgetter(1))[0]
|
||||||
|
|
||||||
|
|
||||||
|
def rotate_ltwh_bounding_box(
    box: Box, orientation: int, rotated_im_size: Size
) -> tuple[Point, Point, Point, Point]:
    """Return the four corners of a left-top-width-height box for an orientation.

    The box is given as ``(left, top, width, height)`` in TOPLEFT coordinates.
    The returned bounding rectangle starts with ``r_0`` at the bottom left,
    whatever the coordinate system; the other corners are found by rotating
    counterclockwise.

    Args:
        box: ``(left, top, width, height)`` of the box.
        orientation: One of 0, 90, 180 or 270.
        rotated_im_size: ``(width, height)`` of the rotated image.

    Returns:
        The four corner points ``(r_0, r_1, r_2, r_3)``, each an ``(x, y)`` pair.

    Raises:
        ValueError: If ``orientation`` is not one of the supported values.
    """
    left, top, width, height = box
    rotated_im_w, rotated_im_h = rotated_im_size
    if orientation == 0:
        r0_x = left
        r0_y = top + height
        return (
            (r0_x, r0_y),
            (r0_x + width, r0_y),
            (r0_x + width, r0_y - height),
            (r0_x, r0_y - height),
        )
    if orientation == 90:
        r0_x = top + height
        r0_y = rotated_im_w - left
        return (
            (r0_x, r0_y),
            (r0_x, r0_y - width),
            (r0_x - height, r0_y - width),
            (r0_x - height, r0_y),
        )
    if orientation == 180:
        r0_x = rotated_im_w - left
        r0_y = rotated_im_h - (top + height)
        return (
            (r0_x, r0_y),
            (r0_x - width, r0_y),
            (r0_x - width, r0_y + height),
            (r0_x, r0_y + height),
        )
    if orientation == 270:
        r0_x = rotated_im_h - (top + height)
        r0_y = left
        # The last corner closes the rectangle next to r_0; it used to be a
        # duplicate of the second corner, which produced a degenerate shape.
        return (
            (r0_x, r0_y),
            (r0_x, r0_y + width),
            (r0_x + height, r0_y + width),
            (r0_x + height, r0_y),
        )
    msg = (
        f"orientation {orientation}, expected values in:"
        f" {sorted(CLIPPED_ORIENTATIONS)}"
    )
    raise ValueError(msg)
|
||||||
|
Loading…
Reference in New Issue
Block a user