chore(ocr): revert layout updates

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
Clément Doumouro 2025-04-08 17:37:58 +02:00
parent fdc6a01bc8
commit e2becbcaaf
2 changed files with 2 additions and 18 deletions

View File

@ -16,7 +16,6 @@ from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel
from docling.utils.accelerator_utils import decide_device
from docling.utils.layout_postprocessor import LayoutPostprocessor
from docling.utils.orientation import detect_orientation
from docling.utils.profiling import TimeRecorder
from docling.utils.visualization import draw_clusters
@ -153,9 +152,7 @@ class LayoutModel(BasePageModel):
assert page.size is not None
page_image = page.get_image(scale=1.0)
assert page_image is not None
page_orientation = detect_orientation(page.cells)
if page_orientation:
page_image = page_image.rotate(-page_orientation, expand=True)
clusters = []
for ix, pred_item in enumerate(
self.layout_predictor.predict(page_image)

View File

@ -1,24 +1,11 @@
from collections import Counter
from operator import itemgetter
from typing import Tuple
from docling_core.types.doc import BoundingBox, CoordOrigin
from docling_core.types.doc.page import BoundingRectangle, TextCell
from docling_core.types.doc.page import BoundingRectangle
CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
def _clipped_orientation(angle: float) -> int:
return min((abs(angle - o) % 360, o) for o in CLIPPED_ORIENTATIONS)[1]
def detect_orientation(cells: list[TextCell]) -> int:
    """Return the clipped orientation shared by the largest number of cells.

    Each cell's ``rect.angle_360`` is snapped with ``_clipped_orientation``
    and the snapped values are tallied. The most frequent value is returned;
    on a tie, the value that was encountered first among the cells wins.

    Args:
        cells: Text cells to inspect.

    Returns:
        The winning orientation, or 0 when ``cells`` is empty.
    """
    if cells:
        votes = Counter()
        for cell in cells:
            votes[_clipped_orientation(cell.rect.angle_360)] += 1
        # most_common(1) keeps first-encountered order among equal counts.
        ((winner, _count),) = votes.most_common(1)
        return winner
    return 0
def rotate_bounding_box(
bbox: BoundingBox, angle: int, im_size: Tuple[int, int]
) -> BoundingRectangle: