From e2becbcaafa0d184e23ee0eabb9cd2e80c816931 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cle=CC=81ment=20Doumouro?= Date: Tue, 8 Apr 2025 17:37:58 +0200 Subject: [PATCH] chore(ocr): revert layout updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Clément Doumouro --- docling/models/layout_model.py | 5 +---- docling/utils/orientation.py | 15 +-------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py index c607a237..ae373012 100644 --- a/docling/models/layout_model.py +++ b/docling/models/layout_model.py @@ -16,7 +16,6 @@ from docling.datamodel.settings import settings from docling.models.base_model import BasePageModel from docling.utils.accelerator_utils import decide_device from docling.utils.layout_postprocessor import LayoutPostprocessor -from docling.utils.orientation import detect_orientation from docling.utils.profiling import TimeRecorder from docling.utils.visualization import draw_clusters @@ -153,9 +152,7 @@ class LayoutModel(BasePageModel): assert page.size is not None page_image = page.get_image(scale=1.0) assert page_image is not None - page_orientation = detect_orientation(page.cells) - if page_orientation: - page_image = page_image.rotate(-page_orientation, expand=True) + clusters = [] for ix, pred_item in enumerate( self.layout_predictor.predict(page_image) diff --git a/docling/utils/orientation.py b/docling/utils/orientation.py index faaeb7aa..5055358a 100644 --- a/docling/utils/orientation.py +++ b/docling/utils/orientation.py @@ -1,24 +1,11 @@ -from collections import Counter -from operator import itemgetter from typing import Tuple from docling_core.types.doc import BoundingBox, CoordOrigin -from docling_core.types.doc.page import BoundingRectangle, TextCell +from docling_core.types.doc.page import BoundingRectangle CLIPPED_ORIENTATIONS = [0, 90, 180, 270] -def _clipped_orientation(angle: float) -> int: - return min((abs(angle - o) % 360, o) for o in CLIPPED_ORIENTATIONS)[1] - - -def detect_orientation(cells: list[TextCell]) -> int: - if not cells: - return 0 - orientation_counter = Counter(_clipped_orientation(c.rect.angle_360) for c in cells) - return max(orientation_counter.items(), key=itemgetter(1))[0] - - def rotate_bounding_box( bbox: BoundingBox, angle: int, im_size: Tuple[int, int] ) -> BoundingRectangle: