mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
chore(ocr): revert layout updates
Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
parent
fdc6a01bc8
commit
e2becbcaaf
@ -16,7 +16,6 @@ from docling.datamodel.settings import settings
|
|||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_model import BasePageModel
|
||||||
from docling.utils.accelerator_utils import decide_device
|
from docling.utils.accelerator_utils import decide_device
|
||||||
from docling.utils.layout_postprocessor import LayoutPostprocessor
|
from docling.utils.layout_postprocessor import LayoutPostprocessor
|
||||||
from docling.utils.orientation import detect_orientation
|
|
||||||
from docling.utils.profiling import TimeRecorder
|
from docling.utils.profiling import TimeRecorder
|
||||||
from docling.utils.visualization import draw_clusters
|
from docling.utils.visualization import draw_clusters
|
||||||
|
|
||||||
@ -153,9 +152,7 @@ class LayoutModel(BasePageModel):
|
|||||||
assert page.size is not None
|
assert page.size is not None
|
||||||
page_image = page.get_image(scale=1.0)
|
page_image = page.get_image(scale=1.0)
|
||||||
assert page_image is not None
|
assert page_image is not None
|
||||||
page_orientation = detect_orientation(page.cells)
|
|
||||||
if page_orientation:
|
|
||||||
page_image = page_image.rotate(-page_orientation, expand=True)
|
|
||||||
clusters = []
|
clusters = []
|
||||||
for ix, pred_item in enumerate(
|
for ix, pred_item in enumerate(
|
||||||
self.layout_predictor.predict(page_image)
|
self.layout_predictor.predict(page_image)
|
||||||
|
@ -1,24 +1,11 @@
|
|||||||
from collections import Counter
|
|
||||||
from operator import itemgetter
|
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
|
||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
from docling_core.types.doc.page import BoundingRectangle
|
||||||
|
|
||||||
CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
|
CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
|
||||||
|
|
||||||
|
|
||||||
def _clipped_orientation(angle: float) -> int:
|
|
||||||
return min((abs(angle - o) % 360, o) for o in CLIPPED_ORIENTATIONS)[1]
|
|
||||||
|
|
||||||
|
|
||||||
def detect_orientation(cells: list[TextCell]) -> int:
|
|
||||||
if not cells:
|
|
||||||
return 0
|
|
||||||
orientation_counter = Counter(_clipped_orientation(c.rect.angle_360) for c in cells)
|
|
||||||
return max(orientation_counter.items(), key=itemgetter(1))[0]
|
|
||||||
|
|
||||||
|
|
||||||
def rotate_bounding_box(
|
def rotate_bounding_box(
|
||||||
bbox: BoundingBox, angle: int, im_size: Tuple[int, int]
|
bbox: BoundingBox, angle: int, im_size: Tuple[int, int]
|
||||||
) -> BoundingRectangle:
|
) -> BoundingRectangle:
|
||||||
|
Loading…
Reference in New Issue
Block a user