fix(ocr): rotate image to the natural orientation before layout prediction

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
Clément Doumouro 2025-04-04 17:31:45 +02:00
parent ec588df971
commit b54eeb185f
7 changed files with 31 additions and 6 deletions

View File

@ -18,6 +18,7 @@ from docling.models.base_model import BasePageModel
from docling.models.utils.hf_model_download import download_hf_model from docling.models.utils.hf_model_download import download_hf_model
from docling.utils.accelerator_utils import decide_device from docling.utils.accelerator_utils import decide_device
from docling.utils.layout_postprocessor import LayoutPostprocessor from docling.utils.layout_postprocessor import LayoutPostprocessor
from docling.utils.orientation import detect_orientation
from docling.utils.profiling import TimeRecorder from docling.utils.profiling import TimeRecorder
from docling.utils.visualization import draw_clusters from docling.utils.visualization import draw_clusters
@ -155,7 +156,9 @@ class LayoutModel(BasePageModel):
assert page.size is not None assert page.size is not None
page_image = page.get_image(scale=1.0) page_image = page.get_image(scale=1.0)
assert page_image is not None assert page_image is not None
page_orientation = detect_orientation(page.cells)
if page_orientation:
page_image = page_image.rotate(-page_orientation, expand=True)
clusters = [] clusters = []
for ix, pred_item in enumerate( for ix, pred_item in enumerate(
self.layout_predictor.predict(page_image) self.layout_predictor.predict(page_image)

View File

@ -1,3 +1,20 @@
from collections import Counter
from operator import itemgetter
from docling_core.types.doc.page import TextCell
_ORIENTATIONS = [0, 90, 180, 270]
def _clipped_orientation(angle: float) -> int:
return min((abs(angle - o) % 360, o) for o in _ORIENTATIONS)[1]
def detect_orientation(cells: list[TextCell]) -> int:
    """Return the dominant orientation among the given text cells.

    Each cell's ``rect.angle_360`` is snapped to a canonical orientation with
    ``_clipped_orientation`` and the most frequent result wins. If several
    orientations are equally frequent, the one encountered first in ``cells``
    is returned.

    Args:
        cells: Text cells of a page (presumably ``angle_360`` is expressed in
            degrees; confirm against ``docling_core``).

    Returns:
        The most common snapped orientation, or 0 when ``cells`` is empty.
    """
    if cells:
        # Tally one vote per cell for its snapped orientation.
        votes = Counter()
        for cell in cells:
            votes[_clipped_orientation(cell.rect.angle_360)] += 1
        # ``max`` keeps the first maximal key, i.e. the earliest-seen
        # orientation among those sharing the highest count.
        return max(votes, key=votes.__getitem__)
    return 0
from typing import Tuple from typing import Tuple
from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc import BoundingBox, CoordOrigin

View File

@ -1,4 +1,4 @@
<document> <document>
<paragraph><location><page_1><loc_74><loc_16><loc_88><loc_18></location>package</paragraph> <paragraph><location><page_1><loc_75><loc_16><loc_88><loc_18></location>package</paragraph>
<paragraph><location><page_1><loc_15><loc_9><loc_88><loc_15></location>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</paragraph> <paragraph><location><page_1><loc_15><loc_9><loc_88><loc_15></location>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</paragraph>
</document> </document>

View File

@ -1,3 +1,3 @@
<document> <document>
<paragraph><location><page_1><loc_82><loc_74><loc_84><loc_88></location>package</paragraph> <paragraph><location><page_1><loc_82><loc_75><loc_84><loc_88></location>package</paragraph>
</document> </document>

View File

@ -1,3 +1,4 @@
<document> <document>
<paragraph><location><page_1><loc_16><loc_12><loc_18><loc_26></location>package</paragraph> <paragraph><location><page_1><loc_9><loc_12><loc_11><loc_85></location>Docling bundles PDF document conversion to</paragraph>
<paragraph><location><page_1><loc_12><loc_12><loc_15><loc_85></location><location><page_1><loc_12><loc_12><loc_15><loc_85></location>JSON and Markdown in an easy self contained package</paragraph>
</document> </document>

View File

@ -1 +1,3 @@
package Docling bundles PDF document conversion to
JSON and Markdown in an easy self contained package

View File

@ -1 +1,3 @@
package Docling bundles PDF document conversion to
JSON and Markdown in an easy self contained package