From 118afee1f33008a3a1ae46f8ddf46ae37896b8f2 Mon Sep 17 00:00:00 2001
From: Nikos Livathinos
Date: Tue, 8 Oct 2024 14:17:54 +0200
Subject: [PATCH] fix(TesserOcrModel): Fix cell coordinates

Signed-off-by: Nikos Livathinos
---
 docling/models/tesserocr_model.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/docling/models/tesserocr_model.py b/docling/models/tesserocr_model.py
index a26c29e3..da320870 100644
--- a/docling/models/tesserocr_model.py
+++ b/docling/models/tesserocr_model.py
@@ -47,7 +47,6 @@ class TesserOcrModel(BaseOcrModel):
     def __del__(self):
         if self.reader is not None:
             # Finalize the tesseractAPI
-            _log.debug("Finalize TesserOCR")
             self.reader.End()
 
     def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
@@ -91,9 +90,8 @@ class TesserOcrModel(BaseOcrModel):
                                 text=text,
                                 confidence=confidence,
                                 bbox=BoundingBox.from_tuple(
-                                    # l, b, r, t = coord[0], coord[1], coord[2], coord[3]
-                                    coord=(left, bottom, right, top),
-                                    origin=CoordOrigin.BOTTOMLEFT,
+                                    coord=(left, top, right, bottom),
+                                    origin=CoordOrigin.TOPLEFT,
                                 ),
                             )
                         )