mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-30 14:04:27 +00:00
[Feature] Add OnnxTR as possible OCR engine
Signed-off-by: felix <felixdittrich92@gmail.com>
This commit is contained in:
parent
a19cf81f98
commit
cfc42458ae
@ -157,16 +157,23 @@ class OnnxtrOcrOptions(OcrOptions):
|
||||
kind: ClassVar[Literal["onnxtr"]] = "onnxtr"
|
||||
|
||||
lang: List[str] = ["en", "fr"]
|
||||
confidence_score: float = 0.5
|
||||
# word confidence threshold for the recognition model
|
||||
confidence_score: float = 0.7
|
||||
# detection model objectness score threshold 'fast algorithm'
|
||||
objectness_score: float = 0.3
|
||||
|
||||
# NOTE: This can also be a Hugging Face Hub model
|
||||
det_arch: str = "fast_base"
|
||||
reco_arch: str = "crnn_vgg16_bn" # NOTE: This can be also a hf hub model
|
||||
reco_arch: str = "crnn_vgg16_bn"
|
||||
reco_bs: int = 512
|
||||
auto_correct_orientation: bool = False
|
||||
preserve_aspect_ratio: bool = True
|
||||
symmetric_pad: bool = True
|
||||
paragraph_break: float = 0.035
|
||||
load_in_8_bit: bool = False
|
||||
# Ref.: https://onnxruntime.ai/docs/api/python/api_summary.html
|
||||
providers: list[tuple[str, dict[str, Any]]] | list[str] | None = None
|
||||
session_options: Any = None
|
||||
|
||||
model_config = ConfigDict(
|
||||
extra="forbid",
|
||||
|
@ -50,7 +50,7 @@ class OnnxtrOcrModel(BaseOcrModel):
|
||||
ocr_predictor,
|
||||
)
|
||||
|
||||
# We diable multiprocessing for OnnxTR,
|
||||
# We disable multiprocessing for OnnxTR,
|
||||
# because the speedup is minimal and it can cause memory leaks on Windows
|
||||
os.environ["ONNXTR_MULTIPROCESSING_DISABLE"] = "TRUE"
|
||||
except ImportError:
|
||||
@ -72,12 +72,16 @@ class OnnxtrOcrModel(BaseOcrModel):
|
||||
config = {
|
||||
"assume_straight_pages": True,
|
||||
"straighten_pages": False,
|
||||
# This should be disabled when docling supports polygons
|
||||
"export_as_straight_boxes": True,
|
||||
"export_as_straight_boxes": False,
|
||||
"disable_crop_orientation": False,
|
||||
"disable_page_orientation": False,
|
||||
}
|
||||
|
||||
engine_cfg = EngineConfig(
|
||||
providers=self.options.providers,
|
||||
session_options=self.options.session_options,
|
||||
)
|
||||
|
||||
self.reader = ocr_predictor(
|
||||
det_arch=(
|
||||
from_hub(self.options.det_arch)
|
||||
@ -95,10 +99,9 @@ class OnnxtrOcrModel(BaseOcrModel):
|
||||
paragraph_break=self.options.paragraph_break,
|
||||
load_in_8_bit=self.options.load_in_8_bit,
|
||||
**config,
|
||||
# TODO: Allow specification of the engine configs in the options
|
||||
det_engine_cfg=None,
|
||||
reco_engine_cfg=None,
|
||||
clf_engine_cfg=None,
|
||||
det_engine_cfg=engine_cfg,
|
||||
reco_engine_cfg=engine_cfg,
|
||||
clf_engine_cfg=engine_cfg,
|
||||
)
|
||||
|
||||
def _to_absolute_and_docling_format(
|
||||
@ -170,24 +173,29 @@ class OnnxtrOcrModel(BaseOcrModel):
|
||||
for line in block.lines
|
||||
for word in line.words
|
||||
):
|
||||
all_ocr_cells.append(
|
||||
TextCell(
|
||||
index=ix,
|
||||
text=word.value,
|
||||
orig=word.value,
|
||||
from_ocr=True,
|
||||
confidence=word.confidence,
|
||||
rect=BoundingRectangle.from_bounding_box(
|
||||
BoundingBox.from_tuple(
|
||||
self._to_absolute_and_docling_format(
|
||||
word.geometry,
|
||||
img_shape=(im_height, im_width),
|
||||
),
|
||||
origin=CoordOrigin.TOPLEFT,
|
||||
)
|
||||
),
|
||||
if (
|
||||
word.confidence >= self.options.confidence_score
|
||||
and word.objectness_score
|
||||
>= self.options.objectness_score
|
||||
):
|
||||
all_ocr_cells.append(
|
||||
TextCell(
|
||||
index=ix,
|
||||
text=word.value,
|
||||
orig=word.value,
|
||||
from_ocr=True,
|
||||
confidence=word.confidence,
|
||||
rect=BoundingRectangle.from_bounding_box(
|
||||
BoundingBox.from_tuple(
|
||||
self._to_absolute_and_docling_format(
|
||||
word.geometry,
|
||||
img_shape=(im_height, im_width),
|
||||
),
|
||||
origin=CoordOrigin.TOPLEFT,
|
||||
)
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# Post-process the cells
|
||||
page.cells = self.post_process_cells(all_ocr_cells, page.cells)
|
||||
|
@ -14,7 +14,7 @@ def main():
|
||||
ocr_options = OnnxtrOcrOptions(
|
||||
det_arch="db_mobilenet_v3_large",
|
||||
reco_arch="Felix92/onnxtr-parseq-multilingual-v1", # Model will be downloaded from Hugging Face Hub
|
||||
auto_correct_orientation=True, # This can be used to correct the orientation of the pages
|
||||
auto_correct_orientation=False, # This can be set to `True` to auto-correct the orientation of the pages
|
||||
)
|
||||
|
||||
pipeline_options = PdfPipelineOptions(
|
||||
|
Loading…
Reference in New Issue
Block a user