def _parse_orientation(df_osd: pd.DataFrame) -> int:
    """Return the orientation read from the "Orientation in degrees" OSD row.

    Args:
        df_osd: Table with a ``"key"`` column and a ``"value"`` column.
            The matching value is presumably a string, since ``.strip()``
            is called on it -- confirm against the code that builds the
            frame.

    Returns:
        Whatever ``parse_tesseract_orientation`` yields for the stripped
        value of the first row whose key equals "Orientation in degrees".

    Raises:
        IndexError: If no row carries the "Orientation in degrees" key.
    """
    # Select on the raw column arrays with a boolean mask instead of going
    # through ``DataFrame.loc``; only the first matching row is used.
    keys = df_osd["key"].to_numpy()
    values = df_osd["value"].to_numpy()
    matching_values = values[keys == "Orientation in degrees"]
    return parse_tesseract_orientation(matching_values[0].strip())