diff --git a/docling/models/base_ocr_model.py b/docling/models/base_ocr_model.py index be60e79d..5e35a0e0 100644 --- a/docling/models/base_ocr_model.py +++ b/docling/models/base_ocr_model.py @@ -7,6 +7,7 @@ from typing import List, Optional, Type import numpy as np from docling_core.types.doc import BoundingBox, CoordOrigin +from docling_core.types.doc.page import TextCell from PIL import Image, ImageDraw from rtree import index from scipy.ndimage import binary_dilation, find_objects, label @@ -107,7 +108,9 @@ class BaseOcrModel(BasePageModel, BaseModelWithOptions): return [] # Filters OCR cells by dropping any OCR cell that intersects with an existing programmatic cell. - def _filter_ocr_cells(self, ocr_cells, programmatic_cells): + def _filter_ocr_cells( + self, ocr_cells: List[TextCell], programmatic_cells: List[TextCell] + ) -> List[TextCell]: # Create R-tree index for programmatic cells p = index.Property() p.dimension = 2 @@ -130,7 +133,7 @@ class BaseOcrModel(BasePageModel, BaseModelWithOptions): ] return filtered_ocr_cells - def post_process_cells(self, ocr_cells, page): + def post_process_cells(self, ocr_cells: List[TextCell], page: Page) -> None: r""" Post-process the OCR cells and update the page object. Updates parsed_page.textline_cells directly since page.cells is now read-only. @@ -147,7 +150,9 @@ class BaseOcrModel(BasePageModel, BaseModelWithOptions): page.parsed_page.textline_cells = final_cells page.parsed_page.has_lines = len(final_cells) > 0 - def _combine_cells(self, existing_cells, ocr_cells): + def _combine_cells( + self, existing_cells: List[TextCell], ocr_cells: List[TextCell] + ) -> List[TextCell]: """Combine existing and OCR cells with filtering and re-indexing.""" if self.options.force_full_page_ocr: combined = ocr_cells