Working on mypy type-checking fixes

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
2025-07-25 19:44:34 +00:00 · 2025-07-08 15:05:54 +02:00 · 2025-07-08 15:05:54 +02:00 · b5479ab971
commit b5479ab971
parent 49e9a00c05
3 changed files with 26 additions and 12 deletions
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -16,9 +16,7 @@ from docling.datamodel import asr_model_specs

 # Import the following for backwards compatibility
 from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
-
 from docling.datamodel.asr_model_specs import WHISPER_TINY as whisper_tiny
-
 from docling.datamodel.layout_model_specs import (
    LayoutModelConfig,
    docling_layout_egret_large,
@@ -28,7 +26,6 @@ from docling.datamodel.layout_model_specs import (
    docling_layout_heron_101,
    docling_layout_v2,
 )
-
 from docling.datamodel.pipeline_options_asr_model import (
    InlineAsrOptions,
 )
--- a/docling/models/base_model.py
+++ b/docling/models/base_model.py
@@ -6,7 +6,12 @@ from docling_core.types.doc import BoundingBox, DocItem, DoclingDocument, NodeIt
 from PIL import Image
 from typing_extensions import TypeVar

-from docling.datamodel.base_models import ItemAndImageEnrichmentElement, Page
+from docling.datamodel.base_models import (
+    Cluster,
+    ItemAndImageEnrichmentElement,
+    Page,
+    TextCell,
+)
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import BaseOptions
 from docling.datamodel.settings import settings
@@ -29,10 +34,18 @@ class BasePageModel(ABC):
    ) -> Iterable[Page]:
        pass

+
 class BaseLayoutModel(BasePageModel):
    @abstractmethod
-    def predict_on_page_image(self, *, page_image: Image.Image) -> list(Cluster):
-        pass    
+    def predict_on_page_image(self, *, page_image: Image.Image) -> list[Cluster]:
+        pass
+
+    @abstractmethod
+    def postprocess_on_page_image(
+        self, *, page: Page, clusters: list[Cluster]
+    ) -> tuple[Page, list[Cluster], list[TextCell]]:
+        pass
+

 class BaseVlmModel(BasePageModel):
    @abstractmethod
--- a/docling/models/vlm_models_inline/two_stage_vlm_model.py
+++ b/docling/models/vlm_models_inline/two_stage_vlm_model.py
@@ -8,14 +8,14 @@ from typing import Any, Optional
 from docling.datamodel.accelerator_options import (
    AcceleratorOptions,
 )
-from docling.datamodel.base_models import Page, VlmPrediction
+from docling.datamodel.base_models import Cluster, Page, VlmPrediction
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options_vlm_model import (
    InlineVlmOptions,
    TransformersModelType,
    TransformersPromptStyle,
 )
-from docling.models.base_model import BasePageModel, BaseVlmModel
+from docling.models.base_model import BaseLayoutModel, BasePageModel, BaseVlmModel
 from docling.models.layout_model import LayoutModel
 from docling.models.utils.hf_model_download import (
    HuggingFaceModelDownloadMixin,
@@ -30,7 +30,7 @@ class TwoStageVlmModel(BasePageModel, HuggingFaceModelDownloadMixin):
    def __init__(
        self,
        *,
-        layout_model: LayoutModel,
+        layout_model: BaseLayoutModel,
        vlm_model: BaseVlmModel,
    ):
        self.layout_model = layout_model
@@ -51,13 +51,17 @@ class TwoStageVlmModel(BasePageModel, HuggingFaceModelDownloadMixin):
                        scale=self.vlm_model.scale, max_size=self.vlm_model.max_size
                    )

-                    pred_clusters = self.layout_model.predict_on_page(page_image=page_image)
+                    assert page_image is not None
+
+                    pred_clusters = self.layout_model.predict_on_page_image(
+                        page_image=page_image
+                    )
                    page, processed_clusters, processed_cells = (
-                        self.layout_model.postprocess_on_page(
+                        self.layout_model.postprocess_on_page_image(
                            page=page, clusters=pred_clusters
                        )
                    )
-
+                    
                    # Define prompt structure
                    if callable(self.vlm_options.prompt):
                        user_prompt = self.vlm_options.prompt(page.parsed_page)