working on MyPy

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter Staar 2025-07-08 15:05:54 +02:00
parent 49e9a00c05
commit b5479ab971
3 changed files with 26 additions and 12 deletions

View File

@@ -16,9 +16,7 @@ from docling.datamodel import asr_model_specs
# Import the following for backwards compatibility # Import the following for backwards compatibility
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.asr_model_specs import WHISPER_TINY as whisper_tiny from docling.datamodel.asr_model_specs import WHISPER_TINY as whisper_tiny
from docling.datamodel.layout_model_specs import ( from docling.datamodel.layout_model_specs import (
LayoutModelConfig, LayoutModelConfig,
docling_layout_egret_large, docling_layout_egret_large,
@@ -28,7 +26,6 @@ from docling.datamodel.layout_model_specs import (
docling_layout_heron_101, docling_layout_heron_101,
docling_layout_v2, docling_layout_v2,
) )
from docling.datamodel.pipeline_options_asr_model import ( from docling.datamodel.pipeline_options_asr_model import (
InlineAsrOptions, InlineAsrOptions,
) )

View File

@@ -6,7 +6,12 @@ from docling_core.types.doc import BoundingBox, DocItem, DoclingDocument, NodeIt
from PIL import Image from PIL import Image
from typing_extensions import TypeVar from typing_extensions import TypeVar
from docling.datamodel.base_models import ItemAndImageEnrichmentElement, Page from docling.datamodel.base_models import (
Cluster,
ItemAndImageEnrichmentElement,
Page,
TextCell,
)
from docling.datamodel.document import ConversionResult from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import BaseOptions from docling.datamodel.pipeline_options import BaseOptions
from docling.datamodel.settings import settings from docling.datamodel.settings import settings
@@ -29,11 +34,19 @@ class BasePageModel(ABC):
) -> Iterable[Page]: ) -> Iterable[Page]:
pass pass
class BaseLayoutModel(BasePageModel): class BaseLayoutModel(BasePageModel):
@abstractmethod @abstractmethod
def predict_on_page_image(self, *, page_image: Image.Image) -> list(Cluster): def predict_on_page_image(self, *, page_image: Image.Image) -> list[Cluster]:
pass pass
@abstractmethod
def postprocess_on_page_image(
self, *, page: Page, clusters: list[Cluster]
) -> tuple[Page, list[Cluster], list[TextCell]]:
pass
class BaseVlmModel(BasePageModel): class BaseVlmModel(BasePageModel):
@abstractmethod @abstractmethod
def predict_on_page_image(self, *, page_image: Image.Image, prompt: str) -> str: def predict_on_page_image(self, *, page_image: Image.Image, prompt: str) -> str:

View File

@@ -8,14 +8,14 @@ from typing import Any, Optional
from docling.datamodel.accelerator_options import ( from docling.datamodel.accelerator_options import (
AcceleratorOptions, AcceleratorOptions,
) )
from docling.datamodel.base_models import Page, VlmPrediction from docling.datamodel.base_models import Cluster, Page, VlmPrediction
from docling.datamodel.document import ConversionResult from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options_vlm_model import ( from docling.datamodel.pipeline_options_vlm_model import (
InlineVlmOptions, InlineVlmOptions,
TransformersModelType, TransformersModelType,
TransformersPromptStyle, TransformersPromptStyle,
) )
from docling.models.base_model import BasePageModel, BaseVlmModel from docling.models.base_model import BaseLayoutModel, BasePageModel, BaseVlmModel
from docling.models.layout_model import LayoutModel from docling.models.layout_model import LayoutModel
from docling.models.utils.hf_model_download import ( from docling.models.utils.hf_model_download import (
HuggingFaceModelDownloadMixin, HuggingFaceModelDownloadMixin,
@@ -30,7 +30,7 @@ class TwoStageVlmModel(BasePageModel, HuggingFaceModelDownloadMixin):
def __init__( def __init__(
self, self,
*, *,
layout_model: LayoutModel, layout_model: BaseLayoutModel,
vlm_model: BaseVlmModel, vlm_model: BaseVlmModel,
): ):
self.layout_model = layout_model self.layout_model = layout_model
@@ -51,9 +51,13 @@ class TwoStageVlmModel(BasePageModel, HuggingFaceModelDownloadMixin):
scale=self.vlm_model.scale, max_size=self.vlm_model.max_size scale=self.vlm_model.scale, max_size=self.vlm_model.max_size
) )
pred_clusters = self.layout_model.predict_on_page(page_image=page_image) assert page_image is not None
pred_clusters = self.layout_model.predict_on_page_image(
page_image=page_image
)
page, processed_clusters, processed_cells = ( page, processed_clusters, processed_cells = (
self.layout_model.postprocess_on_page( self.layout_model.postprocess_on_page_image(
page=page, clusters=pred_clusters page=page, clusters=pred_clusters
) )
) )