working on MyPy

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter Staar 2025-07-08 15:05:54 +02:00
parent 49e9a00c05
commit b5479ab971
3 changed files with 26 additions and 12 deletions

View File

@ -16,9 +16,7 @@ from docling.datamodel import asr_model_specs
# Import the following for backwards compatibility
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.asr_model_specs import WHISPER_TINY as whisper_tiny
from docling.datamodel.layout_model_specs import (
LayoutModelConfig,
docling_layout_egret_large,
@ -28,7 +26,6 @@ from docling.datamodel.layout_model_specs import (
docling_layout_heron_101,
docling_layout_v2,
)
from docling.datamodel.pipeline_options_asr_model import (
InlineAsrOptions,
)

View File

@ -6,7 +6,12 @@ from docling_core.types.doc import BoundingBox, DocItem, DoclingDocument, NodeIt
from PIL import Image
from typing_extensions import TypeVar
from docling.datamodel.base_models import ItemAndImageEnrichmentElement, Page
from docling.datamodel.base_models import (
Cluster,
ItemAndImageEnrichmentElement,
Page,
TextCell,
)
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import BaseOptions
from docling.datamodel.settings import settings
@ -29,10 +34,18 @@ class BasePageModel(ABC):
) -> Iterable[Page]:
pass
class BaseLayoutModel(BasePageModel):
    """Abstract interface for page models that work on a single page image.

    Subclasses must implement both methods below; each takes keyword-only
    arguments.
    """

    @abstractmethod
    def predict_on_page_image(self, *, page_image: Image.Image) -> list[Cluster]:
        """Return the clusters found for ``page_image``.

        Args:
            page_image: The PIL image of the page to process.

        Returns:
            A list of ``Cluster`` objects for the given image.
        """
        # NOTE: the return annotation must be the subscripted generic
        # ``list[Cluster]``; ``list(Cluster)`` would be a call expression,
        # not a type.
        pass

    @abstractmethod
    def postprocess_on_page_image(
        self, *, page: Page, clusters: list[Cluster]
    ) -> tuple[Page, list[Cluster], list[TextCell]]:
        """Post-process ``clusters`` for ``page``.

        Args:
            page: The page the clusters belong to.
            clusters: Clusters to post-process, e.g. the output of
                ``predict_on_page_image``.

        Returns:
            A ``(page, clusters, cells)`` tuple: a ``Page``, a list of
            ``Cluster`` objects and a list of ``TextCell`` objects.
        """
        pass
class BaseVlmModel(BasePageModel):
@abstractmethod

View File

@ -8,14 +8,14 @@ from typing import Any, Optional
from docling.datamodel.accelerator_options import (
AcceleratorOptions,
)
from docling.datamodel.base_models import Page, VlmPrediction
from docling.datamodel.base_models import Cluster, Page, VlmPrediction
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options_vlm_model import (
InlineVlmOptions,
TransformersModelType,
TransformersPromptStyle,
)
from docling.models.base_model import BasePageModel, BaseVlmModel
from docling.models.base_model import BaseLayoutModel, BasePageModel, BaseVlmModel
from docling.models.layout_model import LayoutModel
from docling.models.utils.hf_model_download import (
HuggingFaceModelDownloadMixin,
@ -30,7 +30,7 @@ class TwoStageVlmModel(BasePageModel, HuggingFaceModelDownloadMixin):
def __init__(
self,
*,
layout_model: LayoutModel,
layout_model: BaseLayoutModel,
vlm_model: BaseVlmModel,
):
self.layout_model = layout_model
@ -51,13 +51,17 @@ class TwoStageVlmModel(BasePageModel, HuggingFaceModelDownloadMixin):
scale=self.vlm_model.scale, max_size=self.vlm_model.max_size
)
pred_clusters = self.layout_model.predict_on_page(page_image=page_image)
assert page_image is not None
pred_clusters = self.layout_model.predict_on_page_image(
page_image=page_image
)
page, processed_clusters, processed_cells = (
self.layout_model.postprocess_on_page(
self.layout_model.postprocess_on_page_image(
page=page, clusters=pred_clusters
)
)
# Define prompt structure
if callable(self.vlm_options.prompt):
user_prompt = self.vlm_options.prompt(page.parsed_page)