mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-25 19:44:34 +00:00
working on MyPy
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
parent
49e9a00c05
commit
b5479ab971
@ -16,9 +16,7 @@ from docling.datamodel import asr_model_specs
|
||||
|
||||
# Import the following for backwards compatibility
|
||||
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||
|
||||
from docling.datamodel.asr_model_specs import WHISPER_TINY as whisper_tiny
|
||||
|
||||
from docling.datamodel.layout_model_specs import (
|
||||
LayoutModelConfig,
|
||||
docling_layout_egret_large,
|
||||
@ -28,7 +26,6 @@ from docling.datamodel.layout_model_specs import (
|
||||
docling_layout_heron_101,
|
||||
docling_layout_v2,
|
||||
)
|
||||
|
||||
from docling.datamodel.pipeline_options_asr_model import (
|
||||
InlineAsrOptions,
|
||||
)
|
||||
|
@ -6,7 +6,12 @@ from docling_core.types.doc import BoundingBox, DocItem, DoclingDocument, NodeIt
|
||||
from PIL import Image
|
||||
from typing_extensions import TypeVar
|
||||
|
||||
from docling.datamodel.base_models import ItemAndImageEnrichmentElement, Page
|
||||
from docling.datamodel.base_models import (
|
||||
Cluster,
|
||||
ItemAndImageEnrichmentElement,
|
||||
Page,
|
||||
TextCell,
|
||||
)
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import BaseOptions
|
||||
from docling.datamodel.settings import settings
|
||||
@ -29,10 +34,18 @@ class BasePageModel(ABC):
|
||||
) -> Iterable[Page]:
|
||||
pass
|
||||
|
||||
|
||||
class BaseLayoutModel(BasePageModel):
|
||||
@abstractmethod
|
||||
def predict_on_page_image(self, *, page_image: Image.Image) -> list(Cluster):
|
||||
pass
|
||||
def predict_on_page_image(self, *, page_image: Image.Image) -> list[Cluster]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def postprocess_on_page_image(
|
||||
self, *, page: Page, clusters: list[Cluster]
|
||||
) -> tuple[Page, list[Cluster], list[TextCell]]:
|
||||
pass
|
||||
|
||||
|
||||
class BaseVlmModel(BasePageModel):
|
||||
@abstractmethod
|
||||
|
@ -8,14 +8,14 @@ from typing import Any, Optional
|
||||
from docling.datamodel.accelerator_options import (
|
||||
AcceleratorOptions,
|
||||
)
|
||||
from docling.datamodel.base_models import Page, VlmPrediction
|
||||
from docling.datamodel.base_models import Cluster, Page, VlmPrediction
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options_vlm_model import (
|
||||
InlineVlmOptions,
|
||||
TransformersModelType,
|
||||
TransformersPromptStyle,
|
||||
)
|
||||
from docling.models.base_model import BasePageModel, BaseVlmModel
|
||||
from docling.models.base_model import BaseLayoutModel, BasePageModel, BaseVlmModel
|
||||
from docling.models.layout_model import LayoutModel
|
||||
from docling.models.utils.hf_model_download import (
|
||||
HuggingFaceModelDownloadMixin,
|
||||
@ -30,7 +30,7 @@ class TwoStageVlmModel(BasePageModel, HuggingFaceModelDownloadMixin):
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
layout_model: LayoutModel,
|
||||
layout_model: BaseLayoutModel,
|
||||
vlm_model: BaseVlmModel,
|
||||
):
|
||||
self.layout_model = layout_model
|
||||
@ -51,13 +51,17 @@ class TwoStageVlmModel(BasePageModel, HuggingFaceModelDownloadMixin):
|
||||
scale=self.vlm_model.scale, max_size=self.vlm_model.max_size
|
||||
)
|
||||
|
||||
pred_clusters = self.layout_model.predict_on_page(page_image=page_image)
|
||||
assert page_image is not None
|
||||
|
||||
pred_clusters = self.layout_model.predict_on_page_image(
|
||||
page_image=page_image
|
||||
)
|
||||
page, processed_clusters, processed_cells = (
|
||||
self.layout_model.postprocess_on_page(
|
||||
self.layout_model.postprocess_on_page_image(
|
||||
page=page, clusters=pred_clusters
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
# Define prompt structure
|
||||
if callable(self.vlm_options.prompt):
|
||||
user_prompt = self.vlm_options.prompt(page.parsed_page)
|
||||
|
Loading…
Reference in New Issue
Block a user