mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
working on MyPy
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
parent
49e9a00c05
commit
b5479ab971
@ -16,9 +16,7 @@ from docling.datamodel import asr_model_specs
|
|||||||
|
|
||||||
# Import the following for backwards compatibility
|
# Import the following for backwards compatibility
|
||||||
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||||
|
|
||||||
from docling.datamodel.asr_model_specs import WHISPER_TINY as whisper_tiny
|
from docling.datamodel.asr_model_specs import WHISPER_TINY as whisper_tiny
|
||||||
|
|
||||||
from docling.datamodel.layout_model_specs import (
|
from docling.datamodel.layout_model_specs import (
|
||||||
LayoutModelConfig,
|
LayoutModelConfig,
|
||||||
docling_layout_egret_large,
|
docling_layout_egret_large,
|
||||||
@ -28,7 +26,6 @@ from docling.datamodel.layout_model_specs import (
|
|||||||
docling_layout_heron_101,
|
docling_layout_heron_101,
|
||||||
docling_layout_v2,
|
docling_layout_v2,
|
||||||
)
|
)
|
||||||
|
|
||||||
from docling.datamodel.pipeline_options_asr_model import (
|
from docling.datamodel.pipeline_options_asr_model import (
|
||||||
InlineAsrOptions,
|
InlineAsrOptions,
|
||||||
)
|
)
|
||||||
|
@ -6,7 +6,12 @@ from docling_core.types.doc import BoundingBox, DocItem, DoclingDocument, NodeIt
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
from typing_extensions import TypeVar
|
from typing_extensions import TypeVar
|
||||||
|
|
||||||
from docling.datamodel.base_models import ItemAndImageEnrichmentElement, Page
|
from docling.datamodel.base_models import (
|
||||||
|
Cluster,
|
||||||
|
ItemAndImageEnrichmentElement,
|
||||||
|
Page,
|
||||||
|
TextCell,
|
||||||
|
)
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import BaseOptions
|
from docling.datamodel.pipeline_options import BaseOptions
|
||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
@ -29,11 +34,19 @@ class BasePageModel(ABC):
|
|||||||
) -> Iterable[Page]:
|
) -> Iterable[Page]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class BaseLayoutModel(BasePageModel):
|
class BaseLayoutModel(BasePageModel):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def predict_on_page_image(self, *, page_image: Image.Image) -> list(Cluster):
|
def predict_on_page_image(self, *, page_image: Image.Image) -> list[Cluster]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def postprocess_on_page_image(
|
||||||
|
self, *, page: Page, clusters: list[Cluster]
|
||||||
|
) -> tuple[Page, list[Cluster], list[TextCell]]:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class BaseVlmModel(BasePageModel):
|
class BaseVlmModel(BasePageModel):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def predict_on_page_image(self, *, page_image: Image.Image, prompt: str) -> str:
|
def predict_on_page_image(self, *, page_image: Image.Image, prompt: str) -> str:
|
||||||
|
@ -8,14 +8,14 @@ from typing import Any, Optional
|
|||||||
from docling.datamodel.accelerator_options import (
|
from docling.datamodel.accelerator_options import (
|
||||||
AcceleratorOptions,
|
AcceleratorOptions,
|
||||||
)
|
)
|
||||||
from docling.datamodel.base_models import Page, VlmPrediction
|
from docling.datamodel.base_models import Cluster, Page, VlmPrediction
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options_vlm_model import (
|
from docling.datamodel.pipeline_options_vlm_model import (
|
||||||
InlineVlmOptions,
|
InlineVlmOptions,
|
||||||
TransformersModelType,
|
TransformersModelType,
|
||||||
TransformersPromptStyle,
|
TransformersPromptStyle,
|
||||||
)
|
)
|
||||||
from docling.models.base_model import BasePageModel, BaseVlmModel
|
from docling.models.base_model import BaseLayoutModel, BasePageModel, BaseVlmModel
|
||||||
from docling.models.layout_model import LayoutModel
|
from docling.models.layout_model import LayoutModel
|
||||||
from docling.models.utils.hf_model_download import (
|
from docling.models.utils.hf_model_download import (
|
||||||
HuggingFaceModelDownloadMixin,
|
HuggingFaceModelDownloadMixin,
|
||||||
@ -30,7 +30,7 @@ class TwoStageVlmModel(BasePageModel, HuggingFaceModelDownloadMixin):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
layout_model: LayoutModel,
|
layout_model: BaseLayoutModel,
|
||||||
vlm_model: BaseVlmModel,
|
vlm_model: BaseVlmModel,
|
||||||
):
|
):
|
||||||
self.layout_model = layout_model
|
self.layout_model = layout_model
|
||||||
@ -51,9 +51,13 @@ class TwoStageVlmModel(BasePageModel, HuggingFaceModelDownloadMixin):
|
|||||||
scale=self.vlm_model.scale, max_size=self.vlm_model.max_size
|
scale=self.vlm_model.scale, max_size=self.vlm_model.max_size
|
||||||
)
|
)
|
||||||
|
|
||||||
pred_clusters = self.layout_model.predict_on_page(page_image=page_image)
|
assert page_image is not None
|
||||||
|
|
||||||
|
pred_clusters = self.layout_model.predict_on_page_image(
|
||||||
|
page_image=page_image
|
||||||
|
)
|
||||||
page, processed_clusters, processed_cells = (
|
page, processed_clusters, processed_cells = (
|
||||||
self.layout_model.postprocess_on_page(
|
self.layout_model.postprocess_on_page_image(
|
||||||
page=page, clusters=pred_clusters
|
page=page, clusters=pred_clusters
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user