mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 23:12:20 +00:00
rename model
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
f0ed5aca1e
commit
4e11ce62bb
@ -184,7 +184,7 @@ class OcrMacOptions(OcrOptions):
|
||||
)
|
||||
|
||||
|
||||
class PicDescBaseOptions(BaseModel):
|
||||
class PictureDescriptionBaseOptions(BaseModel):
|
||||
kind: str
|
||||
batch_size: int = 8
|
||||
scale: float = 2
|
||||
@ -194,7 +194,7 @@ class PicDescBaseOptions(BaseModel):
|
||||
)
|
||||
|
||||
|
||||
class PicDescApiOptions(PicDescBaseOptions):
|
||||
class PictureDescriptionApiOptions(PictureDescriptionBaseOptions):
|
||||
kind: Literal["api"] = "api"
|
||||
|
||||
url: AnyUrl = AnyUrl("http://localhost:8000/v1/chat/completions")
|
||||
@ -206,7 +206,7 @@ class PicDescApiOptions(PicDescBaseOptions):
|
||||
provenance: str = ""
|
||||
|
||||
|
||||
class PicDescVlmOptions(PicDescBaseOptions):
|
||||
class PictureDescriptionVlmOptions(PictureDescriptionBaseOptions):
|
||||
kind: Literal["vlm"] = "vlm"
|
||||
|
||||
repo_id: str
|
||||
@ -215,18 +215,11 @@ class PicDescVlmOptions(PicDescBaseOptions):
|
||||
generation_config: Dict[str, Any] = dict(max_new_tokens=200, do_sample=False)
|
||||
|
||||
|
||||
# class PicDescSmolVlmOptions(PicDescVlmOptions):
|
||||
# repo_id: str = "HuggingFaceTB/SmolVLM-256M-Instruct"
|
||||
|
||||
|
||||
# class PicDescGraniteOptions(PicDescVlmOptions):
|
||||
# repo_id: str = "ibm-granite/granite-vision-3.1-2b-preview"
|
||||
# prompt: str = "What is shown in this image?"
|
||||
|
||||
|
||||
smolvlm_pic_desc = PicDescVlmOptions(repo_id="HuggingFaceTB/SmolVLM-256M-Instruct")
|
||||
# phi_pic_desc = PicDescVlmOptions(repo_id="microsoft/Phi-3-vision-128k-instruct")
|
||||
granite_pic_desc = PicDescVlmOptions(
|
||||
smolvlm_picture_description = PictureDescriptionVlmOptions(
|
||||
repo_id="HuggingFaceTB/SmolVLM-256M-Instruct"
|
||||
)
|
||||
# phi_pic_desc = PictureDescriptionVlmOptions(repo_id="microsoft/Phi-3-vision-128k-instruct")
|
||||
granite_picture_description = PictureDescriptionVlmOptions(
|
||||
repo_id="ibm-granite/granite-vision-3.1-2b-preview",
|
||||
prompt="What is shown in this image?",
|
||||
)
|
||||
@ -282,8 +275,9 @@ class PdfPipelineOptions(PipelineOptions):
|
||||
RapidOcrOptions,
|
||||
] = Field(EasyOcrOptions(), discriminator="kind")
|
||||
picture_description_options: Annotated[
|
||||
Union[PicDescApiOptions, PicDescVlmOptions], Field(discriminator="kind")
|
||||
] = smolvlm_pic_desc
|
||||
Union[PictureDescriptionApiOptions, PictureDescriptionVlmOptions],
|
||||
Field(discriminator="kind"),
|
||||
] = smolvlm_picture_description
|
||||
|
||||
images_scale: float = 1.0
|
||||
generate_page_images: bool = False
|
||||
|
@ -11,7 +11,7 @@ from docling_core.types.doc.document import ( # TODO: move import to docling_co
|
||||
from PIL import Image
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from docling.datamodel.pipeline_options import PicDescApiOptions
|
||||
from docling.datamodel.pipeline_options import PictureDescriptionApiOptions
|
||||
from docling.models.pic_description_base_model import PictureDescriptionBaseModel
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
@ -49,9 +49,9 @@ class ApiResponse(BaseModel):
|
||||
class PictureDescriptionApiModel(PictureDescriptionBaseModel):
|
||||
# elements_batch_size = 4
|
||||
|
||||
def __init__(self, enabled: bool, options: PicDescApiOptions):
|
||||
def __init__(self, enabled: bool, options: PictureDescriptionApiOptions):
|
||||
super().__init__(enabled=enabled, options=options)
|
||||
self.options: PicDescApiOptions
|
||||
self.options: PictureDescriptionApiOptions
|
||||
|
||||
if self.enabled:
|
||||
if options.url.host != "localhost":
|
||||
|
@ -13,7 +13,7 @@ from docling_core.types.doc.document import ( # TODO: move import to docling_co
|
||||
)
|
||||
from PIL import Image
|
||||
|
||||
from docling.datamodel.pipeline_options import PicDescBaseOptions
|
||||
from docling.datamodel.pipeline_options import PictureDescriptionBaseOptions
|
||||
from docling.models.base_model import (
|
||||
BaseItemAndImageEnrichmentModel,
|
||||
ItemAndImageEnrichmentElement,
|
||||
@ -26,7 +26,7 @@ class PictureDescriptionBaseModel(BaseItemAndImageEnrichmentModel):
|
||||
def __init__(
|
||||
self,
|
||||
enabled: bool,
|
||||
options: PicDescBaseOptions,
|
||||
options: PictureDescriptionBaseOptions,
|
||||
):
|
||||
self.enabled = enabled
|
||||
self.options = options
|
||||
|
@ -4,7 +4,10 @@ from typing import Iterable, List, Optional, Union
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from docling.datamodel.pipeline_options import AcceleratorOptions, PicDescVlmOptions
|
||||
from docling.datamodel.pipeline_options import (
|
||||
AcceleratorOptions,
|
||||
PictureDescriptionVlmOptions,
|
||||
)
|
||||
from docling.models.pic_description_base_model import PictureDescriptionBaseModel
|
||||
from docling.utils.accelerator_utils import decide_device
|
||||
|
||||
@ -15,11 +18,11 @@ class PictureDescriptionVlmModel(PictureDescriptionBaseModel):
|
||||
self,
|
||||
enabled: bool,
|
||||
artifacts_path: Optional[Union[Path, str]],
|
||||
options: PicDescVlmOptions,
|
||||
options: PictureDescriptionVlmOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
super().__init__(enabled=enabled, options=options)
|
||||
self.options: PicDescVlmOptions
|
||||
self.options: PictureDescriptionVlmOptions
|
||||
|
||||
if self.enabled:
|
||||
|
||||
|
@ -13,8 +13,8 @@ from docling.datamodel.pipeline_options import (
|
||||
EasyOcrOptions,
|
||||
OcrMacOptions,
|
||||
PdfPipelineOptions,
|
||||
PicDescApiOptions,
|
||||
PicDescVlmOptions,
|
||||
PictureDescriptionApiOptions,
|
||||
PictureDescriptionVlmOptions,
|
||||
RapidOcrOptions,
|
||||
TesseractCliOcrOptions,
|
||||
TesseractOcrOptions,
|
||||
@ -191,14 +191,16 @@ class StandardPdfPipeline(PaginatedPipeline):
|
||||
self, artifacts_path: Optional[Path] = None
|
||||
) -> Optional[PictureDescriptionBaseModel]:
|
||||
if isinstance(
|
||||
self.pipeline_options.picture_description_options, PicDescApiOptions
|
||||
self.pipeline_options.picture_description_options,
|
||||
PictureDescriptionApiOptions,
|
||||
):
|
||||
return PictureDescriptionApiModel(
|
||||
enabled=self.pipeline_options.do_picture_description,
|
||||
options=self.pipeline_options.picture_description_options,
|
||||
)
|
||||
elif isinstance(
|
||||
self.pipeline_options.picture_description_options, PicDescVlmOptions
|
||||
self.pipeline_options.picture_description_options,
|
||||
PictureDescriptionVlmOptions,
|
||||
):
|
||||
return PictureDescriptionVlmModel(
|
||||
enabled=self.pipeline_options.do_picture_description,
|
||||
|
@ -4,10 +4,10 @@ from pathlib import Path
|
||||
from docling_core.types.doc import PictureItem
|
||||
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from docling.datamodel.pipeline_options import ( # PicDescSmolVlmOptions, PicDescGraniteOptions
|
||||
from docling.datamodel.pipeline_options import (
|
||||
PdfPipelineOptions,
|
||||
granite_pic_desc,
|
||||
smolvlm_pic_desc,
|
||||
granite_picture_description,
|
||||
smolvlm_picture_description,
|
||||
)
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||
|
||||
@ -19,7 +19,7 @@ def main():
|
||||
|
||||
pipeline_options = PdfPipelineOptions()
|
||||
pipeline_options.do_picture_description = True
|
||||
pipeline_options.picture_description_options = smolvlm_pic_desc
|
||||
pipeline_options.picture_description_options = smolvlm_picture_description
|
||||
# pipeline_options.picture_description_options = granite_picture_description
|
||||
|
||||
pipeline_options.picture_description_options.prompt = (
|
||||
|
@ -4,7 +4,10 @@ from pathlib import Path
|
||||
from docling_core.types.doc import PictureItem
|
||||
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions, PicDescApiOptions
|
||||
from docling.datamodel.pipeline_options import (
|
||||
PdfPipelineOptions,
|
||||
PictureDescriptionApiOptions,
|
||||
)
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||
|
||||
|
||||
@ -19,7 +22,7 @@ def main():
|
||||
|
||||
pipeline_options = PdfPipelineOptions()
|
||||
pipeline_options.do_picture_description = True
|
||||
pipeline_options.picture_description_options = PicDescApiOptions(
|
||||
pipeline_options.picture_description_options = PictureDescriptionApiOptions(
|
||||
url="http://localhost:8000/v1/chat/completions",
|
||||
params=dict(
|
||||
model="HuggingFaceTB/SmolVLM-256M-Instruct",
|
||||
|
Loading…
Reference in New Issue
Block a user