mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 07:22:14 +00:00
rename model
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
f0ed5aca1e
commit
4e11ce62bb
@ -184,7 +184,7 @@ class OcrMacOptions(OcrOptions):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class PicDescBaseOptions(BaseModel):
|
class PictureDescriptionBaseOptions(BaseModel):
|
||||||
kind: str
|
kind: str
|
||||||
batch_size: int = 8
|
batch_size: int = 8
|
||||||
scale: float = 2
|
scale: float = 2
|
||||||
@ -194,7 +194,7 @@ class PicDescBaseOptions(BaseModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class PicDescApiOptions(PicDescBaseOptions):
|
class PictureDescriptionApiOptions(PictureDescriptionBaseOptions):
|
||||||
kind: Literal["api"] = "api"
|
kind: Literal["api"] = "api"
|
||||||
|
|
||||||
url: AnyUrl = AnyUrl("http://localhost:8000/v1/chat/completions")
|
url: AnyUrl = AnyUrl("http://localhost:8000/v1/chat/completions")
|
||||||
@ -206,7 +206,7 @@ class PicDescApiOptions(PicDescBaseOptions):
|
|||||||
provenance: str = ""
|
provenance: str = ""
|
||||||
|
|
||||||
|
|
||||||
class PicDescVlmOptions(PicDescBaseOptions):
|
class PictureDescriptionVlmOptions(PictureDescriptionBaseOptions):
|
||||||
kind: Literal["vlm"] = "vlm"
|
kind: Literal["vlm"] = "vlm"
|
||||||
|
|
||||||
repo_id: str
|
repo_id: str
|
||||||
@ -215,18 +215,11 @@ class PicDescVlmOptions(PicDescBaseOptions):
|
|||||||
generation_config: Dict[str, Any] = dict(max_new_tokens=200, do_sample=False)
|
generation_config: Dict[str, Any] = dict(max_new_tokens=200, do_sample=False)
|
||||||
|
|
||||||
|
|
||||||
# class PicDescSmolVlmOptions(PicDescVlmOptions):
|
smolvlm_picture_description = PictureDescriptionVlmOptions(
|
||||||
# repo_id: str = "HuggingFaceTB/SmolVLM-256M-Instruct"
|
repo_id="HuggingFaceTB/SmolVLM-256M-Instruct"
|
||||||
|
)
|
||||||
|
# phi_pic_desc = PictureDescriptionVlmOptions(repo_id="microsoft/Phi-3-vision-128k-instruct")
|
||||||
# class PicDescGraniteOptions(PicDescVlmOptions):
|
granite_picture_description = PictureDescriptionVlmOptions(
|
||||||
# repo_id: str = "ibm-granite/granite-vision-3.1-2b-preview"
|
|
||||||
# prompt: str = "What is shown in this image?"
|
|
||||||
|
|
||||||
|
|
||||||
smolvlm_pic_desc = PicDescVlmOptions(repo_id="HuggingFaceTB/SmolVLM-256M-Instruct")
|
|
||||||
# phi_pic_desc = PicDescVlmOptions(repo_id="microsoft/Phi-3-vision-128k-instruct")
|
|
||||||
granite_pic_desc = PicDescVlmOptions(
|
|
||||||
repo_id="ibm-granite/granite-vision-3.1-2b-preview",
|
repo_id="ibm-granite/granite-vision-3.1-2b-preview",
|
||||||
prompt="What is shown in this image?",
|
prompt="What is shown in this image?",
|
||||||
)
|
)
|
||||||
@ -282,8 +275,9 @@ class PdfPipelineOptions(PipelineOptions):
|
|||||||
RapidOcrOptions,
|
RapidOcrOptions,
|
||||||
] = Field(EasyOcrOptions(), discriminator="kind")
|
] = Field(EasyOcrOptions(), discriminator="kind")
|
||||||
picture_description_options: Annotated[
|
picture_description_options: Annotated[
|
||||||
Union[PicDescApiOptions, PicDescVlmOptions], Field(discriminator="kind")
|
Union[PictureDescriptionApiOptions, PictureDescriptionVlmOptions],
|
||||||
] = smolvlm_pic_desc
|
Field(discriminator="kind"),
|
||||||
|
] = smolvlm_picture_description
|
||||||
|
|
||||||
images_scale: float = 1.0
|
images_scale: float = 1.0
|
||||||
generate_page_images: bool = False
|
generate_page_images: bool = False
|
||||||
|
@ -11,7 +11,7 @@ from docling_core.types.doc.document import ( # TODO: move import to docling_co
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
from pydantic import BaseModel, ConfigDict
|
from pydantic import BaseModel, ConfigDict
|
||||||
|
|
||||||
from docling.datamodel.pipeline_options import PicDescApiOptions
|
from docling.datamodel.pipeline_options import PictureDescriptionApiOptions
|
||||||
from docling.models.pic_description_base_model import PictureDescriptionBaseModel
|
from docling.models.pic_description_base_model import PictureDescriptionBaseModel
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
@ -49,9 +49,9 @@ class ApiResponse(BaseModel):
|
|||||||
class PictureDescriptionApiModel(PictureDescriptionBaseModel):
|
class PictureDescriptionApiModel(PictureDescriptionBaseModel):
|
||||||
# elements_batch_size = 4
|
# elements_batch_size = 4
|
||||||
|
|
||||||
def __init__(self, enabled: bool, options: PicDescApiOptions):
|
def __init__(self, enabled: bool, options: PictureDescriptionApiOptions):
|
||||||
super().__init__(enabled=enabled, options=options)
|
super().__init__(enabled=enabled, options=options)
|
||||||
self.options: PicDescApiOptions
|
self.options: PictureDescriptionApiOptions
|
||||||
|
|
||||||
if self.enabled:
|
if self.enabled:
|
||||||
if options.url.host != "localhost":
|
if options.url.host != "localhost":
|
||||||
|
@ -13,7 +13,7 @@ from docling_core.types.doc.document import ( # TODO: move import to docling_co
|
|||||||
)
|
)
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from docling.datamodel.pipeline_options import PicDescBaseOptions
|
from docling.datamodel.pipeline_options import PictureDescriptionBaseOptions
|
||||||
from docling.models.base_model import (
|
from docling.models.base_model import (
|
||||||
BaseItemAndImageEnrichmentModel,
|
BaseItemAndImageEnrichmentModel,
|
||||||
ItemAndImageEnrichmentElement,
|
ItemAndImageEnrichmentElement,
|
||||||
@ -26,7 +26,7 @@ class PictureDescriptionBaseModel(BaseItemAndImageEnrichmentModel):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
enabled: bool,
|
enabled: bool,
|
||||||
options: PicDescBaseOptions,
|
options: PictureDescriptionBaseOptions,
|
||||||
):
|
):
|
||||||
self.enabled = enabled
|
self.enabled = enabled
|
||||||
self.options = options
|
self.options = options
|
||||||
|
@ -4,7 +4,10 @@ from typing import Iterable, List, Optional, Union
|
|||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from docling.datamodel.pipeline_options import AcceleratorOptions, PicDescVlmOptions
|
from docling.datamodel.pipeline_options import (
|
||||||
|
AcceleratorOptions,
|
||||||
|
PictureDescriptionVlmOptions,
|
||||||
|
)
|
||||||
from docling.models.pic_description_base_model import PictureDescriptionBaseModel
|
from docling.models.pic_description_base_model import PictureDescriptionBaseModel
|
||||||
from docling.utils.accelerator_utils import decide_device
|
from docling.utils.accelerator_utils import decide_device
|
||||||
|
|
||||||
@ -15,11 +18,11 @@ class PictureDescriptionVlmModel(PictureDescriptionBaseModel):
|
|||||||
self,
|
self,
|
||||||
enabled: bool,
|
enabled: bool,
|
||||||
artifacts_path: Optional[Union[Path, str]],
|
artifacts_path: Optional[Union[Path, str]],
|
||||||
options: PicDescVlmOptions,
|
options: PictureDescriptionVlmOptions,
|
||||||
accelerator_options: AcceleratorOptions,
|
accelerator_options: AcceleratorOptions,
|
||||||
):
|
):
|
||||||
super().__init__(enabled=enabled, options=options)
|
super().__init__(enabled=enabled, options=options)
|
||||||
self.options: PicDescVlmOptions
|
self.options: PictureDescriptionVlmOptions
|
||||||
|
|
||||||
if self.enabled:
|
if self.enabled:
|
||||||
|
|
||||||
|
@ -13,8 +13,8 @@ from docling.datamodel.pipeline_options import (
|
|||||||
EasyOcrOptions,
|
EasyOcrOptions,
|
||||||
OcrMacOptions,
|
OcrMacOptions,
|
||||||
PdfPipelineOptions,
|
PdfPipelineOptions,
|
||||||
PicDescApiOptions,
|
PictureDescriptionApiOptions,
|
||||||
PicDescVlmOptions,
|
PictureDescriptionVlmOptions,
|
||||||
RapidOcrOptions,
|
RapidOcrOptions,
|
||||||
TesseractCliOcrOptions,
|
TesseractCliOcrOptions,
|
||||||
TesseractOcrOptions,
|
TesseractOcrOptions,
|
||||||
@ -191,14 +191,16 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
self, artifacts_path: Optional[Path] = None
|
self, artifacts_path: Optional[Path] = None
|
||||||
) -> Optional[PictureDescriptionBaseModel]:
|
) -> Optional[PictureDescriptionBaseModel]:
|
||||||
if isinstance(
|
if isinstance(
|
||||||
self.pipeline_options.picture_description_options, PicDescApiOptions
|
self.pipeline_options.picture_description_options,
|
||||||
|
PictureDescriptionApiOptions,
|
||||||
):
|
):
|
||||||
return PictureDescriptionApiModel(
|
return PictureDescriptionApiModel(
|
||||||
enabled=self.pipeline_options.do_picture_description,
|
enabled=self.pipeline_options.do_picture_description,
|
||||||
options=self.pipeline_options.picture_description_options,
|
options=self.pipeline_options.picture_description_options,
|
||||||
)
|
)
|
||||||
elif isinstance(
|
elif isinstance(
|
||||||
self.pipeline_options.picture_description_options, PicDescVlmOptions
|
self.pipeline_options.picture_description_options,
|
||||||
|
PictureDescriptionVlmOptions,
|
||||||
):
|
):
|
||||||
return PictureDescriptionVlmModel(
|
return PictureDescriptionVlmModel(
|
||||||
enabled=self.pipeline_options.do_picture_description,
|
enabled=self.pipeline_options.do_picture_description,
|
||||||
|
@ -4,10 +4,10 @@ from pathlib import Path
|
|||||||
from docling_core.types.doc import PictureItem
|
from docling_core.types.doc import PictureItem
|
||||||
|
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.pipeline_options import ( # PicDescSmolVlmOptions, PicDescGraniteOptions
|
from docling.datamodel.pipeline_options import (
|
||||||
PdfPipelineOptions,
|
PdfPipelineOptions,
|
||||||
granite_pic_desc,
|
granite_picture_description,
|
||||||
smolvlm_pic_desc,
|
smolvlm_picture_description,
|
||||||
)
|
)
|
||||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
|
|
||||||
@ -19,7 +19,7 @@ def main():
|
|||||||
|
|
||||||
pipeline_options = PdfPipelineOptions()
|
pipeline_options = PdfPipelineOptions()
|
||||||
pipeline_options.do_picture_description = True
|
pipeline_options.do_picture_description = True
|
||||||
pipeline_options.picture_description_options = smolvlm_pic_desc
|
pipeline_options.picture_description_options = smolvlm_picture_description
|
||||||
# pipeline_options.picture_description_options = granite_pic_desc
|
# pipeline_options.picture_description_options = granite_pic_desc
|
||||||
|
|
||||||
pipeline_options.picture_description_options.prompt = (
|
pipeline_options.picture_description_options.prompt = (
|
||||||
|
@ -4,7 +4,10 @@ from pathlib import Path
|
|||||||
from docling_core.types.doc import PictureItem
|
from docling_core.types.doc import PictureItem
|
||||||
|
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.pipeline_options import PdfPipelineOptions, PicDescApiOptions
|
from docling.datamodel.pipeline_options import (
|
||||||
|
PdfPipelineOptions,
|
||||||
|
PictureDescriptionApiOptions,
|
||||||
|
)
|
||||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
|
|
||||||
|
|
||||||
@ -19,7 +22,7 @@ def main():
|
|||||||
|
|
||||||
pipeline_options = PdfPipelineOptions()
|
pipeline_options = PdfPipelineOptions()
|
||||||
pipeline_options.do_picture_description = True
|
pipeline_options.do_picture_description = True
|
||||||
pipeline_options.picture_description_options = PicDescApiOptions(
|
pipeline_options.picture_description_options = PictureDescriptionApiOptions(
|
||||||
url="http://localhost:8000/v1/chat/completions",
|
url="http://localhost:8000/v1/chat/completions",
|
||||||
params=dict(
|
params=dict(
|
||||||
model="HuggingFaceTB/SmolVLM-256M-Instruct",
|
model="HuggingFaceTB/SmolVLM-256M-Instruct",
|
||||||
|
Loading…
Reference in New Issue
Block a user