More renaming: pic_desc → picture_description (module paths, method, and variable names)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2025-02-07 10:12:38 +01:00
parent b6ed0b34cd
commit 90f0428a62
6 changed files with 11 additions and 12 deletions

View File

@ -218,7 +218,7 @@ class PictureDescriptionVlmOptions(PictureDescriptionBaseOptions):
smolvlm_picture_description = PictureDescriptionVlmOptions(
repo_id="HuggingFaceTB/SmolVLM-256M-Instruct"
)
# phi_pic_desc = PictureDescriptionVlmOptions(repo_id="microsoft/Phi-3-vision-128k-instruct")
# phi_picture_description = PictureDescriptionVlmOptions(repo_id="microsoft/Phi-3-vision-128k-instruct")
granite_picture_description = PictureDescriptionVlmOptions(
repo_id="ibm-granite/granite-vision-3.1-2b-preview",
prompt="What is shown in this image?",

View File

@ -12,7 +12,7 @@ from PIL import Image
from pydantic import BaseModel, ConfigDict
from docling.datamodel.pipeline_options import PictureDescriptionApiOptions
from docling.models.pic_description_base_model import PictureDescriptionBaseModel
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
_log = logging.getLogger(__name__)

View File

@ -1,6 +1,5 @@
import json
from pathlib import Path
from typing import Iterable, List, Optional, Union
from typing import Iterable, Optional, Union
from PIL import Image
@ -8,7 +7,7 @@ from docling.datamodel.pipeline_options import (
AcceleratorOptions,
PictureDescriptionVlmOptions,
)
from docling.models.pic_description_base_model import PictureDescriptionBaseModel
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
from docling.utils.accelerator_utils import decide_device

View File

@ -36,9 +36,9 @@ from docling.models.page_preprocessing_model import (
PagePreprocessingModel,
PagePreprocessingOptions,
)
from docling.models.pic_description_api_model import PictureDescriptionApiModel
from docling.models.pic_description_base_model import PictureDescriptionBaseModel
from docling.models.pic_description_vlm_model import PictureDescriptionVlmModel
from docling.models.picture_description_api_model import PictureDescriptionApiModel
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
from docling.models.rapid_ocr_model import RapidOcrModel
from docling.models.table_structure_model import TableStructureModel
from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
@ -101,7 +101,7 @@ class StandardPdfPipeline(PaginatedPipeline):
]
# Picture description model
if (pic_desc_model := self.get_pic_description_model()) is None:
if (picture_description_model := self.get_picture_description_model()) is None:
raise RuntimeError(
f"The specified picture description kind is not supported: {pipeline_options.picture_description_options.kind}."
)
@ -126,7 +126,7 @@ class StandardPdfPipeline(PaginatedPipeline):
accelerator_options=pipeline_options.accelerator_options,
),
# Document Picture description
pic_desc_model,
picture_description_model,
]
if (
@ -188,7 +188,7 @@ class StandardPdfPipeline(PaginatedPipeline):
)
return None
def get_pic_description_model(
def get_picture_description_model(
self, artifacts_path: Optional[Path] = None
) -> Optional[PictureDescriptionBaseModel]:
if isinstance(

View File

@ -20,7 +20,7 @@ def main():
pipeline_options = PdfPipelineOptions()
pipeline_options.do_picture_description = True
pipeline_options.picture_description_options = smolvlm_picture_description
# pipeline_options.picture_description_options = granite_pic_desc
# pipeline_options.picture_description_options = granite_picture_description
pipeline_options.picture_description_options.prompt = (
"Describe the image in three sentences. Be consise and accurate."