diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 21d92b6d..d6394e33 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -218,7 +218,7 @@ class PictureDescriptionVlmOptions(PictureDescriptionBaseOptions): smolvlm_picture_description = PictureDescriptionVlmOptions( repo_id="HuggingFaceTB/SmolVLM-256M-Instruct" ) -# phi_pic_desc = PictureDescriptionVlmOptions(repo_id="microsoft/Phi-3-vision-128k-instruct") +# phi_picture_description = PictureDescriptionVlmOptions(repo_id="microsoft/Phi-3-vision-128k-instruct") granite_picture_description = PictureDescriptionVlmOptions( repo_id="ibm-granite/granite-vision-3.1-2b-preview", prompt="What is shown in this image?", diff --git a/docling/models/pic_description_api_model.py b/docling/models/picture_description_api_model.py similarity index 97% rename from docling/models/pic_description_api_model.py rename to docling/models/picture_description_api_model.py index 8b5cc7b5..6c7e02fc 100644 --- a/docling/models/pic_description_api_model.py +++ b/docling/models/picture_description_api_model.py @@ -12,7 +12,7 @@ from PIL import Image from pydantic import BaseModel, ConfigDict from docling.datamodel.pipeline_options import PictureDescriptionApiOptions -from docling.models.pic_description_base_model import PictureDescriptionBaseModel +from docling.models.picture_description_base_model import PictureDescriptionBaseModel _log = logging.getLogger(__name__) diff --git a/docling/models/pic_description_base_model.py b/docling/models/picture_description_base_model.py similarity index 100% rename from docling/models/pic_description_base_model.py rename to docling/models/picture_description_base_model.py diff --git a/docling/models/pic_description_vlm_model.py b/docling/models/picture_description_vlm_model.py similarity index 95% rename from docling/models/pic_description_vlm_model.py rename to docling/models/picture_description_vlm_model.py index 0e9d2abc..b4c8cf21 100644 --- a/docling/models/pic_description_vlm_model.py +++ b/docling/models/picture_description_vlm_model.py @@ -1,6 +1,5 @@ -import json from pathlib import Path -from typing import Iterable, List, Optional, Union +from typing import Iterable, Optional, Union from PIL import Image @@ -8,7 +7,7 @@ from docling.datamodel.pipeline_options import ( AcceleratorOptions, PictureDescriptionVlmOptions, ) -from docling.models.pic_description_base_model import PictureDescriptionBaseModel +from docling.models.picture_description_base_model import PictureDescriptionBaseModel from docling.utils.accelerator_utils import decide_device diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index 2902a4b6..3a329525 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -36,9 +36,9 @@ from docling.models.page_preprocessing_model import ( PagePreprocessingModel, PagePreprocessingOptions, ) -from docling.models.pic_description_api_model import PictureDescriptionApiModel -from docling.models.pic_description_base_model import PictureDescriptionBaseModel -from docling.models.pic_description_vlm_model import PictureDescriptionVlmModel +from docling.models.picture_description_api_model import PictureDescriptionApiModel +from docling.models.picture_description_base_model import PictureDescriptionBaseModel +from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel from docling.models.rapid_ocr_model import RapidOcrModel from docling.models.table_structure_model import TableStructureModel from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel @@ -101,7 +101,7 @@ class StandardPdfPipeline(PaginatedPipeline): ] # Picture description model - if (pic_desc_model := self.get_pic_description_model()) is None: + if (picture_description_model := self.get_picture_description_model()) is None: raise RuntimeError( f"The specified picture description kind is not supported: {pipeline_options.picture_description_options.kind}." ) @@ -126,7 +126,7 @@ class StandardPdfPipeline(PaginatedPipeline): accelerator_options=pipeline_options.accelerator_options, ), # Document Picture description - pic_desc_model, + picture_description_model, ] if ( @@ -188,7 +188,7 @@ class StandardPdfPipeline(PaginatedPipeline): ) return None - def get_pic_description_model( + def get_picture_description_model( self, artifacts_path: Optional[Path] = None ) -> Optional[PictureDescriptionBaseModel]: if isinstance( diff --git a/docs/examples/pictures_description.py b/docs/examples/pictures_description.py index b276b4d2..e1694a11 100644 --- a/docs/examples/pictures_description.py +++ b/docs/examples/pictures_description.py @@ -20,7 +20,7 @@ def main(): pipeline_options = PdfPipelineOptions() pipeline_options.do_picture_description = True pipeline_options.picture_description_options = smolvlm_picture_description - # pipeline_options.picture_description_options = granite_pic_desc + # pipeline_options.picture_description_options = granite_picture_description pipeline_options.picture_description_options.prompt = ( "Describe the image in three sentences. Be consise and accurate."