From 27fec3de6cca460c84dd4786cbf3f8549b6905ce Mon Sep 17 00:00:00 2001 From: Navanit Dubey <98005188+Navanit-git@users.noreply.github.com> Date: Tue, 25 Feb 2025 14:47:30 +0530 Subject: [PATCH] Feature to use local vlm model too Hi, This PR lets the user pass the path of a local repository (a model already downloaded to disk) as repo_id; if the path exists it is used directly instead of downloading from the Hugging Face Hub. I have run this locally. ``` from docling.datamodel.pipeline_options import PictureDescriptionVlmOptions pipeline_options = PdfPipelineOptions() pipeline_options.do_picture_description = True pipeline_options.picture_description_options = PictureDescriptionVlmOptions( repo_id= "/opt/dlami/nvme/Qwen/Qwen2.5-VL-7B-Instruct", # <-- add here a local model path or the Hugging Face repo_id of your favorite VLM prompt="Extract the text from the images, if it is table extract the table format.If there are no text give 'No Image Text' response", ) pipeline_options.images_scale = 2.0 pipeline_options.generate_picture_images = True converter = DocumentConverter( format_options={ InputFormat.PDF: PdfFormatOption( pipeline_options=pipeline_options, ) } ) ``` Signed-off-by: Navanit Dubey <98005188+Navanit-git@users.noreply.github.com> --- .../models/picture_description_vlm_model.py | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py index 9fa4826d..d25743e0 100644 --- a/docling/models/picture_description_vlm_model.py +++ b/docling/models/picture_description_vlm_model.py @@ -1,5 +1,6 @@ from pathlib import Path from typing import Iterable, Optional, Union +import os from PIL import Image @@ -59,18 +60,24 @@ class PictureDescriptionVlmModel(PictureDescriptionBaseModel): force: bool = False, progress: bool = False, ) -> Path: - from huggingface_hub import snapshot_download - from huggingface_hub.utils import disable_progress_bars - - if not progress: - disable_progress_bars() - download_path = snapshot_download( - repo_id=repo_id, - 
force_download=force, - local_dir=local_dir, - ) - - return Path(download_path) + # Check if repo_id is a local path and exists + if os.path.exists(repo_id): + # If it exists, return the path directly + return Path(repo_id) + else: + # If it doesn't exist, download the repository + from huggingface_hub import snapshot_download + from huggingface_hub.utils import disable_progress_bars + + if not progress: + disable_progress_bars() + download_path = snapshot_download( + repo_id=repo_id, + force_download=force, + local_dir=local_dir, + ) + + return Path(download_path) def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]: from transformers import GenerationConfig