mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-31 14:34:40 +00:00
Feature: allow using a local VLM model too
Hi, this PR lets the user point `repo_id` at a local directory that already contains the model, instead of always downloading it from the Hugging Face Hub: if the given path exists it is used directly, otherwise the repository is downloaded as before. I have run this locally. ``` from docling.datamodel.pipeline_options import PictureDescriptionVlmOptions pipeline_options = PdfPipelineOptions() pipeline_options.do_picture_description = True pipeline_options.picture_description_options = PictureDescriptionVlmOptions( repo_id= "/opt/dlami/nvme/Qwen/Qwen2.5-VL-7B-Instruct", # <-- add here the Hugging Face repo_id of your favorite VLM prompt="Extract the text from the images, if it is table extract the table format.If there are no text give 'No Image Text' response", ) pipeline_options.images_scale = 2.0 pipeline_options.generate_picture_images = True converter = DocumentConverter( format_options={ InputFormat.PDF: PdfFormatOption( pipeline_options=pipeline_options, ) } ) ``` Signed-off-by: Navanit Dubey <98005188+Navanit-git@users.noreply.github.com>
This commit is contained in:
parent
1b0ead6907
commit
27fec3de6c
@ -1,5 +1,6 @@
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional, Union
|
||||
import os
|
||||
|
||||
from PIL import Image
|
||||
|
||||
@ -59,18 +60,24 @@ class PictureDescriptionVlmModel(PictureDescriptionBaseModel):
|
||||
force: bool = False,
|
||||
progress: bool = False,
|
||||
) -> Path:
|
||||
from huggingface_hub import snapshot_download
|
||||
from huggingface_hub.utils import disable_progress_bars
|
||||
|
||||
if not progress:
|
||||
disable_progress_bars()
|
||||
download_path = snapshot_download(
|
||||
repo_id=repo_id,
|
||||
force_download=force,
|
||||
local_dir=local_dir,
|
||||
)
|
||||
|
||||
return Path(download_path)
|
||||
# Check if repo_id is a local path and exists
|
||||
if os.path.exists(repo_id):
|
||||
# If it exists, return the path directly
|
||||
return Path(repo_id)
|
||||
else:
|
||||
# If it doesn't exist, download the repository
|
||||
from huggingface_hub import snapshot_download
|
||||
from huggingface_hub.utils import disable_progress_bars
|
||||
|
||||
if not progress:
|
||||
disable_progress_bars()
|
||||
download_path = snapshot_download(
|
||||
repo_id=repo_id,
|
||||
force_download=force,
|
||||
local_dir=local_dir,
|
||||
)
|
||||
|
||||
return Path(download_path)
|
||||
|
||||
def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
|
||||
from transformers import GenerationConfig
|
||||
|
Loading…
Reference in New Issue
Block a user