diff --git a/docling/models/hf_vlm_model.py b/docling/models/hf_vlm_model.py index 973cb374..619658d3 100644 --- a/docling/models/hf_vlm_model.py +++ b/docling/models/hf_vlm_model.py @@ -3,8 +3,6 @@ import time from pathlib import Path from typing import Iterable, List, Optional -from transformers import AutoModelForVision2Seq - from docling.datamodel.base_models import Page, VlmPrediction from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( @@ -36,9 +34,9 @@ class HuggingFaceVlmModel(BasePageModel): if self.enabled: import torch from transformers import ( # type: ignore + AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig, - Idefics3ForConditionalGeneration, ) device = decide_device(accelerator_options.device)