From 0961cda5fb40fb848c9df0fe13bc224c956e1e46 Mon Sep 17 00:00:00 2001
From: Zach Cox
Date: Tue, 29 Apr 2025 19:33:58 -0400
Subject: [PATCH] fix: enable use_cuda_flash_attention2 for PictureDescriptionVlmModel

Signed-off-by: Zach Cox
---
 docling/models/picture_description_vlm_model.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py
index 374f575d..679e80c2 100644
--- a/docling/models/picture_description_vlm_model.py
+++ b/docling/models/picture_description_vlm_model.py
@@ -57,7 +57,10 @@ class PictureDescriptionVlmModel(PictureDescriptionBaseModel):
                 artifacts_path,
                 torch_dtype=torch.bfloat16,
                 _attn_implementation=(
-                    "flash_attention_2" if self.device.startswith("cuda") else "eager"
+                    "flash_attention_2"
+                    if self.device.startswith("cuda")
+                    and accelerator_options.cuda_use_flash_attention2
+                    else "eager"
                 ),
             ).to(self.device)
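
A minimal sketch (not part of the patch) of the selection logic this change
introduces: flash_attention_2 is chosen only when the resolved device is CUDA
and the user has opted in via accelerator_options.cuda_use_flash_attention2,
as shown in the diff above. The standalone helper name below is hypothetical
and used here only for illustration.

    # Hypothetical helper mirroring the ternary expression in the patch.
    def select_attn_implementation(device: str, cuda_use_flash_attention2: bool) -> str:
        # Flash Attention 2 requires a CUDA device and an explicit opt-in;
        # everything else falls back to the default "eager" implementation.
        if device.startswith("cuda") and cuda_use_flash_attention2:
            return "flash_attention_2"
        return "eager"

    assert select_attn_implementation("cuda:0", True) == "flash_attention_2"
    assert select_attn_implementation("cuda:0", False) == "eager"  # opt-in flag not set
    assert select_attn_implementation("cpu", True) == "eager"      # no CUDA device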