From fa925741b6dc00c7bd2806c62cb75cb539649c9f Mon Sep 17 00:00:00 2001
From: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
Date: Wed, 5 Nov 2025 21:23:12 +0100
Subject: [PATCH] fix: temporarily pin NuExtract to working revision (#2588)

* fix: temporarily pin NuExtract revision

NuExtract rev 489efed was causing MPS errors

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>

* Revise revision comment for NuExtract transformer

Updated revision comment for NU_EXTRACT_2B_TRANSFORMERS.

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>

* pass revision to model download

Signed-off-by: Panos Vagenas

---------

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
Signed-off-by: Panos Vagenas
---
 docling/datamodel/vlm_model_specs.py                         | 1 +
 .../models/vlm_models_inline/nuextract_transformers_model.py | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/docling/datamodel/vlm_model_specs.py b/docling/datamodel/vlm_model_specs.py
index 54d0c3e9..d008f58c 100644
--- a/docling/datamodel/vlm_model_specs.py
+++ b/docling/datamodel/vlm_model_specs.py
@@ -287,6 +287,7 @@ DOLPHIN_TRANSFORMERS = InlineVlmOptions(
 # NuExtract
 NU_EXTRACT_2B_TRANSFORMERS = InlineVlmOptions(
     repo_id="numind/NuExtract-2.0-2B",
+    revision="fe5b2f0b63b81150721435a3ca1129a75c59c74e",  # 489efed leads to MPS issues
     prompt="",  # This won't be used, template is passed separately
     torch_dtype="bfloat16",
     inference_framework=InferenceFramework.TRANSFORMERS,
diff --git a/docling/models/vlm_models_inline/nuextract_transformers_model.py b/docling/models/vlm_models_inline/nuextract_transformers_model.py
index 194a1d9d..3fe39510 100644
--- a/docling/models/vlm_models_inline/nuextract_transformers_model.py
+++ b/docling/models/vlm_models_inline/nuextract_transformers_model.py
@@ -131,7 +131,10 @@ class NuExtractTransformersModel(BaseVlmModel, HuggingFaceModelDownloadMixin):
         repo_cache_folder = vlm_options.repo_id.replace("/", "--")

         if artifacts_path is None:
-            artifacts_path = self.download_models(self.vlm_options.repo_id)
+            artifacts_path = self.download_models(
+                repo_id=self.vlm_options.repo_id,
+                revision=self.vlm_options.revision,
+            )
         elif (artifacts_path / repo_cache_folder).exists():
             artifacts_path = artifacts_path / repo_cache_folder