diff --git a/docling/datamodel/vlm_model_specs.py b/docling/datamodel/vlm_model_specs.py
index d09e0a81..18be529e 100644
--- a/docling/datamodel/vlm_model_specs.py
+++ b/docling/datamodel/vlm_model_specs.py
@@ -65,7 +65,7 @@ SMOLDOCLING_VLLM = InlineVlmOptions(
 SMOLVLM500_TRANSFORMERS = InlineVlmOptions(
     repo_id="HuggingFaceTB/SmolVLM-500M-Instruct",
     prompt="Transcribe this image to plain text.",
-    response_format=ResponseFormat.DOCTAGS,
+    response_format=ResponseFormat.PLAINTEXT,
     inference_framework=InferenceFramework.TRANSFORMERS,
     transformers_model_type=TransformersModelType.AUTOMODEL_IMAGETEXTTOTEXT,
     supported_devices=[
diff --git a/docling/pipeline/threaded_multistage_vlm_pipeline.py b/docling/pipeline/threaded_multistage_vlm_pipeline.py
index a9dd9e64..3a967250 100644
--- a/docling/pipeline/threaded_multistage_vlm_pipeline.py
+++ b/docling/pipeline/threaded_multistage_vlm_pipeline.py
@@ -161,7 +161,7 @@ class ThreadedMultiStageVlmPipelineOptions(PaginatedPipelineOptions):
 
     # text_opts = DOLPHIN_TRANSFORMERS.model_copy()
     # text_opts.prompt = "Read text in the image. "
-    base_model = SMOLVLM500_TRANSFORMERS
+    base_model = SMOLVLM500_MLX
     text_opts = base_model.model_copy()
     # text_opts.prompt = "Convert this page to docling."