diff --git a/docling/datamodel/pipeline_model_specializations.py b/docling/datamodel/pipeline_model_specializations.py
index 9a80fae4..77e6c2f2 100644
--- a/docling/datamodel/pipeline_model_specializations.py
+++ b/docling/datamodel/pipeline_model_specializations.py
@@ -84,6 +84,7 @@ smoldocling_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
     response_format=ResponseFormat.DOCTAGS,
     inference_framework=InferenceFramework.MLX,
     scale=2.0,
+    temperature=0.0,
 )
 
 smoldocling_vlm_conversion_options = HuggingFaceVlmOptions(
@@ -92,6 +93,7 @@ smoldocling_vlm_conversion_options = HuggingFaceVlmOptions(
     response_format=ResponseFormat.DOCTAGS,
     inference_framework=InferenceFramework.TRANSFORMERS_AutoModelForVision2Seq,
     scale=2.0,
+    temperature=0.0,
 )
 
 # GraniteVision
@@ -101,6 +103,7 @@ granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.TRANSFORMERS_AutoModelForVision2Seq,
     scale=2.0,
+    temperature=0.0,
 )
 
 granite_vision_vlm_ollama_conversion_options = ApiVlmOptions(
@@ -110,6 +113,7 @@ granite_vision_vlm_ollama_conversion_options = ApiVlmOptions(
     scale=1.0,
     timeout=120,
     response_format=ResponseFormat.MARKDOWN,
+    temperature=0.0,
 )
 
 # Pixtral
@@ -119,6 +123,7 @@ pixtral_12b_vlm_conversion_options = HuggingFaceVlmOptions(
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.TRANSFORMERS_LlavaForConditionalGeneration,
     scale=2.0,
+    temperature=0.0,
 )
 
 pixtral_12b_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
@@ -127,6 +132,7 @@ pixtral_12b_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.MLX,
     scale=2.0,
+    temperature=0.0,
 )
 
 # Phi4
@@ -135,6 +141,8 @@ phi_vlm_conversion_options = HuggingFaceVlmOptions(
     prompt="Convert this page to MarkDown. Do not miss any text and only output the bare MarkDown",
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.TRANSFORMERS_AutoModelForCausalLM,
+    scale=2.0,
+    temperature=0.0,
 )
 
 # Qwen
@@ -143,4 +151,6 @@ qwen25_vl_3b_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
     prompt="Convert this page to markdown. Do not miss any text and only output the bare MarkDown!",
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.MLX,
+    scale=2.0,
+    temperature=0.0,
 )
diff --git a/docling/models/hf_vlm_model.py b/docling/models/hf_vlm_model.py
index 20a1c7dd..73e6f313 100644
--- a/docling/models/hf_vlm_model.py
+++ b/docling/models/hf_vlm_model.py
@@ -6,6 +6,17 @@ _log = logging.getLogger(__name__)
 
 
 class HuggingFaceVlmModel:
+
+    @staticmethod
+    def map_device_to_cpu_if_mlx(device: str) -> str:
+        if device == "mps":
+            _log.warning(
+                "Mapping mlx to cpu for AutoModelForCausalLM, use MLX framework!"
+            )
+            return "cpu"
+
+        return device
+
     @staticmethod
     def download_models(
         repo_id: str,
diff --git a/docling/models/hf_vlm_models/hf_vlm_mlx_model.py b/docling/models/hf_vlm_models/hf_vlm_mlx_model.py
index 0d7b63f9..57abaa7e 100644
--- a/docling/models/hf_vlm_models/hf_vlm_mlx_model.py
+++ b/docling/models/hf_vlm_models/hf_vlm_mlx_model.py
@@ -29,7 +29,8 @@ class HuggingFaceMlxModel(BasePageModel):
         self.vlm_options = vlm_options
 
         self.max_tokens = vlm_options.max_new_tokens
-
+        self.temperature = vlm_options.temperature
+
         if self.enabled:
             try:
                 from mlx_vlm import generate, load  # type: ignore
@@ -103,8 +104,9 @@ class HuggingFaceMlxModel(BasePageModel):
                         self.processor,
                         prompt,
                         [hi_res_image],
-                        max_tokens=4096,
+                        max_tokens=self.max_tokens,
                         verbose=False,
+                        temp=self.temperature,
                     ):
                         if len(token.logprobs.shape) == 1:
                             tokens.append(
diff --git a/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForCausalLM.py b/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForCausalLM.py
index 449764cb..213a5a28 100644
--- a/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForCausalLM.py
+++ b/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForCausalLM.py
@@ -42,19 +42,13 @@ class HuggingFaceVlmModel_AutoModelForCausalLM(BasePageModel):
         )
 
         self.device = decide_device(accelerator_options.device)
-
-        if self.device == "mps":
-            _log.warning(
-                "Mapping mlx to cpu for AutoModelForCausalLM, use MLX framework!"
-            )
-            self.device = "cpu"
-
-        print("device: ", self.device)
-
+        self.device = HuggingFaceVlmModel.map_device_to_cpu_if_mlx(self.device)
+        _log.debug(f"Available device for VLM: {self.device}")
+
         self.use_cache = vlm_options.use_kv_cache
         self.max_new_tokens = vlm_options.max_new_tokens
+        self.temperature = vlm_options.temperature
 
-        _log.debug(f"Available device for VLM: {self.device}")
         repo_cache_folder = vlm_options.repo_id.replace("/", "--")
 
         if artifacts_path is None:
@@ -126,12 +120,6 @@ class HuggingFaceVlmModel_AutoModelForCausalLM(BasePageModel):
 
                     if hi_res_image is not None:
                         im_width, im_height = hi_res_image.size
-
-                    """
-                    if hi_res_image:
-                        if hi_res_image.mode != "RGB":
-                            hi_res_image = hi_res_image.convert("RGB")
-                    """
 
                     # Define prompt structure
                     prompt = self.formulate_prompt()
@@ -147,9 +135,9 @@ class HuggingFaceVlmModel_AutoModelForCausalLM(BasePageModel):
                         **inputs,
                         max_new_tokens=self.max_new_tokens,
                         use_cache=self.use_cache,  # Enables KV caching which can improve performance
+                        temperature=self.temperature,
                         generation_config=self.generation_config,
                         num_logits_to_keep=1,
-                        # temperature=0.0,
                     )
                     generate_ids = generate_ids[:, inputs["input_ids"].shape[1] :]
 
@@ -162,8 +150,7 @@ class HuggingFaceVlmModel_AutoModelForCausalLM(BasePageModel):
                         clean_up_tokenization_spaces=False,
                     )[0]
 
-                    #_log.debug(
-                    print(
+                    _log.debug(
                         f"Generated {num_tokens} tokens in time {generation_time:.2f} seconds."
                     )
                     page.predictions.vlm_response = VlmPrediction(text=response, generation_time=generation_time)
diff --git a/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForVision2Seq.py b/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForVision2Seq.py
index 6633c842..b0c74aa8 100644
--- a/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForVision2Seq.py
+++ b/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForVision2Seq.py
@@ -39,8 +39,14 @@ class HuggingFaceVlmModel_AutoModelForVision2Seq(BasePageModel):
         )
 
         self.device = decide_device(accelerator_options.device)
+        self.device = HuggingFaceVlmModel.map_device_to_cpu_if_mlx(self.device)
+        _log.debug(f"Available device for HuggingFace VLM: {self.device}")
 
+        self.use_cache = vlm_options.use_kv_cache
+        self.max_new_tokens = vlm_options.max_new_tokens
+        self.temperature = vlm_options.temperature
+
         repo_cache_folder = vlm_options.repo_id.replace("/", "--")
 
         # PARAMETERS:
@@ -111,10 +117,12 @@ class HuggingFaceVlmModel_AutoModelForVision2Seq(BasePageModel):
                     # populate page_tags with predicted doc tags
                     page_tags = ""
 
+                    """
                     if hi_res_image:
                         if hi_res_image.mode != "RGB":
                             hi_res_image = hi_res_image.convert("RGB")
-
+                    """
+
                     # Define prompt structure
                     prompt = self.formulate_prompt()
 
@@ -126,7 +134,10 @@ class HuggingFaceVlmModel_AutoModelForVision2Seq(BasePageModel):
                     start_time = time.time()
                     # Call model to generate:
                     generated_ids = self.vlm_model.generate(
-                        **inputs, max_new_tokens=4096, use_cache=True
+                        **inputs,
+                        max_new_tokens=self.max_new_tokens,
+                        use_cache=self.use_cache,
+                        temperature=self.temperature,
                     )
                     generation_time = time.time() - start_time
 
diff --git a/docling/models/hf_vlm_models/hf_vlm_model_LlavaForConditionalGeneration.py b/docling/models/hf_vlm_models/hf_vlm_model_LlavaForConditionalGeneration.py
index 304f8c0f..1c286a8b 100644
--- a/docling/models/hf_vlm_models/hf_vlm_model_LlavaForConditionalGeneration.py
+++ b/docling/models/hf_vlm_models/hf_vlm_model_LlavaForConditionalGeneration.py
@@ -39,16 +39,12 @@ class HuggingFaceVlmModel_LlavaForConditionalGeneration(BasePageModel):
         )
 
         self.device = decide_device(accelerator_options.device)
-
-        if self.device == "mlx":
-            _log.warning(
-                "Mapping mlx to cpu for LlavaForConditionalGeneration, use MLX framework!"
-            )
-            self.device = "cpu"
+        self.device = HuggingFaceVlmModel.map_device_to_cpu_if_mlx(self.device)
 
         self.use_cache = vlm_options.use_kv_cache
         self.max_new_tokens = vlm_options.max_new_tokens
-
+        self.temperature = vlm_options.temperature
+
         _log.debug(f"Available device for VLM: {self.device}")
 
         repo_cache_folder = vlm_options.repo_id.replace("/", "--")
@@ -93,10 +89,12 @@ class HuggingFaceVlmModel_LlavaForConditionalGeneration(BasePageModel):
                     if hi_res_image is not None:
                         im_width, im_height = hi_res_image.size
 
+                    """
                     if hi_res_image:
                         if hi_res_image.mode != "RGB":
                             hi_res_image = hi_res_image.convert("RGB")
-
+                    """
+
                     images = [hi_res_image]
 
                     # Define prompt structure
@@ -112,9 +110,10 @@ class HuggingFaceVlmModel_LlavaForConditionalGeneration(BasePageModel):
                         **inputs,
                         max_new_tokens=self.max_new_tokens,
                         use_cache=self.use_cache,  # Enables KV caching which can improve performance
+                        temperature=self.temperature,
                     )
 
-                    num_tokens = len(generate_ids[0])
+                    #num_tokens = len(generate_ids[0])
                     generation_time = time.time() - start_time
 
                     response = self.processor.batch_decode(
@@ -125,7 +124,7 @@ class HuggingFaceVlmModel_LlavaForConditionalGeneration(BasePageModel):
 
                     page.predictions.vlm_response = VlmPrediction(
                         text=response,
-                        generated_tokens=num_tokens,
+                        #generated_tokens=num_tokens,
                         generation_time=generation_time,
                     )
 
@@ -134,7 +133,6 @@ class HuggingFaceVlmModel_LlavaForConditionalGeneration(BasePageModel):
     def formulate_prompt(self) -> str:
         """Formulate a prompt for the VLM."""
         if self.vlm_options.repo_id == "mistral-community/pixtral-12b":
-            # prompt = f"[INST]{self.vlm_options.prompt}\n[IMG][/INST]"
             chat = [
                 {
                     "role": "user",
diff --git a/docs/examples/minimal_vlm_pipeline.py b/docs/examples/minimal_vlm_pipeline.py
index e240f6ce..be2afe06 100644
--- a/docs/examples/minimal_vlm_pipeline.py
+++ b/docs/examples/minimal_vlm_pipeline.py
@@ -187,9 +187,9 @@ if __name__ == "__main__":
     rows = []
     for vlm_options in [
         # smoldocling_vlm_conversion_options, \
-        # smoldocling_vlm_mlx_conversion_options, \
+        smoldocling_vlm_mlx_conversion_options, \
         # granite_vision_vlm_conversion_options, \
-        phi_vlm_conversion_options, \
+        # phi_vlm_conversion_options, \
         # qwen25_vl_3b_vlm_mlx_conversion_options, \
         # pixtral_12b_vlm_mlx_conversion_options,
         # pixtral_12b_vlm_conversion_options,
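
Usage sketch (not part of the patch): the snippet below illustrates how one of the presets that now pins temperature=0.0 could be run through the VLM pipeline. It is a minimal sketch under two assumptions: the module path docling.datamodel.pipeline_model_specializations exists only on this branch, and "report.pdf" is a placeholder input.

# Minimal sketch, not the canonical example: assumes this branch's module layout;
# "report.pdf" is a placeholder document path.
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_model_specializations import (
    smoldocling_vlm_conversion_options,
)
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# Preset now carries temperature=0.0 (greedy decoding) and scale=2.0 for page images.
pipeline_options = VlmPipelineOptions(
    vlm_options=smoldocling_vlm_conversion_options,
)

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)

result = converter.convert("report.pdf")  # placeholder input
print(result.document.export_to_markdown())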