diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py
index c950bf0b..3ee3702b 100644
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -317,8 +317,7 @@ smoldocling_vlm_conversion_options = HuggingFaceVlmOptions(
 
 granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
     repo_id="ibm-granite/granite-vision-3.1-2b-preview",
-    # prompt="OCR the full page to markdown.",
-    prompt="OCR this image.",
+    prompt="OCR the full page to markdown.",
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.TRANSFORMERS_AutoModelForVision2Seq,
 )
diff --git a/docling/models/hf_vlm_model.py b/docling/models/hf_vlm_model.py
index 38300add..64023545 100644
--- a/docling/models/hf_vlm_model.py
+++ b/docling/models/hf_vlm_model.py
@@ -18,6 +18,8 @@ _log = logging.getLogger(__name__)
 
 
 class HuggingFaceVlmModel(BasePageModel):
+
+    """
     def __init__(
         self,
         enabled: bool,
@@ -89,7 +91,8 @@ class HuggingFaceVlmModel(BasePageModel):
                 ),
                 # trust_remote_code=True,
             )  # .to(self.device)
-
+    """
+
     @staticmethod
     def download_models(
         repo_id: str,
@@ -111,6 +114,7 @@ class HuggingFaceVlmModel(BasePageModel):
 
         return Path(download_path)
 
+    """
     def __call__(
         self, conv_res: ConversionResult, page_batch: Iterable[Page]
     ) -> Iterable[Page]:
@@ -185,3 +189,4 @@ class HuggingFaceVlmModel(BasePageModel):
                 page.predictions.vlm_response = VlmPrediction(text=page_tags)
 
             yield page
+    """
diff --git a/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForCausalLM.py b/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForCausalLM.py
index a64288cd..692b77e6 100644
--- a/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForCausalLM.py
+++ b/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForCausalLM.py
@@ -42,9 +42,9 @@ class HuggingFaceVlmModel_AutoModelForCausalLM(BasePageModel):
             )
 
             self.device = decide_device(accelerator_options.device)
-            self.device = "cpu"  # device
+            self.device = "cpu"  # FIXME
 
-            _log.debug(f"Available device for HuggingFace VLM: {self.device}")
+            _log.debug(f"Available device for VLM: {self.device}")
             repo_cache_folder = vlm_options.repo_id.replace("/", "--")
 
             # PARAMETERS:
@@ -154,6 +154,7 @@ class HuggingFaceVlmModel_AutoModelForCausalLM(BasePageModel):
                 ).to(self.device)
 
                 # Generate response
+                start_time = time.time()
                 generate_ids = self.vlm_model.generate(
                     **inputs,
                     max_new_tokens=128,
                 )
 
                 generate_ids = generate_ids[:, inputs["input_ids"].shape[1] :]
-                # num_tokens = len(generate_ids[0])
+                num_tokens = len(generate_ids[0])
+                generation_time = time.time() - start_time
+
                 response = self.processor.batch_decode(
                     generate_ids,
                     skip_special_tokens=True,
                     clean_up_tokenization_spaces=False,
                 )[0]
 
+                _log.debug(
+                    f"Generated {num_tokens} tokens in time {generation_time:.2f} seconds."
+                )
+
                 # inference_time = time.time() - start_time
                 # tokens_per_second = num_tokens / generation_time
                 # print("")
diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py
index 660ea3f4..5bfb82b2 100644
--- a/docling/pipeline/vlm_pipeline.py
+++ b/docling/pipeline/vlm_pipeline.py
@@ -109,10 +109,10 @@ class VlmPipeline(PaginatedPipeline):
             ]
         else:
             _log.warning(
-                "falling back to HuggingFaceVlmModel (AutoModelForVision2Seq) pipeline"
+                "falling back to HuggingFaceVlmModel_AutoModelForVision2Seq pipeline"
            )
             self.build_pipe = [
-                HuggingFaceVlmModel(
+                HuggingFaceVlmModel_AutoModelForVision2Seq(
                     enabled=True,  # must be always enabled for this pipeline to make sense.
                     artifacts_path=artifacts_path,
                     accelerator_options=pipeline_options.accelerator_options,
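For reviewers who want to exercise the changed code paths, here is a minimal sketch of driving the VLM pipeline with the Granite Vision preset whose prompt is updated above. It assumes the `VlmPipelineOptions`/`VlmPipeline` API on this branch and uses a placeholder input path; since `granite_vision_vlm_conversion_options` selects `TRANSFORMERS_AutoModelForVision2Seq`, conversion is expected to go through the `HuggingFaceVlmModel_AutoModelForVision2Seq` model rather than the now commented-out `HuggingFaceVlmModel`.

```python
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
    VlmPipelineOptions,
    granite_vision_vlm_conversion_options,
)
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# Granite Vision preset touched in this diff: markdown response format,
# prompt "OCR the full page to markdown.", Vision2Seq inference framework.
pipeline_options = VlmPipelineOptions(
    vlm_options=granite_vision_vlm_conversion_options,
)

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)

# "sample.pdf" is a placeholder; the converted document is exported as markdown.
result = converter.convert("sample.pdf")
print(result.document.export_to_markdown())
```

The new per-page timing log (`Generated {num_tokens} tokens in time ...`) is only emitted on the `AutoModelForCausalLM` path, so it will not show up for this preset; it becomes visible at DEBUG log level when a `vlm_options` entry using that framework is selected instead.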