diff --git a/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForCausalLM.py b/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForCausalLM.py
index b7f91e57..f26654ab
--- a/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForCausalLM.py
+++ b/docling/models/hf_vlm_models/hf_vlm_model_AutoModelForCausalLM.py
@@ -137,6 +137,8 @@ class HuggingFaceVlmModel_AutoModelForCausalLM(BasePageModel):
                     # hi_res_image = page.get_image(scale=2.0)  # 144dpi
                     hi_res_image = page.get_image(scale=1.0)  # 72dpi
 
+                    hi_res_image.show()
+
                     if hi_res_image is not None:
                         im_width, im_height = hi_res_image.size
@@ -195,7 +197,7 @@ class HuggingFaceVlmModel_AutoModelForCausalLM(BasePageModel):
                     # Part 1: Image Processing
                     print("\n--- IMAGE PROCESSING ---")
                     # image_url = 'https://www.ilankelman.org/stopsigns/australia.jpg'
-                    prompt = f'{user_prompt}<|image_1|>OCR this image into MarkDown?{prompt_suffix}{assistant_prompt}'
+                    prompt = f'{user_prompt}<|image_1|>Convert this image into MarkDown and only return the bare MarkDown!{prompt_suffix}{assistant_prompt}'
                     print(f'>>> Prompt\n{prompt}')
                     inputs = self.processor(text=prompt, images=hi_res_image, return_tensors='pt').to(self.device)  # .to('cuda:0')
@@ -206,19 +208,20 @@ class HuggingFaceVlmModel_AutoModelForCausalLM(BasePageModel):
                         **inputs,
                         max_new_tokens=128,
                         generation_config=self.generation_config,
+                        num_logits_to_keep=1,
                     )
                     generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
-                    num_tokens = len(generated_ids[0])
+                    num_tokens = len(generate_ids[0])
                     response = self.processor.batch_decode(
                         generate_ids,
                         skip_special_tokens=True,
-                        clean_up_tokenization_spaces=False
+                        clean_up_tokenization_spaces=False,
                     )[0]
                     print(f'>>> Response\n{response}')
                     _log.debug(
-                        f"Generated {num_tokens} tokens in time {generation_time:.2f} seconds."
+                        f"Generated {num_tokens} tokens."
                     )
                     # inference_time = time.time() - start_time
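
For context on the changes above: the diff fixes a `NameError` (the decoded tensor is `generate_ids`, but the token count read from an undefined `generated_ids`), drops a log reference to an undefined `generation_time`, and threads `num_logits_to_keep=1` through `generate()`. Below is a minimal standalone sketch of the same generate/decode pattern outside docling. The model id, prompt template, and input file are assumptions for illustration; the diff itself only shows the docling-internal call site.

```python
# Minimal sketch of the Phi-style VLM generation pattern the diff touches.
# Assumptions: microsoft/Phi-3.5-vision-instruct as the checkpoint and
# "page.png" as the input image; neither is specified by the diff.
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

model_id = "microsoft/Phi-3.5-vision-instruct"
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, torch_dtype="auto"
)

image = Image.open("page.png")  # hypothetical page render
prompt = (
    "<|user|><|image_1|>Convert this image into MarkDown and only return "
    "the bare MarkDown!<|end|><|assistant|>"
)
inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)

generate_ids = model.generate(
    **inputs,
    max_new_tokens=128,
    # Keep logits only for the last position; some Phi remote-code
    # checkpoints expect this kwarg on recent transformers versions,
    # and it reduces memory during decoding.
    num_logits_to_keep=1,
)

# Strip the prompt tokens so only newly generated tokens are decoded;
# count tokens on generate_ids, the variable that actually exists.
generate_ids = generate_ids[:, inputs["input_ids"].shape[1]:]
num_tokens = len(generate_ids[0])
response = processor.batch_decode(
    generate_ids,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)[0]
print(f"Generated {num_tokens} tokens:\n{response}")
```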