diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py
index 40947fd9..4fb2885e 100644
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -39,6 +39,7 @@ from docling.datamodel.pipeline_options_vlm_model import (
     InferenceFramework,
     InlineVlmOptions,
     ResponseFormat,
+    TwoStageVlmOptions,
 )
 from docling.datamodel.vlm_model_specs import (
     GRANITE_VISION_OLLAMA as granite_vision_vlm_ollama_conversion_options,
@@ -276,9 +277,9 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
         False  # (To be used with vlms, or other generative models)
     )
     # If True, text from backend will be used instead of generated text
-    vlm_options: Union[InlineVlmOptions, ApiVlmOptions] = (
+    vlm_options: Union[InlineVlmOptions, ApiVlmOptions, TwoStageVlmOptions] = (
         smoldocling_vlm_conversion_options
-        #SMOLDOCLING_TRANSFORMERS
+        # SMOLDOCLING_TRANSFORMERS
     )
 
 
@@ -294,9 +295,6 @@ class AsrPipelineOptions(PipelineOptions):
     artifacts_path: Optional[Union[Path, str]] = None
 
 
-
-
-    
 class PdfPipelineOptions(PaginatedPipelineOptions):
     """Options for the PDF pipeline."""
 
diff --git a/docling/datamodel/pipeline_options_asr_model.py b/docling/datamodel/pipeline_options_asr_model.py
index 12109ad1..f26aad76 100644
--- a/docling/datamodel/pipeline_options_asr_model.py
+++ b/docling/datamodel/pipeline_options_asr_model.py
@@ -7,8 +7,8 @@ from typing_extensions import deprecated
 from docling.datamodel.accelerator_options import AcceleratorDevice
 
 # from docling.datamodel.pipeline_options_vlm_model import (
-    # InferenceFramework,
-    # TransformersModelType,
+# InferenceFramework,
+# TransformersModelType,
 # )
 
 
diff --git a/docling/datamodel/pipeline_options_vlm_model.py b/docling/datamodel/pipeline_options_vlm_model.py
index 3cf2efb0..66c97ca4 100644
--- a/docling/datamodel/pipeline_options_vlm_model.py
+++ b/docling/datamodel/pipeline_options_vlm_model.py
@@ -6,6 +6,9 @@ from pydantic import AnyUrl, BaseModel
 from typing_extensions import deprecated
 
 from docling.datamodel.accelerator_options import AcceleratorDevice
+from docling.datamodel.layout_model_specs import (
+    LayoutModelConfig,
+)
 
 
 class BaseVlmOptions(BaseModel):
@@ -88,12 +91,11 @@ class ApiVlmOptions(BaseVlmOptions):
     concurrency: int = 1
     response_format: ResponseFormat
 
-from docling.datamodel.layout_model_specs import (
-    LayoutModelConfig,
-)
-    
+
 class TwoStageVlmOptions(BaseModel):
     kind: Literal["inline_two_stage_model_options"] = "inline_two_stage_model_options"
 
-    vlm_options: Union[InlineVlmOptions, ApiVlmOptions] # = SMOLDOCLING_TRANSFORMERS
-    layout_options: LayoutModelConfig # = DOCLING_LAYOUT_V2
+    response_format: ResponseFormat  # format of the final (stage 2) VLM response
+
+    layout_options: LayoutModelConfig  # = DOCLING_LAYOUT_V2
+    vlm_options: Union[InlineVlmOptions, ApiVlmOptions]  # = SMOLDOCLING_TRANSFORMERS
diff --git a/docling/datamodel/vlm_model_specs.py b/docling/datamodel/vlm_model_specs.py
index 25815a92..8025d02f 100644
--- a/docling/datamodel/vlm_model_specs.py
+++ b/docling/datamodel/vlm_model_specs.py
@@ -143,7 +143,9 @@ GEMMA3_27B_MLX = InlineVlmOptions(
 )
 
 VLM2STAGE = TwoStageVlmOptions(
-    vlm_options=SMOLDOCLING_MLX, layout_options=DOCLING_LAYOUT_HERON
+    vlm_options=SMOLDOCLING_MLX,
+    layout_options=DOCLING_LAYOUT_HERON,
+    response_format=SMOLDOCLING_MLX.response_format,
 )
 
 
diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py
index 0ee06efb..1c94d977 100644
--- a/docling/pipeline/vlm_pipeline.py
+++ b/docling/pipeline/vlm_pipeline.py
@@ -26,7 +26,7 @@ from docling.backend.md_backend import MarkdownDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.datamodel.base_models import InputFormat, Page
 from docling.datamodel.document import ConversionResult, InputDocument
-from docling.datamodel.pipeline_options import VlmPipelineOptions
+from docling.datamodel.pipeline_options import LayoutOptions, VlmPipelineOptions
 from docling.datamodel.pipeline_options_vlm_model import (
     ApiVlmOptions,
     InferenceFramework,
@@ -115,38 +115,47 @@ class VlmPipeline(PaginatedPipeline):
                 TwoStageVlmOptions, self.pipeline_options.vlm_options
             )
 
-            layout_options = twostagevlm_options.layout_options
-            vlm_options = twostagevlm_options.vlm_options
+            stage_1_options = twostagevlm_options.layout_options
+            stage_2_options = twostagevlm_options.vlm_options
 
             layout_model = LayoutModel(
                 artifacts_path=artifacts_path,
                 accelerator_options=pipeline_options.accelerator_options,
-                options=layout_options,
+                options=LayoutOptions(
+                    create_orphan_clusters=False, model_spec=stage_1_options
+                ),
             )
 
-            if vlm_options.inference_framework == InferenceFramework.MLX:
+            if (
+                isinstance(stage_2_options, InlineVlmOptions)
+                and stage_2_options.inference_framework == InferenceFramework.MLX
+            ):
                 vlm_model_mlx = HuggingFaceMlxModel(
                     enabled=True,  # must be always enabled for this pipeline to make sense.
                     artifacts_path=artifacts_path,
                     accelerator_options=pipeline_options.accelerator_options,
-                    vlm_options=vlm_options,
+                    vlm_options=stage_2_options,
                 )
                 self.build_pipe = [
                     TwoStageVlmModel(layout_model=layout_model, vlm_model=vlm_model_mlx)
                 ]
-            elif vlm_options.inference_framework == InferenceFramework.TRANSFORMERS:
+            elif (
+                isinstance(stage_2_options, InlineVlmOptions)
+                and stage_2_options.inference_framework
+                == InferenceFramework.TRANSFORMERS
+            ):
                 vlm_model_hf = HuggingFaceTransformersVlmModel(
                     enabled=True,  # must be always enabled for this pipeline to make sense.
                     artifacts_path=artifacts_path,
                     accelerator_options=pipeline_options.accelerator_options,
-                    vlm_options=vlm_options,
+                    vlm_options=stage_2_options,
                 )
                 self.build_pipe = [
                     TwoStageVlmModel(layout_model=layout_model, vlm_model=vlm_model_hf)
                 ]
             else:
                 raise ValueError(
-                    f"Could not instantiate the right type of VLM pipeline: {vlm_options.inference_framework}"
+                    f"Could not instantiate the right type of VLM pipeline: {stage_2_options}"
                 )
 
         self.enrichment_pipe = [