diff --git a/docling/cli/main.py b/docling/cli/main.py
index cd2f040b..88edde0b 100644
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@@ -29,13 +29,6 @@ from docling.datamodel.base_models import (
     OutputFormat,
 )
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_model_specializations import (
-    VlmModelType,
-    granite_vision_vlm_conversion_options,
-    granite_vision_vlm_ollama_conversion_options,
-    smoldocling_vlm_conversion_options,
-    smoldocling_vlm_mlx_conversion_options,
-)
 from docling.datamodel.pipeline_options import (
     AcceleratorDevice,
     AcceleratorOptions,
@@ -48,6 +41,13 @@ from docling.datamodel.pipeline_options import (
     TableFormerMode,
     VlmPipelineOptions,
 )
+from docling.datamodel.pipeline_vlm_model_spec import (
+    GRANITE_VISION_OLLAMA,
+    GRANITE_VISION_TRANSFORMERS,
+    SMOLDOCLING_MLX,
+    SMOLDOCLING_TRANSFORMERS,
+    VlmModelType,
+)
 from docling.datamodel.settings import settings
 from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
 from docling.models.factories import get_ocr_factory
@@ -549,20 +549,16 @@ def convert(  # noqa: C901
             )
 
             if vlm_model == VlmModelType.GRANITE_VISION:
-                pipeline_options.vlm_options = granite_vision_vlm_conversion_options
+                pipeline_options.vlm_options = GRANITE_VISION_TRANSFORMERS
             elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA:
-                pipeline_options.vlm_options = (
-                    granite_vision_vlm_ollama_conversion_options
-                )
+                pipeline_options.vlm_options = GRANITE_VISION_OLLAMA
             elif vlm_model == VlmModelType.SMOLDOCLING:
-                pipeline_options.vlm_options = smoldocling_vlm_conversion_options
+                pipeline_options.vlm_options = SMOLDOCLING_TRANSFORMERS
                 if sys.platform == "darwin":
                     try:
                         import mlx_vlm
 
-                        pipeline_options.vlm_options = (
-                            smoldocling_vlm_mlx_conversion_options
-                        )
+                        pipeline_options.vlm_options = SMOLDOCLING_MLX
                     except ImportError:
                         _log.warning(
                             "To run SmolDocling faster, please install mlx-vlm:\n"
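Note for reviewers: the `convert()` branch above is the only CLI logic that changes; each `--vlm-model` value now maps one-to-one onto a renamed constant. A minimal sketch of the same selection done programmatically, using only imports introduced in this diff (the `mlx_vlm` probe mirrors the darwin fallback above):

import sys

from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.datamodel.pipeline_vlm_model_spec import (
    SMOLDOCLING_MLX,
    SMOLDOCLING_TRANSFORMERS,
)

pipeline_options = VlmPipelineOptions()
pipeline_options.vlm_options = SMOLDOCLING_TRANSFORMERS

if sys.platform == "darwin":
    try:
        import mlx_vlm  # noqa: F401 -- probe for the MLX backend only

        pipeline_options.vlm_options = SMOLDOCLING_MLX
    except ImportError:
        pass  # stay on the Transformers weights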
Literal["hf_model_options"] = "hf_model_options" + + repo_id: str + load_in_8bit: bool = True + llm_int8_threshold: float = 6.0 + quantized: bool = False + + inference_framework: InferenceFramework + response_format: ResponseFormat + + scale: float = 2.0 + + temperature: float = 0.0 + stop_strings: list[str] = [] + + use_kv_cache: bool = True + max_new_tokens: int = 4096 + + @property + def repo_cache_folder(self) -> str: + return self.repo_id.replace("/", "--") + + +class ApiVlmOptions(BaseVlmOptions): + kind: Literal["api_model_options"] = "api_model_options" + + url: AnyUrl = AnyUrl( + "http://localhost:11434/v1/chat/completions" + ) # Default to ollama + headers: Dict[str, str] = {} + params: Dict[str, Any] = {} + scale: float = 2.0 + timeout: float = 60 + concurrency: int = 1 + response_format: ResponseFormat + + class VlmPipelineOptions(PaginatedPipelineOptions): generate_page_images: bool = True force_backend_text: bool = ( diff --git a/docling/datamodel/pipeline_model_specializations.py b/docling/datamodel/pipeline_vlm_model_spec.py similarity index 59% rename from docling/datamodel/pipeline_model_specializations.py rename to docling/datamodel/pipeline_vlm_model_spec.py index 33d968ff..6547600d 100644 --- a/docling/datamodel/pipeline_model_specializations.py +++ b/docling/datamodel/pipeline_vlm_model_spec.py @@ -1,83 +1,22 @@ import logging from enum import Enum -from pathlib import Path -from typing import Any, ClassVar, Dict, List, Literal, Optional, Union from pydantic import ( AnyUrl, - BaseModel, +) + +from docling.datamodel.pipeline_options import ( + ApiVlmOptions, + HuggingFaceVlmOptions, + InferenceFramework, + ResponseFormat, ) _log = logging.getLogger(__name__) -class BaseVlmOptions(BaseModel): - kind: str - prompt: str - - -class ResponseFormat(str, Enum): - DOCTAGS = "doctags" - MARKDOWN = "markdown" - HTML = "html" - - -class InferenceFramework(str, Enum): - MLX = "mlx" - TRANSFORMERS = "transformers" - TRANSFORMERS_AutoModelForVision2Seq = "transformers-AutoModelForVision2Seq" - TRANSFORMERS_AutoModelForCausalLM = "transformers-AutoModelForCausalLM" - TRANSFORMERS_LlavaForConditionalGeneration = ( - "transformers-LlavaForConditionalGeneration" - ) - - -class HuggingFaceVlmOptions(BaseVlmOptions): - kind: Literal["hf_model_options"] = "hf_model_options" - - repo_id: str - load_in_8bit: bool = True - llm_int8_threshold: float = 6.0 - quantized: bool = False - - inference_framework: InferenceFramework - response_format: ResponseFormat - - scale: float = 2.0 - - temperature: float = 0.0 - stop_strings: list[str] = [] - - use_kv_cache: bool = True - max_new_tokens: int = 4096 - - @property - def repo_cache_folder(self) -> str: - return self.repo_id.replace("/", "--") - - -class ApiVlmOptions(BaseVlmOptions): - kind: Literal["api_model_options"] = "api_model_options" - - url: AnyUrl = AnyUrl( - "http://localhost:11434/v1/chat/completions" - ) # Default to ollama - headers: Dict[str, str] = {} - params: Dict[str, Any] = {} - scale: float = 2.0 - timeout: float = 60 - concurrency: int = 1 - response_format: ResponseFormat - - -class VlmModelType(str, Enum): - SMOLDOCLING = "smoldocling" - GRANITE_VISION = "granite_vision" - GRANITE_VISION_OLLAMA = "granite_vision_ollama" - - # SmolDocling -smoldocling_vlm_mlx_conversion_options = HuggingFaceVlmOptions( +SMOLDOCLING_MLX = HuggingFaceVlmOptions( repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16", prompt="Convert this page to docling.", response_format=ResponseFormat.DOCTAGS, @@ -86,7 +25,7 @@ 
diff --git a/docling/datamodel/pipeline_model_specializations.py b/docling/datamodel/pipeline_vlm_model_spec.py
similarity index 59%
rename from docling/datamodel/pipeline_model_specializations.py
rename to docling/datamodel/pipeline_vlm_model_spec.py
index 33d968ff..6547600d 100644
--- a/docling/datamodel/pipeline_model_specializations.py
+++ b/docling/datamodel/pipeline_vlm_model_spec.py
@@ -1,83 +1,22 @@
 import logging
 from enum import Enum
-from pathlib import Path
-from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
 
 from pydantic import (
     AnyUrl,
-    BaseModel,
+)
+
+from docling.datamodel.pipeline_options import (
+    ApiVlmOptions,
+    HuggingFaceVlmOptions,
+    InferenceFramework,
+    ResponseFormat,
 )
 
 _log = logging.getLogger(__name__)
 
 
-class BaseVlmOptions(BaseModel):
-    kind: str
-    prompt: str
-
-
-class ResponseFormat(str, Enum):
-    DOCTAGS = "doctags"
-    MARKDOWN = "markdown"
-    HTML = "html"
-
-
-class InferenceFramework(str, Enum):
-    MLX = "mlx"
-    TRANSFORMERS = "transformers"
-    TRANSFORMERS_AutoModelForVision2Seq = "transformers-AutoModelForVision2Seq"
-    TRANSFORMERS_AutoModelForCausalLM = "transformers-AutoModelForCausalLM"
-    TRANSFORMERS_LlavaForConditionalGeneration = (
-        "transformers-LlavaForConditionalGeneration"
-    )
-
-
-class HuggingFaceVlmOptions(BaseVlmOptions):
-    kind: Literal["hf_model_options"] = "hf_model_options"
-
-    repo_id: str
-    load_in_8bit: bool = True
-    llm_int8_threshold: float = 6.0
-    quantized: bool = False
-
-    inference_framework: InferenceFramework
-    response_format: ResponseFormat
-
-    scale: float = 2.0
-
-    temperature: float = 0.0
-    stop_strings: list[str] = []
-
-    use_kv_cache: bool = True
-    max_new_tokens: int = 4096
-
-    @property
-    def repo_cache_folder(self) -> str:
-        return self.repo_id.replace("/", "--")
-
-
-class ApiVlmOptions(BaseVlmOptions):
-    kind: Literal["api_model_options"] = "api_model_options"
-
-    url: AnyUrl = AnyUrl(
-        "http://localhost:11434/v1/chat/completions"
-    )  # Default to ollama
-    headers: Dict[str, str] = {}
-    params: Dict[str, Any] = {}
-    scale: float = 2.0
-    timeout: float = 60
-    concurrency: int = 1
-    response_format: ResponseFormat
-
-
-class VlmModelType(str, Enum):
-    SMOLDOCLING = "smoldocling"
-    GRANITE_VISION = "granite_vision"
-    GRANITE_VISION_OLLAMA = "granite_vision_ollama"
-
-
 # SmolDocling
-smoldocling_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
+SMOLDOCLING_MLX = HuggingFaceVlmOptions(
     repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16",
     prompt="Convert this page to docling.",
     response_format=ResponseFormat.DOCTAGS,
@@ -86,7 +25,7 @@ smoldocling_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
     temperature=0.0,
 )
 
-smoldocling_vlm_conversion_options = HuggingFaceVlmOptions(
+SMOLDOCLING_TRANSFORMERS = HuggingFaceVlmOptions(
     repo_id="ds4sd/SmolDocling-256M-preview",
     prompt="Convert this page to docling.",
     response_format=ResponseFormat.DOCTAGS,
@@ -96,7 +35,7 @@ smoldocling_vlm_conversion_options = HuggingFaceVlmOptions(
 )
 
 # GraniteVision
-granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
+GRANITE_VISION_TRANSFORMERS = HuggingFaceVlmOptions(
     repo_id="ibm-granite/granite-vision-3.2-2b",
     prompt="Convert this page to markdown. Do not miss any text and only output the bare MarkDown!",
     response_format=ResponseFormat.MARKDOWN,
@@ -105,7 +44,7 @@ granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
     temperature=0.0,
 )
 
-granite_vision_vlm_ollama_conversion_options = ApiVlmOptions(
+GRANITE_VISION_OLLAMA = ApiVlmOptions(
     url=AnyUrl("http://localhost:11434/v1/chat/completions"),
     params={"model": "granite3.2-vision:2b"},
     prompt="Convert this page to markdown. Do not miss any text and only output the bare MarkDown!",
@@ -116,7 +55,7 @@ granite_vision_vlm_ollama_conversion_options = ApiVlmOptions(
 )
 
 # Pixtral
-pixtral_12b_vlm_conversion_options = HuggingFaceVlmOptions(
+PIXTRAL_12B_TRANSFORMERS = HuggingFaceVlmOptions(
     repo_id="mistral-community/pixtral-12b",
     prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
     response_format=ResponseFormat.MARKDOWN,
@@ -125,7 +64,7 @@ pixtral_12b_vlm_conversion_options = HuggingFaceVlmOptions(
     temperature=0.0,
 )
 
-pixtral_12b_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
+PIXTRAL_12B_MLX = HuggingFaceVlmOptions(
     repo_id="mlx-community/pixtral-12b-bf16",
     prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
     response_format=ResponseFormat.MARKDOWN,
@@ -135,7 +74,7 @@ pixtral_12b_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
 )
 
 # Phi4
-phi_vlm_conversion_options = HuggingFaceVlmOptions(
+PHI4_TRANSFORMERS = HuggingFaceVlmOptions(
     repo_id="microsoft/Phi-4-multimodal-instruct",
     prompt="Convert this page to MarkDown. Do not miss any text and only output the bare markdown",
     response_format=ResponseFormat.MARKDOWN,
@@ -145,7 +84,7 @@ phi_vlm_conversion_options = HuggingFaceVlmOptions(
 )
 
 # Qwen
-qwen25_vl_3b_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
+QWEN25_VL_3B_MLX = HuggingFaceVlmOptions(
     repo_id="mlx-community/Qwen2.5-VL-3B-Instruct-bf16",
     prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
     response_format=ResponseFormat.MARKDOWN,
@@ -155,7 +94,7 @@ qwen25_vl_3b_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
 )
 
 # Gemma-3
-gemma_3_12b_mlx_conversion_options = HuggingFaceVlmOptions(
+GEMMA3_12B_MLX = HuggingFaceVlmOptions(
     repo_id="mlx-community/gemma-3-12b-it-bf16",
     prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
     response_format=ResponseFormat.MARKDOWN,
@@ -164,7 +103,7 @@ gemma_3_12b_mlx_conversion_options = HuggingFaceVlmOptions(
     temperature=0.0,
 )
 
-gemma_3_27b_mlx_conversion_options = HuggingFaceVlmOptions(
+GEMMA3_27B_MLX = HuggingFaceVlmOptions(
     repo_id="mlx-community/gemma-3-27b-it-bf16",
     prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
     response_format=ResponseFormat.MARKDOWN,
@@ -172,3 +111,9 @@ gemma_3_27b_mlx_conversion_options = HuggingFaceVlmOptions(
     scale=2.0,
     temperature=0.0,
 )
+
+
+class VlmModelType(str, Enum):
+    SMOLDOCLING = "smoldocling"
+    GRANITE_VISION = "granite_vision"
+    GRANITE_VISION_OLLAMA = "granite_vision_ollama"
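With the presets now plain module-level constants, adding a variant is just another instance of the options classes. A hypothetical Ollama preset in the same shape as `GRANITE_VISION_OLLAMA` above (the model tag and prompt are illustrative, not part of this diff):

from pydantic import AnyUrl

from docling.datamodel.pipeline_options import ApiVlmOptions, ResponseFormat

MY_OLLAMA_VLM = ApiVlmOptions(
    url=AnyUrl("http://localhost:11434/v1/chat/completions"),  # Ollama default
    params={"model": "llava:7b"},  # illustrative model tag
    prompt="Convert this page to markdown.",
    scale=2.0,
    timeout=90,
    response_format=ResponseFormat.MARKDOWN,
)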
diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py
index 8acd471c..0c6237e1 100644
--- a/docling/pipeline/vlm_pipeline.py
+++ b/docling/pipeline/vlm_pipeline.py
@@ -27,13 +27,11 @@ from docling.backend.md_backend import MarkdownDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.datamodel.base_models import InputFormat, Page
 from docling.datamodel.document import ConversionResult, InputDocument
-from docling.datamodel.pipeline_model_specializations import (
+from docling.datamodel.pipeline_options import (
     ApiVlmOptions,
     HuggingFaceVlmOptions,
     InferenceFramework,
     ResponseFormat,
-)
-from docling.datamodel.pipeline_options import (
     VlmPipelineOptions,
 )
 from docling.datamodel.settings import settings
diff --git a/docling/utils/model_downloader.py b/docling/utils/model_downloader.py
index e2b4b194..585f22a6 100644
--- a/docling/utils/model_downloader.py
+++ b/docling/utils/model_downloader.py
@@ -2,14 +2,14 @@ import logging
 from pathlib import Path
 from typing import Optional
 
-from docling.datamodel.pipeline_model_specializations import (
-    smoldocling_vlm_conversion_options,
-    smoldocling_vlm_mlx_conversion_options,
-)
 from docling.datamodel.pipeline_options import (
     granite_picture_description,
     smolvlm_picture_description,
 )
+from docling.datamodel.pipeline_vlm_model_spec import (
+    SMOLDOCLING_MLX,
+    SMOLDOCLING_TRANSFORMERS,
+)
 from docling.datamodel.settings import settings
 from docling.models.code_formula_model import CodeFormulaModel
 from docling.models.document_picture_classifier import DocumentPictureClassifier
@@ -87,8 +87,8 @@ def download_models(
     if with_smoldocling:
         _log.info("Downloading SmolDocling model...")
         HuggingFaceVlmModel.download_models(
-            repo_id=smoldocling_vlm_conversion_options.repo_id,
-            local_dir=output_dir / smoldocling_vlm_conversion_options.repo_cache_folder,
+            repo_id=SMOLDOCLING_TRANSFORMERS.repo_id,
+            local_dir=output_dir / SMOLDOCLING_TRANSFORMERS.repo_cache_folder,
             force=force,
             progress=progress,
         )
@@ -96,9 +96,8 @@ def download_models(
     if with_smoldocling_mlx:
         _log.info("Downloading SmolDocling MLX model...")
         HuggingFaceVlmModel.download_models(
-            repo_id=smoldocling_vlm_mlx_conversion_options.repo_id,
-            local_dir=output_dir
-            / smoldocling_vlm_mlx_conversion_options.repo_cache_folder,
+            repo_id=SMOLDOCLING_MLX.repo_id,
+            local_dir=output_dir / SMOLDOCLING_MLX.repo_cache_folder,
             force=force,
             progress=progress,
        )
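The downloader now reads `repo_id` and `repo_cache_folder` off the renamed constants; callers are unaffected. A sketch of prefetching just the SmolDocling weights through the public helper, assuming the keyword names visible in the hunks above (the cache path is illustrative):

from pathlib import Path

from docling.utils.model_downloader import download_models

# Prefetch the SmolDocling (Transformers) checkpoint into a local cache.
download_models(
    output_dir=Path("./model-cache"),  # illustrative location
    with_smoldocling=True,
    progress=True,
)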
diff --git a/docs/examples/compare_vlm_models.py b/docs/examples/compare_vlm_models.py
index 2d8915de..457c7d58 100644
--- a/docs/examples/compare_vlm_models.py
+++ b/docs/examples/compare_vlm_models.py
@@ -13,20 +13,20 @@ from docling_core.types.doc.document import DEFAULT_EXPORT_LABELS
 from tabulate import tabulate
 
 from docling.datamodel.base_models import InputFormat
-from docling.datamodel.pipeline_model_specializations import (
-    gemma_3_12b_mlx_conversion_options,
-    granite_vision_vlm_conversion_options,
-    granite_vision_vlm_ollama_conversion_options,
-    phi_vlm_conversion_options,
-    pixtral_12b_vlm_conversion_options,
-    pixtral_12b_vlm_mlx_conversion_options,
-    qwen25_vl_3b_vlm_mlx_conversion_options,
-    smoldocling_vlm_conversion_options,
-    smoldocling_vlm_mlx_conversion_options,
-)
 from docling.datamodel.pipeline_options import (
     VlmPipelineOptions,
 )
+from docling.datamodel.pipeline_vlm_model_spec import (
+    GEMMA3_12B_MLX,
+    GRANITE_VISION_OLLAMA,
+    GRANITE_VISION_TRANSFORMERS,
+    PHI4_TRANSFORMERS,
+    PIXTRAL_12B_MLX,
+    PIXTRAL_12B_TRANSFORMERS,
+    QWEN25_VL_3B_MLX,
+    SMOLDOCLING_MLX,
+    SMOLDOCLING_TRANSFORMERS,
+)
 from docling.document_converter import DocumentConverter, PdfFormatOption
 from docling.pipeline.vlm_pipeline import VlmPipeline
 
@@ -120,16 +120,16 @@ if __name__ == "__main__":
 
     rows = []
     for vlm_options in [
         ## DocTags / SmolDocling models
-        smoldocling_vlm_conversion_options,
-        # smoldocling_vlm_mlx_conversion_options,
+        SMOLDOCLING_TRANSFORMERS,
+        SMOLDOCLING_MLX,
         ## Markdown models (using MLX framework)
-        # qwen25_vl_3b_vlm_mlx_conversion_options,
-        # pixtral_12b_vlm_mlx_conversion_options,
-        # gemma_3_12b_mlx_conversion_options,
+        QWEN25_VL_3B_MLX,
+        PIXTRAL_12B_MLX,
+        GEMMA3_12B_MLX,
         ## Markdown models (using Transformers framework)
-        # granite_vision_vlm_conversion_options,
-        phi_vlm_conversion_options,
-        pixtral_12b_vlm_conversion_options,
+        GRANITE_VISION_TRANSFORMERS,
+        PHI4_TRANSFORMERS,
+        PIXTRAL_12B_TRANSFORMERS,
     ]:
         pipeline_options.vlm_options = vlm_options
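For a single preset rather than the full sweep in this example script, the same wiring reduces to a few lines; `DocumentConverter`, `PdfFormatOption`, and `VlmPipeline` come from the imports above (the input path is illustrative):

from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.datamodel.pipeline_vlm_model_spec import SMOLDOCLING_TRANSFORMERS
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

pipeline_options = VlmPipelineOptions()
pipeline_options.vlm_options = SMOLDOCLING_TRANSFORMERS

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)

result = converter.convert("document.pdf")  # illustrative input
print(result.document.export_to_markdown())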