diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..85260753 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +tests/data/** linguist-vendored +tests/data_scanned/** linguist-vendored +docs/** linguist-vendored diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 362380d5..f18fc88c 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -60,7 +60,7 @@ jobs: run: | for file in docs/examples/*.py; do # Skip batch_convert.py - if [[ "$(basename "$file")" =~ ^(batch_convert|minimal_vlm_pipeline|minimal|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model).py ]]; then + if [[ "$(basename "$file")" =~ ^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model).py ]]; then echo "Skipping $file" continue fi diff --git a/CHANGELOG.md b/CHANGELOG.md index 9678243c..040a3b55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,18 @@ +## [v2.35.0](https://github.com/docling-project/docling/releases/tag/v2.35.0) - 2025-06-02 + +### Feature + +* Add visualization of bbox on page with html export. ([#1663](https://github.com/docling-project/docling/issues/1663)) ([`b356b33`](https://github.com/docling-project/docling/commit/b356b33059bdeeaf1584d9d189cbf1c4832e367c)) + +### Fix + +* Guess HTML content starting with script tag ([#1673](https://github.com/docling-project/docling/issues/1673)) ([`984cb13`](https://github.com/docling-project/docling/commit/984cb137f6a8ae2f3a63623add6c474d97ef8739)) +* UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd0 in position 0: invalid continuation byte ([#1665](https://github.com/docling-project/docling/issues/1665)) ([`51d3450`](https://github.com/docling-project/docling/commit/51d34509156e2dbec9e697276681d59f9ca7e020)) + +### Documentation + +* Fix typo in index.md ([#1676](https://github.com/docling-project/docling/issues/1676)) ([`11ca4f7`](https://github.com/docling-project/docling/commit/11ca4f7a7bd8068bee472510dd71f1cd58f86f17)) + ## [v2.34.0](https://github.com/docling-project/docling/releases/tag/v2.34.0) - 2025-05-22 ### Feature diff --git a/README.md b/README.md index 3911f794..309e1030 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Docling simplifies document processing, parsing diverse formats — including ad * 🔒 Local execution capabilities for sensitive data and air-gapped environments * 🤖 Plug-and-play [integrations][integrations] incl. 
LangChain, LlamaIndex, Crew AI & Haystack for agentic AI * 🔍 Extensive OCR support for scanned PDFs and images -* 🥚 Support of Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) 🆕 +* 🥚 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) * 💻 Simple and convenient CLI ### Coming soon diff --git a/docling/cli/main.py b/docling/cli/main.py index 98a4c8d7..083f53b2 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -12,6 +12,12 @@ from typing import Annotated, Dict, List, Optional, Type import rich.table import typer +from docling_core.transforms.serializer.html import ( + HTMLDocSerializer, + HTMLOutputStyle, + HTMLParams, +) +from docling_core.transforms.visualizer.layout_visualizer import LayoutVisualizer from docling_core.types.doc import ImageRefMode from docling_core.utils.file import resolve_source_to_path from pydantic import TypeAdapter @@ -22,6 +28,7 @@ from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBacke from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend from docling.backend.pdf_backend import PdfDocumentBackend from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend +from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.base_models import ( ConversionStatus, FormatToExtensions, @@ -30,8 +37,6 @@ from docling.datamodel.base_models import ( ) from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( - AcceleratorDevice, - AcceleratorOptions, EasyOcrOptions, OcrOptions, PaginatedPipelineOptions, @@ -39,14 +44,16 @@ from docling.datamodel.pipeline_options import ( PdfPipeline, PdfPipelineOptions, TableFormerMode, - VlmModelType, VlmPipelineOptions, - granite_vision_vlm_conversion_options, - granite_vision_vlm_ollama_conversion_options, - smoldocling_vlm_conversion_options, - smoldocling_vlm_mlx_conversion_options, ) from docling.datamodel.settings import settings +from docling.datamodel.vlm_model_specs import ( + GRANITE_VISION_OLLAMA, + GRANITE_VISION_TRANSFORMERS, + SMOLDOCLING_MLX, + SMOLDOCLING_TRANSFORMERS, + VlmModelType, +) from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption from docling.models.factories import get_ocr_factory from docling.pipeline.vlm_pipeline import VlmPipeline @@ -156,6 +163,7 @@ def export_documents( export_json: bool, export_html: bool, export_html_split_page: bool, + show_layout: bool, export_md: bool, export_txt: bool, export_doctags: bool, @@ -189,9 +197,27 @@ def export_documents( if export_html_split_page: fname = output_dir / f"{doc_filename}.html" _log.info(f"writing HTML output to {fname}") - conv_res.document.save_as_html( - filename=fname, image_mode=image_export_mode, split_page_view=True - ) + if show_layout: + ser = HTMLDocSerializer( + doc=conv_res.document, + params=HTMLParams( + image_mode=image_export_mode, + output_style=HTMLOutputStyle.SPLIT_PAGE, + ), + ) + visualizer = LayoutVisualizer() + visualizer.params.show_label = False + ser_res = ser.serialize( + visualizer=visualizer, + ) + with open(fname, "w") as fw: + fw.write(ser_res.text) + else: + conv_res.document.save_as_html( + filename=fname, + image_mode=image_export_mode, + split_page_view=True, + ) # Export Text format: if export_txt: @@ -250,6 +276,13 @@ def convert( # noqa: C901 to_formats: List[OutputFormat] = typer.Option( None, "--to", help="Specify 
output formats. Defaults to Markdown." ), + show_layout: Annotated[ + bool, + typer.Option( + ..., + help="If enabled, the page images will show the bounding-boxes of the items.", + ), + ] = False, headers: str = typer.Option( None, "--headers", @@ -547,20 +580,16 @@ def convert( # noqa: C901 ) if vlm_model == VlmModelType.GRANITE_VISION: - pipeline_options.vlm_options = granite_vision_vlm_conversion_options + pipeline_options.vlm_options = GRANITE_VISION_TRANSFORMERS elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA: - pipeline_options.vlm_options = ( - granite_vision_vlm_ollama_conversion_options - ) + pipeline_options.vlm_options = GRANITE_VISION_OLLAMA elif vlm_model == VlmModelType.SMOLDOCLING: - pipeline_options.vlm_options = smoldocling_vlm_conversion_options + pipeline_options.vlm_options = SMOLDOCLING_TRANSFORMERS if sys.platform == "darwin": try: import mlx_vlm - pipeline_options.vlm_options = ( - smoldocling_vlm_mlx_conversion_options - ) + pipeline_options.vlm_options = SMOLDOCLING_MLX except ImportError: _log.warning( "To run SmolDocling faster, please install mlx-vlm:\n" @@ -596,6 +625,7 @@ def convert( # noqa: C901 export_json=export_json, export_html=export_html, export_html_split_page=export_html_split_page, + show_layout=show_layout, export_md=export_md, export_txt=export_txt, export_doctags=export_doctags, diff --git a/docling/datamodel/accelerator_options.py b/docling/datamodel/accelerator_options.py new file mode 100644 index 00000000..1b0ea8cf --- /dev/null +++ b/docling/datamodel/accelerator_options.py @@ -0,0 +1,68 @@ +import logging +import os +import re +from enum import Enum +from typing import Any, Union + +from pydantic import field_validator, model_validator +from pydantic_settings import BaseSettings, SettingsConfigDict + +_log = logging.getLogger(__name__) + + +class AcceleratorDevice(str, Enum): + """Devices to run model inference""" + + AUTO = "auto" + CPU = "cpu" + CUDA = "cuda" + MPS = "mps" + + +class AcceleratorOptions(BaseSettings): + model_config = SettingsConfigDict( + env_prefix="DOCLING_", env_nested_delimiter="_", populate_by_name=True + ) + + num_threads: int = 4 + device: Union[str, AcceleratorDevice] = "auto" + cuda_use_flash_attention2: bool = False + + @field_validator("device") + def validate_device(cls, value): + # "auto", "cpu", "cuda", "mps", or "cuda:N" + if value in {d.value for d in AcceleratorDevice} or re.match( + r"^cuda(:\d+)?$", value + ): + return value + raise ValueError( + "Invalid device option. Use 'auto', 'cpu', 'mps', 'cuda', or 'cuda:N'." + ) + + @model_validator(mode="before") + @classmethod + def check_alternative_envvars(cls, data: Any) -> Any: + r""" + Set num_threads from the "alternative" envvar OMP_NUM_THREADS. + The alternative envvar is used only if it is valid and the regular envvar is not set. + + Notice: The standard pydantic settings mechanism with parameter "aliases" does not provide + the same functionality. In case the alias envvar is set and the user tries to override the + parameter in settings initialization, Pydantic treats the parameter provided in __init__() + as an extra input instead of simply overwriting the envvar value for that parameter. 
+ """ + if isinstance(data, dict): + input_num_threads = data.get("num_threads") + # Check if to set the num_threads from the alternative envvar + if input_num_threads is None: + docling_num_threads = os.getenv("DOCLING_NUM_THREADS") + omp_num_threads = os.getenv("OMP_NUM_THREADS") + if docling_num_threads is None and omp_num_threads is not None: + try: + data["num_threads"] = int(omp_num_threads) + except ValueError: + _log.error( + "Ignoring misformatted envvar OMP_NUM_THREADS '%s'", + omp_num_threads, + ) + return data diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 1e98a972..6299e619 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -13,11 +13,11 @@ from docling_core.types.doc import ( TableCell, ) from docling_core.types.doc.page import SegmentedPdfPage, TextCell - -# DO NOT REMOVE; explicitly exposed from this location from docling_core.types.io import ( DocumentStream, ) + +# DO NOT REMOVE; explicitly exposed from this location from PIL.Image import Image from pydantic import BaseModel, ConfigDict, Field, computed_field @@ -131,12 +131,6 @@ class ErrorItem(BaseModel): error_message: str -# class Cell(BaseModel): -# id: int -# text: str -# bbox: BoundingBox - - class Cluster(BaseModel): id: int label: DocItemLabel @@ -158,8 +152,16 @@ class LayoutPrediction(BaseModel): clusters: List[Cluster] = [] +class VlmPredictionToken(BaseModel): + text: str = "" + token: int = -1 + logprob: float = -1 + + class VlmPrediction(BaseModel): text: str = "" + generated_tokens: list[VlmPredictionToken] = [] + generation_time: float = -1 class ContainerElement( diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 5791c0e4..4c71f5c8 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -412,7 +412,11 @@ class _DocumentConversionInput(BaseModel): else: return "application/xml" - if re.match(r".*?\s*)?( Any: - r""" - Set num_threads from the "alternative" envvar OMP_NUM_THREADS. - The alternative envvar is used only if it is valid and the regular envvar is not set. - - Notice: The standard pydantic settings mechanism with parameter "aliases" does not provide - the same functionality. In case the alias envvar is set and the user tries to override the - parameter in settings initialization, Pydantic treats the parameter provided in __init__() - as an extra input instead of simply overwriting the evvar value for that parameter. - """ - if isinstance(data, dict): - input_num_threads = data.get("num_threads") - # Check if to set the num_threads from the alternative envvar - if input_num_threads is None: - docling_num_threads = os.getenv("DOCLING_NUM_THREADS") - omp_num_threads = os.getenv("OMP_NUM_THREADS") - if docling_num_threads is None and omp_num_threads is not None: - try: - data["num_threads"] = int(omp_num_threads) - except ValueError: - _log.error( - "Ignoring misformatted envvar OMP_NUM_THREADS '%s'", - omp_num_threads, - ) - return data - - class BaseOptions(BaseModel): """Base class for options.""" @@ -121,24 +74,22 @@ class RapidOcrOptions(OcrOptions): lang: List[str] = [ "english", "chinese", - ] # However, language as a parameter is not supported by rapidocr yet and hence changing this options doesn't affect anything. 
- # For more details on supported languages by RapidOCR visit https://rapidai.github.io/RapidOCRDocs/blog/2022/09/28/%E6%94%AF%E6%8C%81%E8%AF%86%E5%88%AB%E8%AF%AD%E8%A8%80/ + ] + # However, language as a parameter is not supported by rapidocr yet + # and hence changing this option doesn't affect anything. + + # For more details on supported languages by RapidOCR visit + # https://rapidai.github.io/RapidOCRDocs/blog/2022/09/28/%E6%94%AF%E6%8C%81%E8%AF%86%E5%88%AB%E8%AF%AD%E8%A8%80/ + + # For more details on the following options visit + # https://rapidai.github.io/RapidOCRDocs/install_usage/api/RapidOCR/ - # For more details on the following options visit https://rapidai.github.io/RapidOCRDocs/install_usage/api/RapidOCR/ text_score: float = 0.5 # same default as rapidocr use_det: Optional[bool] = None # same default as rapidocr use_cls: Optional[bool] = None # same default as rapidocr use_rec: Optional[bool] = None # same default as rapidocr - # class Device(Enum): - # CPU = "CPU" - # CUDA = "CUDA" - # DIRECTML = "DIRECTML" - # AUTO = "AUTO" - - # device: Device = Device.AUTO # Default value is AUTO print_verbose: bool = False # same default as rapidocr det_model_path: Optional[str] = None # same default as rapidocr @@ -244,101 +195,18 @@ class PictureDescriptionVlmOptions(PictureDescriptionBaseOptions): return self.repo_id.replace("/", "--") +# SmolVLM smolvlm_picture_description = PictureDescriptionVlmOptions( repo_id="HuggingFaceTB/SmolVLM-256M-Instruct" ) -# phi_picture_description = PictureDescriptionVlmOptions(repo_id="microsoft/Phi-3-vision-128k-instruct") + +# GraniteVision granite_picture_description = PictureDescriptionVlmOptions( repo_id="ibm-granite/granite-vision-3.1-2b-preview", prompt="What is shown in this image?", ) -class BaseVlmOptions(BaseModel): - kind: str - prompt: str - - -class ResponseFormat(str, Enum): - DOCTAGS = "doctags" - MARKDOWN = "markdown" - - -class InferenceFramework(str, Enum): - MLX = "mlx" - TRANSFORMERS = "transformers" - OPENAI = "openai" - - -class HuggingFaceVlmOptions(BaseVlmOptions): - kind: Literal["hf_model_options"] = "hf_model_options" - - repo_id: str - load_in_8bit: bool = True - llm_int8_threshold: float = 6.0 - quantized: bool = False - - inference_framework: InferenceFramework - response_format: ResponseFormat - - @property - def repo_cache_folder(self) -> str: - return self.repo_id.replace("/", "--") - - -class ApiVlmOptions(BaseVlmOptions): - kind: Literal["api_model_options"] = "api_model_options" - - url: AnyUrl = AnyUrl( - "http://localhost:11434/v1/chat/completions" - ) # Default to ollama - headers: Dict[str, str] = {} - params: Dict[str, Any] = {} - scale: float = 2.0 - timeout: float = 60 - concurrency: int = 1 - response_format: ResponseFormat - - -smoldocling_vlm_mlx_conversion_options = HuggingFaceVlmOptions( - repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16", - prompt="Convert this page to docling.", - response_format=ResponseFormat.DOCTAGS, - inference_framework=InferenceFramework.MLX, -) - - -smoldocling_vlm_conversion_options = HuggingFaceVlmOptions( - repo_id="ds4sd/SmolDocling-256M-preview", - prompt="Convert this page to docling.", - response_format=ResponseFormat.DOCTAGS, - inference_framework=InferenceFramework.TRANSFORMERS, -) - -granite_vision_vlm_conversion_options = HuggingFaceVlmOptions( - repo_id="ibm-granite/granite-vision-3.1-2b-preview", - # prompt="OCR the full page to markdown.", - prompt="OCR this image.", - response_format=ResponseFormat.MARKDOWN, 
inference_framework=InferenceFramework.TRANSFORMERS, -) - -granite_vision_vlm_ollama_conversion_options = ApiVlmOptions( - url=AnyUrl("http://localhost:11434/v1/chat/completions"), - params={"model": "granite3.2-vision:2b"}, - prompt="OCR the full page to markdown.", - scale=1.0, - timeout=120, - response_format=ResponseFormat.MARKDOWN, -) - - -class VlmModelType(str, Enum): - SMOLDOCLING = "smoldocling" - GRANITE_VISION = "granite_vision" - GRANITE_VISION_OLLAMA = "granite_vision_ollama" - - # Define an enum for the backend options class PdfBackend(str, Enum): """Enum of valid PDF backends.""" @@ -387,7 +255,7 @@ class VlmPipelineOptions(PaginatedPipelineOptions): False # (To be used with vlms, or other generative models) ) # If True, text from backend will be used instead of generated text - vlm_options: Union[HuggingFaceVlmOptions, ApiVlmOptions] = ( + vlm_options: Union[InlineVlmOptions, ApiVlmOptions] = ( smoldocling_vlm_conversion_options ) diff --git a/docling/datamodel/pipeline_options_vlm_model.py b/docling/datamodel/pipeline_options_vlm_model.py new file mode 100644 index 00000000..c1ec28aa --- /dev/null +++ b/docling/datamodel/pipeline_options_vlm_model.py @@ -0,0 +1,81 @@ +from enum import Enum +from typing import Any, Dict, List, Literal + +from pydantic import AnyUrl, BaseModel +from typing_extensions import deprecated + +from docling.datamodel.accelerator_options import AcceleratorDevice + + +class BaseVlmOptions(BaseModel): + kind: str + prompt: str + + +class ResponseFormat(str, Enum): + DOCTAGS = "doctags" + MARKDOWN = "markdown" + HTML = "html" + + +class InferenceFramework(str, Enum): + MLX = "mlx" + TRANSFORMERS = "transformers" + + +class TransformersModelType(str, Enum): + AUTOMODEL = "automodel" + AUTOMODEL_VISION2SEQ = "automodel-vision2seq" + AUTOMODEL_CAUSALLM = "automodel-causallm" + + +class InlineVlmOptions(BaseVlmOptions): + kind: Literal["inline_model_options"] = "inline_model_options" + + repo_id: str + trust_remote_code: bool = False + load_in_8bit: bool = True + llm_int8_threshold: float = 6.0 + quantized: bool = False + + inference_framework: InferenceFramework + transformers_model_type: TransformersModelType = TransformersModelType.AUTOMODEL + response_format: ResponseFormat + + supported_devices: List[AcceleratorDevice] = [ + AcceleratorDevice.CPU, + AcceleratorDevice.CUDA, + AcceleratorDevice.MPS, + ] + + scale: float = 2.0 + + temperature: float = 0.0 + stop_strings: List[str] = [] + extra_generation_config: Dict[str, Any] = {} + + use_kv_cache: bool = True + max_new_tokens: int = 4096 + + @property + def repo_cache_folder(self) -> str: + return self.repo_id.replace("/", "--") + + +@deprecated("Use InlineVlmOptions instead.") +class HuggingFaceVlmOptions(InlineVlmOptions): + pass + + +class ApiVlmOptions(BaseVlmOptions): + kind: Literal["api_model_options"] = "api_model_options" + + url: AnyUrl = AnyUrl( + "http://localhost:11434/v1/chat/completions" + ) # Default to ollama + headers: Dict[str, str] = {} + params: Dict[str, Any] = {} + scale: float = 2.0 + timeout: float = 60 + concurrency: int = 1 + response_format: ResponseFormat diff --git a/docling/datamodel/vlm_model_specs.py b/docling/datamodel/vlm_model_specs.py new file mode 100644 index 00000000..5045c846 --- /dev/null +++ b/docling/datamodel/vlm_model_specs.py @@ -0,0 +1,144 @@ +import logging +from enum import Enum + +from pydantic import ( + AnyUrl, +) + +from docling.datamodel.accelerator_options import AcceleratorDevice +from docling.datamodel.pipeline_options_vlm_model import ( + 
ApiVlmOptions, + InferenceFramework, + InlineVlmOptions, + ResponseFormat, + TransformersModelType, +) + +_log = logging.getLogger(__name__) + + +# SmolDocling +SMOLDOCLING_MLX = InlineVlmOptions( + repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16", + prompt="Convert this page to docling.", + response_format=ResponseFormat.DOCTAGS, + inference_framework=InferenceFramework.MLX, + supported_devices=[AcceleratorDevice.MPS], + scale=2.0, + temperature=0.0, +) + +SMOLDOCLING_TRANSFORMERS = InlineVlmOptions( + repo_id="ds4sd/SmolDocling-256M-preview", + prompt="Convert this page to docling.", + response_format=ResponseFormat.DOCTAGS, + inference_framework=InferenceFramework.TRANSFORMERS, + transformers_model_type=TransformersModelType.AUTOMODEL_VISION2SEQ, + supported_devices=[ + AcceleratorDevice.CPU, + AcceleratorDevice.CUDA, + AcceleratorDevice.MPS, + ], + scale=2.0, + temperature=0.0, +) + +# GraniteVision +GRANITE_VISION_TRANSFORMERS = InlineVlmOptions( + repo_id="ibm-granite/granite-vision-3.2-2b", + prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!", + response_format=ResponseFormat.MARKDOWN, + inference_framework=InferenceFramework.TRANSFORMERS, + transformers_model_type=TransformersModelType.AUTOMODEL_VISION2SEQ, + supported_devices=[ + AcceleratorDevice.CPU, + AcceleratorDevice.CUDA, + AcceleratorDevice.MPS, + ], + scale=2.0, + temperature=0.0, +) + +GRANITE_VISION_OLLAMA = ApiVlmOptions( + url=AnyUrl("http://localhost:11434/v1/chat/completions"), + params={"model": "granite3.2-vision:2b"}, + prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!", + scale=1.0, + timeout=120, + response_format=ResponseFormat.MARKDOWN, + temperature=0.0, +) + +# Pixtral +PIXTRAL_12B_TRANSFORMERS = InlineVlmOptions( + repo_id="mistral-community/pixtral-12b", + prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!", + response_format=ResponseFormat.MARKDOWN, + inference_framework=InferenceFramework.TRANSFORMERS, + transformers_model_type=TransformersModelType.AUTOMODEL_VISION2SEQ, + supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA], + scale=2.0, + temperature=0.0, +) + +PIXTRAL_12B_MLX = InlineVlmOptions( + repo_id="mlx-community/pixtral-12b-bf16", + prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!", + response_format=ResponseFormat.MARKDOWN, + inference_framework=InferenceFramework.MLX, + supported_devices=[AcceleratorDevice.MPS], + scale=2.0, + temperature=0.0, +) + +# Phi4 +PHI4_TRANSFORMERS = InlineVlmOptions( + repo_id="microsoft/Phi-4-multimodal-instruct", + prompt="Convert this page to MarkDown. Do not miss any text and only output the bare markdown", + trust_remote_code=True, + response_format=ResponseFormat.MARKDOWN, + inference_framework=InferenceFramework.TRANSFORMERS, + transformers_model_type=TransformersModelType.AUTOMODEL_CAUSALLM, + supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA], + scale=2.0, + temperature=0.0, + extra_generation_config=dict(num_logits_to_keep=0), +) + +# Qwen +QWEN25_VL_3B_MLX = InlineVlmOptions( + repo_id="mlx-community/Qwen2.5-VL-3B-Instruct-bf16", + prompt="Convert this page to markdown. 
Do not miss any text and only output the bare markdown!", + response_format=ResponseFormat.MARKDOWN, + inference_framework=InferenceFramework.MLX, + supported_devices=[AcceleratorDevice.MPS], + scale=2.0, + temperature=0.0, +) + +# Gemma-3 +GEMMA3_12B_MLX = InlineVlmOptions( + repo_id="mlx-community/gemma-3-12b-it-bf16", + prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!", + response_format=ResponseFormat.MARKDOWN, + inference_framework=InferenceFramework.MLX, + supported_devices=[AcceleratorDevice.MPS], + scale=2.0, + temperature=0.0, +) + +GEMMA3_27B_MLX = InlineVlmOptions( + repo_id="mlx-community/gemma-3-27b-it-bf16", + prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!", + response_format=ResponseFormat.MARKDOWN, + inference_framework=InferenceFramework.MLX, + supported_devices=[AcceleratorDevice.MPS], + scale=2.0, + temperature=0.0, +) + + +class VlmModelType(str, Enum): + SMOLDOCLING = "smoldocling" + GRANITE_VISION = "granite_vision" + GRANITE_VISION_OLLAMA = "granite_vision_ollama" diff --git a/docling/document_converter.py b/docling/document_converter.py index 08095d43..e553c083 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -186,6 +186,11 @@ class DocumentConverter: Tuple[Type[BasePipeline], str], BasePipeline ] = {} + def _get_initialized_pipelines( + self, + ) -> dict[tuple[Type[BasePipeline], str], BasePipeline]: + return self.initialized_pipelines + def _get_pipeline_options_hash(self, pipeline_options: PipelineOptions) -> str: """Generate a hash of pipeline options to use as part of the cache key.""" options_str = str(pipeline_options.model_dump()) diff --git a/docling/models/api_vlm_model.py b/docling/models/api_vlm_model.py index 60bc6fce..30bc43ea 100644 --- a/docling/models/api_vlm_model.py +++ b/docling/models/api_vlm_model.py @@ -3,7 +3,7 @@ from concurrent.futures import ThreadPoolExecutor from docling.datamodel.base_models import Page, VlmPrediction from docling.datamodel.document import ConversionResult -from docling.datamodel.pipeline_options import ApiVlmOptions +from docling.datamodel.pipeline_options_vlm_model import ApiVlmOptions from docling.exceptions import OperationNotAllowed from docling.models.base_model import BasePageModel from docling.utils.api_image_request import api_image_request diff --git a/docling/models/base_ocr_model.py b/docling/models/base_ocr_model.py index 9f05aed3..4cbdeba5 100644 --- a/docling/models/base_ocr_model.py +++ b/docling/models/base_ocr_model.py @@ -11,9 +11,10 @@ from PIL import Image, ImageDraw from rtree import index from scipy.ndimage import binary_dilation, find_objects, label +from docling.datamodel.accelerator_options import AcceleratorOptions from docling.datamodel.base_models import Page from docling.datamodel.document import ConversionResult -from docling.datamodel.pipeline_options import AcceleratorOptions, OcrOptions +from docling.datamodel.pipeline_options import OcrOptions from docling.datamodel.settings import settings from docling.models.base_model import BaseModelWithOptions, BasePageModel diff --git a/docling/models/code_formula_model.py b/docling/models/code_formula_model.py index bf747c56..19a831ab 100644 --- a/docling/models/code_formula_model.py +++ b/docling/models/code_formula_model.py @@ -16,9 +16,10 @@ from docling_core.types.doc.labels import CodeLanguageLabel from PIL import Image, ImageOps from pydantic import BaseModel +from docling.datamodel.accelerator_options import 
AcceleratorOptions from docling.datamodel.base_models import ItemAndImageEnrichmentElement -from docling.datamodel.pipeline_options import AcceleratorOptions from docling.models.base_model import BaseItemAndImageEnrichmentModel +from docling.models.utils.hf_model_download import download_hf_model from docling.utils.accelerator_utils import decide_device @@ -117,20 +118,14 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel): force: bool = False, progress: bool = False, ) -> Path: - from huggingface_hub import snapshot_download - from huggingface_hub.utils import disable_progress_bars - - if not progress: - disable_progress_bars() - download_path = snapshot_download( + return download_hf_model( repo_id="ds4sd/CodeFormula", - force_download=force, - local_dir=local_dir, revision="v1.0.2", + local_dir=local_dir, + force=force, + progress=progress, ) - return Path(download_path) - def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool: """ Determines if a given element in a document can be processed by the model. diff --git a/docling/models/document_picture_classifier.py b/docling/models/document_picture_classifier.py index 6a57a74d..73a30203 100644 --- a/docling/models/document_picture_classifier.py +++ b/docling/models/document_picture_classifier.py @@ -13,8 +13,9 @@ from docling_core.types.doc import ( from PIL import Image from pydantic import BaseModel -from docling.datamodel.pipeline_options import AcceleratorOptions +from docling.datamodel.accelerator_options import AcceleratorOptions from docling.models.base_model import BaseEnrichmentModel +from docling.models.utils.hf_model_download import download_hf_model from docling.utils.accelerator_utils import decide_device @@ -105,20 +106,14 @@ class DocumentPictureClassifier(BaseEnrichmentModel): def download_models( local_dir: Optional[Path] = None, force: bool = False, progress: bool = False ) -> Path: - from huggingface_hub import snapshot_download - from huggingface_hub.utils import disable_progress_bars - - if not progress: - disable_progress_bars() - download_path = snapshot_download( + return download_hf_model( repo_id="ds4sd/DocumentFigureClassifier", - force_download=force, - local_dir=local_dir, revision="v1.0.1", + local_dir=local_dir, + force=force, + progress=progress, ) - return Path(download_path) - def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool: """ Determines if the given element can be processed by the classifier. 
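Note that the per-model download_models staticmethods in the hunks above now delegate to a shared download_hf_model helper, whose full definition appears later in this diff (docling/models/utils/hf_model_download.py). Below is a minimal usage sketch of that helper, assuming only the signature shown in the new module; the local_dir value is hypothetical:

from pathlib import Path

from docling.models.utils.hf_model_download import download_hf_model

# Prefetch the pinned revision of the figure-classifier weights; the helper
# disables the huggingface_hub progress bars unless progress=True and returns
# the resolved snapshot path as a pathlib.Path.
weights_path: Path = download_hf_model(
    repo_id="ds4sd/DocumentFigureClassifier",
    revision="v1.0.1",  # same revision DocumentPictureClassifier pins above
    local_dir=Path("./models/DocumentFigureClassifier"),  # hypothetical target
    progress=True,
)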
diff --git a/docling/models/easyocr_model.py b/docling/models/easyocr_model.py index b40ca506..53bee9c9 100644 --- a/docling/models/easyocr_model.py +++ b/docling/models/easyocr_model.py @@ -9,11 +9,10 @@ import numpy from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc.page import BoundingRectangle, TextCell +from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.base_models import Page from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( - AcceleratorDevice, - AcceleratorOptions, EasyOcrOptions, OcrOptions, ) diff --git a/docling/models/hf_vlm_model.py b/docling/models/hf_vlm_model.py deleted file mode 100644 index 29276fc4..00000000 --- a/docling/models/hf_vlm_model.py +++ /dev/null @@ -1,182 +0,0 @@ -import logging -import time -from collections.abc import Iterable -from pathlib import Path -from typing import Optional - -from docling.datamodel.base_models import Page, VlmPrediction -from docling.datamodel.document import ConversionResult -from docling.datamodel.pipeline_options import ( - AcceleratorOptions, - HuggingFaceVlmOptions, -) -from docling.models.base_model import BasePageModel -from docling.utils.accelerator_utils import decide_device -from docling.utils.profiling import TimeRecorder - -_log = logging.getLogger(__name__) - - -class HuggingFaceVlmModel(BasePageModel): - def __init__( - self, - enabled: bool, - artifacts_path: Optional[Path], - accelerator_options: AcceleratorOptions, - vlm_options: HuggingFaceVlmOptions, - ): - self.enabled = enabled - - self.vlm_options = vlm_options - - if self.enabled: - import torch - from transformers import ( # type: ignore - AutoModelForVision2Seq, - AutoProcessor, - BitsAndBytesConfig, - ) - - device = decide_device(accelerator_options.device) - self.device = device - - _log.debug(f"Available device for HuggingFace VLM: {device}") - - repo_cache_folder = vlm_options.repo_id.replace("/", "--") - - # PARAMETERS: - if artifacts_path is None: - artifacts_path = self.download_models(self.vlm_options.repo_id) - elif (artifacts_path / repo_cache_folder).exists(): - artifacts_path = artifacts_path / repo_cache_folder - - self.param_question = vlm_options.prompt # "Perform Layout Analysis." 
- self.param_quantization_config = BitsAndBytesConfig( - load_in_8bit=vlm_options.load_in_8bit, # True, - llm_int8_threshold=vlm_options.llm_int8_threshold, # 6.0 - ) - self.param_quantized = vlm_options.quantized # False - - self.processor = AutoProcessor.from_pretrained(artifacts_path) - if not self.param_quantized: - self.vlm_model = AutoModelForVision2Seq.from_pretrained( - artifacts_path, - device_map=device, - torch_dtype=torch.bfloat16, - _attn_implementation=( - "flash_attention_2" - if self.device.startswith("cuda") - and accelerator_options.cuda_use_flash_attention2 - else "eager" - ), - ) # .to(self.device) - - else: - self.vlm_model = AutoModelForVision2Seq.from_pretrained( - artifacts_path, - device_map=device, - torch_dtype="auto", - quantization_config=self.param_quantization_config, - _attn_implementation=( - "flash_attention_2" - if self.device.startswith("cuda") - and accelerator_options.cuda_use_flash_attention2 - else "eager" - ), - ) # .to(self.device) - - @staticmethod - def download_models( - repo_id: str, - local_dir: Optional[Path] = None, - force: bool = False, - progress: bool = False, - ) -> Path: - from huggingface_hub import snapshot_download - from huggingface_hub.utils import disable_progress_bars - - if not progress: - disable_progress_bars() - download_path = snapshot_download( - repo_id=repo_id, - force_download=force, - local_dir=local_dir, - # revision="v0.0.1", - ) - - return Path(download_path) - - def __call__( - self, conv_res: ConversionResult, page_batch: Iterable[Page] - ) -> Iterable[Page]: - for page in page_batch: - assert page._backend is not None - if not page._backend.is_valid(): - yield page - else: - with TimeRecorder(conv_res, "vlm"): - assert page.size is not None - - hi_res_image = page.get_image(scale=2.0) # 144dpi - # hi_res_image = page.get_image(scale=1.0) # 72dpi - - if hi_res_image is not None: - im_width, im_height = hi_res_image.size - - # populate page_tags with predicted doc tags - page_tags = "" - - if hi_res_image: - if hi_res_image.mode != "RGB": - hi_res_image = hi_res_image.convert("RGB") - - messages = [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "This is a page from a document.", - }, - {"type": "image"}, - {"type": "text", "text": self.param_question}, - ], - } - ] - prompt = self.processor.apply_chat_template( - messages, add_generation_prompt=False - ) - inputs = self.processor( - text=prompt, images=[hi_res_image], return_tensors="pt" - ) - inputs = {k: v.to(self.device) for k, v in inputs.items()} - - start_time = time.time() - # Call model to generate: - generated_ids = self.vlm_model.generate( - **inputs, max_new_tokens=4096, use_cache=True - ) - - generation_time = time.time() - start_time - generated_texts = self.processor.batch_decode( - generated_ids[:, inputs["input_ids"].shape[1] :], - skip_special_tokens=False, - )[0] - - num_tokens = len(generated_ids[0]) - page_tags = generated_texts - - _log.debug( - f"Generated {num_tokens} tokens in time {generation_time:.2f} seconds." 
- ) - - # inference_time = time.time() - start_time - # tokens_per_second = num_tokens / generation_time - # print("") - # print(f"Page Inference Time: {inference_time:.2f} seconds") - # print(f"Total tokens on page: {num_tokens:.2f}") - # print(f"Tokens/sec: {tokens_per_second:.2f}") - # print("") - page.predictions.vlm_response = VlmPrediction(text=page_tags) - - yield page diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py index e2abb373..d8e9c032 100644 --- a/docling/models/layout_model.py +++ b/docling/models/layout_model.py @@ -10,11 +10,12 @@ from docling_core.types.doc import DocItemLabel from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor from PIL import Image +from docling.datamodel.accelerator_options import AcceleratorOptions from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page from docling.datamodel.document import ConversionResult -from docling.datamodel.pipeline_options import AcceleratorOptions from docling.datamodel.settings import settings from docling.models.base_model import BasePageModel +from docling.models.utils.hf_model_download import download_hf_model from docling.utils.accelerator_utils import decide_device from docling.utils.layout_postprocessor import LayoutPostprocessor from docling.utils.profiling import TimeRecorder @@ -83,20 +84,14 @@ class LayoutModel(BasePageModel): force: bool = False, progress: bool = False, ) -> Path: - from huggingface_hub import snapshot_download - from huggingface_hub.utils import disable_progress_bars - - if not progress: - disable_progress_bars() - download_path = snapshot_download( + return download_hf_model( repo_id="ds4sd/docling-models", - force_download=force, + revision="v2.2.0", local_dir=local_dir, - revision="v2.1.0", + force=force, + progress=progress, ) - return Path(download_path) - def draw_clusters_and_cells_side_by_side( self, conv_res, page, clusters, mode_prefix: str, show: bool = False ): @@ -185,13 +180,23 @@ class LayoutModel(BasePageModel): ).postprocess() # processed_clusters, processed_cells = clusters, page.cells - conv_res.confidence.pages[page.page_no].layout_score = float( - np.mean([c.confidence for c in processed_clusters]) - ) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Mean of empty slice|invalid value encountered in scalar divide", + RuntimeWarning, + "numpy", + ) - conv_res.confidence.pages[page.page_no].ocr_score = float( - np.mean([c.confidence for c in processed_cells if c.from_ocr]) - ) + conv_res.confidence.pages[page.page_no].layout_score = float( + np.mean([c.confidence for c in processed_clusters]) + ) + + conv_res.confidence.pages[page.page_no].ocr_score = float( + np.mean( + [c.confidence for c in processed_cells if c.from_ocr] + ) + ) page.cells = processed_cells page.predictions.layout = LayoutPrediction( diff --git a/docling/models/ocr_mac_model.py b/docling/models/ocr_mac_model.py index a8ff55b8..a410a7f6 100644 --- a/docling/models/ocr_mac_model.py +++ b/docling/models/ocr_mac_model.py @@ -8,10 +8,10 @@ from typing import Optional, Type from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc.page import BoundingRectangle, TextCell +from docling.datamodel.accelerator_options import AcceleratorOptions from docling.datamodel.base_models import Page from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( - AcceleratorOptions, OcrMacOptions, OcrOptions, ) diff --git 
a/docling/models/page_preprocessing_model.py b/docling/models/page_preprocessing_model.py index 6a1dcf19..3cfa6352 100644 --- a/docling/models/page_preprocessing_model.py +++ b/docling/models/page_preprocessing_model.py @@ -1,4 +1,5 @@ import re +import warnings from collections.abc import Iterable from pathlib import Path from typing import Optional @@ -7,7 +8,7 @@ import numpy as np from PIL import ImageDraw from pydantic import BaseModel -from docling.datamodel.base_models import Page, ScoreValue +from docling.datamodel.base_models import Page from docling.datamodel.document import ConversionResult from docling.datamodel.settings import settings from docling.models.base_model import BasePageModel @@ -76,11 +77,15 @@ class PagePreprocessingModel(BasePageModel): score = self.rate_text_quality(c.text) text_scores.append(score) - conv_res.confidence.pages[page.page_no].parse_score = float( - np.nanquantile( - text_scores, q=0.10 - ) # To emphasise problems in the parse_score, we take the 10% percentile score of all text cells. - ) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", "Mean of empty slice", RuntimeWarning, "numpy" + ) + conv_res.confidence.pages[page.page_no].parse_score = float( + np.nanquantile( + text_scores, q=0.10 + ) # To emphasise problems in the parse_score, we take the 10% percentile score of all text cells. + ) # DEBUG code: def draw_text_boxes(image, cells, show: bool = False): diff --git a/docling/models/picture_description_api_model.py b/docling/models/picture_description_api_model.py index eb331b29..a3c0c2ee 100644 --- a/docling/models/picture_description_api_model.py +++ b/docling/models/picture_description_api_model.py @@ -5,8 +5,8 @@ from typing import Optional, Type, Union from PIL import Image +from docling.datamodel.accelerator_options import AcceleratorOptions from docling.datamodel.pipeline_options import ( - AcceleratorOptions, PictureDescriptionApiOptions, PictureDescriptionBaseOptions, ) diff --git a/docling/models/picture_description_base_model.py b/docling/models/picture_description_base_model.py index 2f6e6479..055c74b1 100644 --- a/docling/models/picture_description_base_model.py +++ b/docling/models/picture_description_base_model.py @@ -13,8 +13,8 @@ from docling_core.types.doc.document import ( # TODO: move import to docling_co ) from PIL import Image +from docling.datamodel.accelerator_options import AcceleratorOptions from docling.datamodel.pipeline_options import ( - AcceleratorOptions, PictureDescriptionBaseOptions, ) from docling.models.base_model import ( diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py index 679e80c2..230151d6 100644 --- a/docling/models/picture_description_vlm_model.py +++ b/docling/models/picture_description_vlm_model.py @@ -4,16 +4,21 @@ from typing import Optional, Type, Union from PIL import Image +from docling.datamodel.accelerator_options import AcceleratorOptions from docling.datamodel.pipeline_options import ( - AcceleratorOptions, PictureDescriptionBaseOptions, PictureDescriptionVlmOptions, ) from docling.models.picture_description_base_model import PictureDescriptionBaseModel +from docling.models.utils.hf_model_download import ( + HuggingFaceModelDownloadMixin, +) from docling.utils.accelerator_utils import decide_device -class PictureDescriptionVlmModel(PictureDescriptionBaseModel): +class PictureDescriptionVlmModel( + PictureDescriptionBaseModel, HuggingFaceModelDownloadMixin +): @classmethod def get_options_type(cls) -> 
Type[PictureDescriptionBaseOptions]: return PictureDescriptionVlmOptions @@ -66,26 +71,6 @@ class PictureDescriptionVlmModel(PictureDescriptionBaseModel): self.provenance = f"{self.options.repo_id}" - @staticmethod - def download_models( - repo_id: str, - local_dir: Optional[Path] = None, - force: bool = False, - progress: bool = False, - ) -> Path: - from huggingface_hub import snapshot_download - from huggingface_hub.utils import disable_progress_bars - - if not progress: - disable_progress_bars() - download_path = snapshot_download( - repo_id=repo_id, - force_download=force, - local_dir=local_dir, - ) - - return Path(download_path) - def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]: from transformers import GenerationConfig diff --git a/docling/models/rapid_ocr_model.py b/docling/models/rapid_ocr_model.py index 2c7f4357..b01c5acf 100644 --- a/docling/models/rapid_ocr_model.py +++ b/docling/models/rapid_ocr_model.py @@ -7,11 +7,10 @@ import numpy from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc.page import BoundingRectangle, TextCell +from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.base_models import Page from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( - AcceleratorDevice, - AcceleratorOptions, OcrOptions, RapidOcrOptions, ) diff --git a/docling/models/table_structure_model.py b/docling/models/table_structure_model.py index a7679eab..b90e85d5 100644 --- a/docling/models/table_structure_model.py +++ b/docling/models/table_structure_model.py @@ -13,16 +13,16 @@ from docling_core.types.doc.page import ( from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor from PIL import ImageDraw +from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.base_models import Page, Table, TableStructurePrediction from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( - AcceleratorDevice, - AcceleratorOptions, TableFormerMode, TableStructureOptions, ) from docling.datamodel.settings import settings from docling.models.base_model import BasePageModel +from docling.models.utils.hf_model_download import download_hf_model from docling.utils.accelerator_utils import decide_device from docling.utils.profiling import TimeRecorder @@ -90,20 +90,14 @@ class TableStructureModel(BasePageModel): def download_models( local_dir: Optional[Path] = None, force: bool = False, progress: bool = False ) -> Path: - from huggingface_hub import snapshot_download - from huggingface_hub.utils import disable_progress_bars - - if not progress: - disable_progress_bars() - download_path = snapshot_download( + return download_hf_model( repo_id="ds4sd/docling-models", - force_download=force, - local_dir=local_dir, revision="v2.2.0", + local_dir=local_dir, + force=force, + progress=progress, ) - return Path(download_path) - def draw_table_and_cells( self, conv_res: ConversionResult, diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index 8bca5479..e6901045 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -13,10 +13,10 @@ import pandas as pd from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc.page import TextCell +from docling.datamodel.accelerator_options import AcceleratorOptions from 
docling.datamodel.base_models import Page from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( - AcceleratorOptions, OcrOptions, TesseractCliOcrOptions, ) diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/tesseract_ocr_model.py index 108485d7..18ec34dc 100644 --- a/docling/models/tesseract_ocr_model.py +++ b/docling/models/tesseract_ocr_model.py @@ -7,10 +7,10 @@ from typing import Iterable, Optional, Type from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc.page import TextCell +from docling.datamodel.accelerator_options import AcceleratorOptions from docling.datamodel.base_models import Page from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( - AcceleratorOptions, OcrOptions, TesseractOcrOptions, ) diff --git a/docling/models/utils/__init__.py b/docling/models/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/docling/models/utils/hf_model_download.py b/docling/models/utils/hf_model_download.py new file mode 100644 index 00000000..3595166a --- /dev/null +++ b/docling/models/utils/hf_model_download.py @@ -0,0 +1,40 @@ +import logging +from pathlib import Path +from typing import Optional + +_log = logging.getLogger(__name__) + + +def download_hf_model( + repo_id: str, + local_dir: Optional[Path] = None, + force: bool = False, + progress: bool = False, + revision: Optional[str] = None, +) -> Path: + from huggingface_hub import snapshot_download + from huggingface_hub.utils import disable_progress_bars + + if not progress: + disable_progress_bars() + download_path = snapshot_download( + repo_id=repo_id, + force_download=force, + local_dir=local_dir, + revision=revision, + ) + + return Path(download_path) + + +class HuggingFaceModelDownloadMixin: + @staticmethod + def download_models( + repo_id: str, + local_dir: Optional[Path] = None, + force: bool = False, + progress: bool = False, + ) -> Path: + return download_hf_model( + repo_id=repo_id, local_dir=local_dir, force=force, progress=progress + ) diff --git a/docling/models/vlm_models_inline/__init__.py b/docling/models/vlm_models_inline/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/docling/models/vlm_models_inline/hf_transformers_model.py b/docling/models/vlm_models_inline/hf_transformers_model.py new file mode 100644 index 00000000..de7f289d --- /dev/null +++ b/docling/models/vlm_models_inline/hf_transformers_model.py @@ -0,0 +1,194 @@ +import importlib.metadata +import logging +import time +from collections.abc import Iterable +from pathlib import Path +from typing import Any, Optional + +from docling.datamodel.accelerator_options import ( + AcceleratorOptions, +) +from docling.datamodel.base_models import Page, VlmPrediction +from docling.datamodel.document import ConversionResult +from docling.datamodel.pipeline_options_vlm_model import ( + InlineVlmOptions, + TransformersModelType, +) +from docling.models.base_model import BasePageModel +from docling.models.utils.hf_model_download import ( + HuggingFaceModelDownloadMixin, +) +from docling.utils.accelerator_utils import decide_device +from docling.utils.profiling import TimeRecorder + +_log = logging.getLogger(__name__) + + +class HuggingFaceTransformersVlmModel(BasePageModel, HuggingFaceModelDownloadMixin): + def __init__( + self, + enabled: bool, + artifacts_path: Optional[Path], + accelerator_options: AcceleratorOptions, + vlm_options: InlineVlmOptions, + ): + self.enabled = 
enabled + + self.vlm_options = vlm_options + + if self.enabled: + import torch + from transformers import ( + AutoModel, + AutoModelForCausalLM, + AutoModelForVision2Seq, + AutoProcessor, + BitsAndBytesConfig, + GenerationConfig, + ) + + transformers_version = importlib.metadata.version("transformers") + if ( + self.vlm_options.repo_id == "microsoft/Phi-4-multimodal-instruct" + and transformers_version >= "4.52.0" + ): + raise NotImplementedError( + f"Phi 4 only works with transformers<4.52.0 but you have {transformers_version=}. Please downgrade by running pip install -U 'transformers<4.52.0'." + ) + + self.device = decide_device( + accelerator_options.device, + supported_devices=vlm_options.supported_devices, + ) + _log.debug(f"Available device for VLM: {self.device}") + + self.use_cache = vlm_options.use_kv_cache + self.max_new_tokens = vlm_options.max_new_tokens + self.temperature = vlm_options.temperature + + repo_cache_folder = vlm_options.repo_id.replace("/", "--") + + if artifacts_path is None: + artifacts_path = self.download_models(self.vlm_options.repo_id) + elif (artifacts_path / repo_cache_folder).exists(): + artifacts_path = artifacts_path / repo_cache_folder + + self.param_quantization_config: Optional[BitsAndBytesConfig] = None + if vlm_options.quantized: + self.param_quantization_config = BitsAndBytesConfig( + load_in_8bit=vlm_options.load_in_8bit, + llm_int8_threshold=vlm_options.llm_int8_threshold, + ) + + model_cls: Any = AutoModel + if ( + self.vlm_options.transformers_model_type + == TransformersModelType.AUTOMODEL_CAUSALLM + ): + model_cls = AutoModelForCausalLM + elif ( + self.vlm_options.transformers_model_type + == TransformersModelType.AUTOMODEL_VISION2SEQ + ): + model_cls = AutoModelForVision2Seq + + self.processor = AutoProcessor.from_pretrained( + artifacts_path, + trust_remote_code=vlm_options.trust_remote_code, + ) + self.vlm_model = model_cls.from_pretrained( + artifacts_path, + device_map=self.device, + _attn_implementation=( + "flash_attention_2" + if self.device.startswith("cuda") + and accelerator_options.cuda_use_flash_attention2 + else "eager" + ), + trust_remote_code=vlm_options.trust_remote_code, + ) + + # Load generation config + self.generation_config = GenerationConfig.from_pretrained(artifacts_path) + + def __call__( + self, conv_res: ConversionResult, page_batch: Iterable[Page] + ) -> Iterable[Page]: + for page in page_batch: + assert page._backend is not None + if not page._backend.is_valid(): + yield page + else: + with TimeRecorder(conv_res, "vlm"): + assert page.size is not None + + hi_res_image = page.get_image(scale=self.vlm_options.scale) + + # Define prompt structure + prompt = self.formulate_prompt() + + inputs = self.processor( + text=prompt, images=[hi_res_image], return_tensors="pt" + ).to(self.device) + + start_time = time.time() + # Call model to generate: + generated_ids = self.vlm_model.generate( + **inputs, + max_new_tokens=self.max_new_tokens, + use_cache=self.use_cache, + temperature=self.temperature, + generation_config=self.generation_config, + **self.vlm_options.extra_generation_config, + ) + + generation_time = time.time() - start_time + generated_texts = self.processor.batch_decode( + generated_ids[:, inputs["input_ids"].shape[1] :], + skip_special_tokens=False, + )[0] + + num_tokens = len(generated_ids[0]) + _log.debug( + f"Generated {num_tokens} tokens in time {generation_time:.2f} seconds." 
+ ) + page.predictions.vlm_response = VlmPrediction( + text=generated_texts, + generation_time=generation_time, + ) + + yield page + + def formulate_prompt(self) -> str: + """Formulate a prompt for the VLM.""" + + if self.vlm_options.repo_id == "microsoft/Phi-4-multimodal-instruct": + _log.debug("Using specialized prompt for Phi-4") + # more info here: https://huggingface.co/microsoft/Phi-4-multimodal-instruct#loading-the-model-locally + + user_prompt = "<|user|>" + assistant_prompt = "<|assistant|>" + prompt_suffix = "<|end|>" + + prompt = f"{user_prompt}<|image_1|>{self.vlm_options.prompt}{prompt_suffix}{assistant_prompt}" + _log.debug(f"prompt for {self.vlm_options.repo_id}: {prompt}") + + return prompt + + messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "This is a page from a document.", + }, + {"type": "image"}, + {"type": "text", "text": self.vlm_options.prompt}, + ], + } + ] + prompt = self.processor.apply_chat_template( + messages, add_generation_prompt=False + ) + return prompt diff --git a/docling/models/hf_mlx_model.py b/docling/models/vlm_models_inline/mlx_model.py similarity index 55% rename from docling/models/hf_mlx_model.py rename to docling/models/vlm_models_inline/mlx_model.py index 63f8fc95..d8b90407 100644 --- a/docling/models/hf_mlx_model.py +++ b/docling/models/vlm_models_inline/mlx_model.py @@ -4,29 +4,34 @@ from collections.abc import Iterable from pathlib import Path from typing import Optional -from docling.datamodel.base_models import Page, VlmPrediction -from docling.datamodel.document import ConversionResult -from docling.datamodel.pipeline_options import ( +from docling.datamodel.accelerator_options import ( AcceleratorOptions, - HuggingFaceVlmOptions, ) +from docling.datamodel.base_models import Page, VlmPrediction, VlmPredictionToken +from docling.datamodel.document import ConversionResult +from docling.datamodel.pipeline_options_vlm_model import InlineVlmOptions from docling.models.base_model import BasePageModel +from docling.models.utils.hf_model_download import ( + HuggingFaceModelDownloadMixin, +) from docling.utils.profiling import TimeRecorder _log = logging.getLogger(__name__) -class HuggingFaceMlxModel(BasePageModel): +class HuggingFaceMlxModel(BasePageModel, HuggingFaceModelDownloadMixin): def __init__( self, enabled: bool, artifacts_path: Optional[Path], accelerator_options: AcceleratorOptions, - vlm_options: HuggingFaceVlmOptions, + vlm_options: InlineVlmOptions, ): self.enabled = enabled self.vlm_options = vlm_options + self.max_tokens = vlm_options.max_new_tokens + self.temperature = vlm_options.temperature if self.enabled: try: @@ -39,42 +44,24 @@ class HuggingFaceMlxModel(BasePageModel): ) repo_cache_folder = vlm_options.repo_id.replace("/", "--") + self.apply_chat_template = apply_chat_template self.stream_generate = stream_generate # PARAMETERS: if artifacts_path is None: - artifacts_path = self.download_models(self.vlm_options.repo_id) + artifacts_path = self.download_models( + self.vlm_options.repo_id, + ) elif (artifacts_path / repo_cache_folder).exists(): artifacts_path = artifacts_path / repo_cache_folder - self.param_question = vlm_options.prompt # "Perform Layout Analysis." 
+ self.param_question = vlm_options.prompt ## Load the model self.vlm_model, self.processor = load(artifacts_path) self.config = load_config(artifacts_path) - @staticmethod - def download_models( - repo_id: str, - local_dir: Optional[Path] = None, - force: bool = False, - progress: bool = False, - ) -> Path: - from huggingface_hub import snapshot_download - from huggingface_hub.utils import disable_progress_bars - - if not progress: - disable_progress_bars() - download_path = snapshot_download( - repo_id=repo_id, - force_download=force, - local_dir=local_dir, - # revision="v0.0.1", - ) - - return Path(download_path) - def __call__( self, conv_res: ConversionResult, page_batch: Iterable[Page] ) -> Iterable[Page]: @@ -83,12 +70,10 @@ class HuggingFaceMlxModel(BasePageModel): if not page._backend.is_valid(): yield page else: - with TimeRecorder(conv_res, "vlm"): + with TimeRecorder(conv_res, f"vlm-mlx-{self.vlm_options.repo_id}"): assert page.size is not None - hi_res_image = page.get_image(scale=2.0) # 144dpi - # hi_res_image = page.get_image(scale=1.0) # 72dpi - + hi_res_image = page.get_image(scale=self.vlm_options.scale) if hi_res_image is not None: im_width, im_height = hi_res_image.size @@ -104,16 +89,45 @@ class HuggingFaceMlxModel(BasePageModel): ) start_time = time.time() + _log.debug("start generating ...") + # Call model to generate: + tokens: list[VlmPredictionToken] = [] + output = "" for token in self.stream_generate( self.vlm_model, self.processor, prompt, [hi_res_image], - max_tokens=4096, + max_tokens=self.max_tokens, verbose=False, + temp=self.temperature, ): + if len(token.logprobs.shape) == 1: + tokens.append( + VlmPredictionToken( + text=token.text, + token=token.token, + logprob=token.logprobs[token.token], + ) + ) + elif ( + len(token.logprobs.shape) == 2 + and token.logprobs.shape[0] == 1 + ): + tokens.append( + VlmPredictionToken( + text=token.text, + token=token.token, + logprob=token.logprobs[0, token.token], + ) + ) + else: + _log.warning( + f"incompatible shape for logprobs: {token.logprobs.shape}" + ) + output += token.text if "</doctag>" in token.text: break @@ -121,15 +135,13 @@ class HuggingFaceMlxModel(BasePageModel): generation_time = time.time() - start_time page_tags = output - _log.debug(f"Generation time {generation_time:.2f} seconds.") - - # inference_time = time.time() - start_time - # tokens_per_second = num_tokens / generation_time - # print("") - # print(f"Page Inference Time: {inference_time:.2f} seconds") - # print(f"Total tokens on page: {num_tokens:.2f}") - # print(f"Tokens/sec: {tokens_per_second:.2f}") - # print("") - page.predictions.vlm_response = VlmPrediction(text=page_tags) + _log.debug( + f"{generation_time:.2f} seconds for {len(tokens)} tokens ({len(tokens) / generation_time} tokens/sec)." 
+ ) + page.predictions.vlm_response = VlmPrediction( + text=page_tags, + generation_time=generation_time, + generated_tokens=tokens, + ) yield page diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index 4269900c..88317fd3 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -8,7 +8,7 @@ from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem from docling.backend.abstract_backend import AbstractDocumentBackend from docling.backend.pdf_backend import PdfDocumentBackend -from docling.datamodel.base_models import AssembledUnit, Page, PageConfidenceScores +from docling.datamodel.base_models import AssembledUnit, Page from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.datamodel.settings import settings @@ -55,11 +55,13 @@ class StandardPdfPipeline(PaginatedPipeline): "When defined, it must point to a folder containing all models required by the pipeline." ) - self.keep_images = ( - self.pipeline_options.generate_page_images - or self.pipeline_options.generate_picture_images - or self.pipeline_options.generate_table_images - ) + with warnings.catch_warnings(): # deprecated generate_table_images + warnings.filterwarnings("ignore", category=DeprecationWarning) + self.keep_images = ( + self.pipeline_options.generate_page_images + or self.pipeline_options.generate_picture_images + or self.pipeline_options.generate_table_images + ) self.reading_order_model = ReadingOrderModel(options=ReadingOrderOptions()) @@ -210,64 +212,74 @@ class StandardPdfPipeline(PaginatedPipeline): ) # Generate images of the requested element types - if ( - self.pipeline_options.generate_picture_images - or self.pipeline_options.generate_table_images - ): - scale = self.pipeline_options.images_scale - for element, _level in conv_res.document.iterate_items(): - if not isinstance(element, DocItem) or len(element.prov) == 0: - continue - if ( - isinstance(element, PictureItem) - and self.pipeline_options.generate_picture_images - ) or ( - isinstance(element, TableItem) - and self.pipeline_options.generate_table_images - ): - page_ix = element.prov[0].page_no - 1 - page = next( - (p for p in conv_res.pages if p.page_no == page_ix), - cast("Page", None), - ) - assert page is not None - assert page.size is not None - assert page.image is not None + with warnings.catch_warnings(): # deprecated generate_table_images + warnings.filterwarnings("ignore", category=DeprecationWarning) + if ( + self.pipeline_options.generate_picture_images + or self.pipeline_options.generate_table_images + ): + scale = self.pipeline_options.images_scale + for element, _level in conv_res.document.iterate_items(): + if not isinstance(element, DocItem) or len(element.prov) == 0: + continue + if ( + isinstance(element, PictureItem) + and self.pipeline_options.generate_picture_images + ) or ( + isinstance(element, TableItem) + and self.pipeline_options.generate_table_images + ): + page_ix = element.prov[0].page_no - 1 + page = next( + (p for p in conv_res.pages if p.page_no == page_ix), + cast("Page", None), + ) + assert page is not None + assert page.size is not None + assert page.image is not None - crop_bbox = ( - element.prov[0] - .bbox.scaled(scale=scale) - .to_top_left_origin(page_height=page.size.height * scale) - ) + crop_bbox = ( + element.prov[0] + .bbox.scaled(scale=scale) + .to_top_left_origin( + page_height=page.size.height * scale + ) + ) - 
cropped_im = page.image.crop(crop_bbox.as_tuple()) - element.image = ImageRef.from_pil( - cropped_im, dpi=int(72 * scale) - ) + cropped_im = page.image.crop(crop_bbox.as_tuple()) + element.image = ImageRef.from_pil( + cropped_im, dpi=int(72 * scale) + ) # Aggregate confidence values for document: if len(conv_res.pages) > 0: - conv_res.confidence.layout_score = float( - np.nanmean( - [c.layout_score for c in conv_res.confidence.pages.values()] + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + category=RuntimeWarning, + message="Mean of empty slice|All-NaN slice encountered", ) - ) - conv_res.confidence.parse_score = float( - np.nanquantile( - [c.parse_score for c in conv_res.confidence.pages.values()], - q=0.1, # parse score should relate to worst 10% of pages. + conv_res.confidence.layout_score = float( + np.nanmean( + [c.layout_score for c in conv_res.confidence.pages.values()] + ) ) - ) - conv_res.confidence.table_score = float( - np.nanmean( - [c.table_score for c in conv_res.confidence.pages.values()] + conv_res.confidence.parse_score = float( + np.nanquantile( + [c.parse_score for c in conv_res.confidence.pages.values()], + q=0.1, # parse score should relate to worst 10% of pages. + ) ) - ) - conv_res.confidence.ocr_score = float( - np.nanmean( - [c.ocr_score for c in conv_res.confidence.pages.values()] + conv_res.confidence.table_score = float( + np.nanmean( + [c.table_score for c in conv_res.confidence.pages.values()] + ) + ) + conv_res.confidence.ocr_score = float( + np.nanmean( + [c.ocr_score for c in conv_res.confidence.pages.values()] + ) ) - ) return conv_res diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py index 70a46920..2ecfe55a 100644 --- a/docling/pipeline/vlm_pipeline.py +++ b/docling/pipeline/vlm_pipeline.py @@ -1,29 +1,46 @@ import logging +import re from io import BytesIO from pathlib import Path from typing import List, Optional, Union, cast -from docling_core.types import DoclingDocument -from docling_core.types.doc import BoundingBox, DocItem, ImageRef, PictureItem, TextItem +from docling_core.types.doc import ( + BoundingBox, + DocItem, + DoclingDocument, + ImageRef, + PictureItem, + ProvenanceItem, + TextItem, +) +from docling_core.types.doc.base import ( + BoundingBox, + Size, +) from docling_core.types.doc.document import DocTagsDocument from PIL import Image as PILImage from docling.backend.abstract_backend import AbstractDocumentBackend +from docling.backend.html_backend import HTMLDocumentBackend from docling.backend.md_backend import MarkdownDocumentBackend from docling.backend.pdf_backend import PdfDocumentBackend from docling.datamodel.base_models import InputFormat, Page from docling.datamodel.document import ConversionResult, InputDocument from docling.datamodel.pipeline_options import ( - ApiVlmOptions, - HuggingFaceVlmOptions, - InferenceFramework, - ResponseFormat, VlmPipelineOptions, ) +from docling.datamodel.pipeline_options_vlm_model import ( + ApiVlmOptions, + InferenceFramework, + InlineVlmOptions, + ResponseFormat, +) from docling.datamodel.settings import settings from docling.models.api_vlm_model import ApiVlmModel -from docling.models.hf_mlx_model import HuggingFaceMlxModel -from docling.models.hf_vlm_model import HuggingFaceVlmModel +from docling.models.vlm_models_inline.hf_transformers_model import ( + HuggingFaceTransformersVlmModel, +) +from docling.models.vlm_models_inline.mlx_model import HuggingFaceMlxModel from docling.pipeline.base_pipeline import PaginatedPipeline from 
docling.utils.profiling import ProfilingScope, TimeRecorder @@ -66,8 +83,8 @@ class VlmPipeline(PaginatedPipeline): vlm_options=cast(ApiVlmOptions, self.pipeline_options.vlm_options), ), ] - elif isinstance(self.pipeline_options.vlm_options, HuggingFaceVlmOptions): - vlm_options = cast(HuggingFaceVlmOptions, self.pipeline_options.vlm_options) + elif isinstance(self.pipeline_options.vlm_options, InlineVlmOptions): + vlm_options = cast(InlineVlmOptions, self.pipeline_options.vlm_options) if vlm_options.inference_framework == InferenceFramework.MLX: self.build_pipe = [ HuggingFaceMlxModel( @@ -77,15 +94,19 @@ class VlmPipeline(PaginatedPipeline): vlm_options=vlm_options, ), ] - else: + elif vlm_options.inference_framework == InferenceFramework.TRANSFORMERS: self.build_pipe = [ - HuggingFaceVlmModel( + HuggingFaceTransformersVlmModel( enabled=True, # must be always enabled for this pipeline to make sense. artifacts_path=artifacts_path, accelerator_options=pipeline_options.accelerator_options, vlm_options=vlm_options, ), ] + else: + raise ValueError( + f"Could not instantiate the right type of VLM pipeline: {vlm_options.inference_framework}" + ) self.enrichment_pipe = [ # Other models working on `NodeItem` elements in the DoclingDocument @@ -116,49 +137,19 @@ class VlmPipeline(PaginatedPipeline): self.pipeline_options.vlm_options.response_format == ResponseFormat.DOCTAGS ): - doctags_list = [] - image_list = [] - for page in conv_res.pages: - predicted_doctags = "" - img = PILImage.new("RGB", (1, 1), "rgb(255,255,255)") - if page.predictions.vlm_response: - predicted_doctags = page.predictions.vlm_response.text - if page.image: - img = page.image - image_list.append(img) - doctags_list.append(predicted_doctags) + conv_res.document = self._turn_dt_into_doc(conv_res) - doctags_list_c = cast(List[Union[Path, str]], doctags_list) - image_list_c = cast(List[Union[Path, PILImage.Image]], image_list) - doctags_doc = DocTagsDocument.from_doctags_and_image_pairs( - doctags_list_c, image_list_c - ) - conv_res.document = DoclingDocument.load_from_doctags(doctags_doc) - - # If forced backend text, replace model predicted text with backend one - if self.force_backend_text: - scale = self.pipeline_options.images_scale - for element, _level in conv_res.document.iterate_items(): - if not isinstance(element, TextItem) or len(element.prov) == 0: - continue - page_ix = element.prov[0].page_no - 1 - page = conv_res.pages[page_ix] - if not page.size: - continue - crop_bbox = ( - element.prov[0] - .bbox.scaled(scale=scale) - .to_top_left_origin(page_height=page.size.height * scale) - ) - txt = self.extract_text_from_backend(page, crop_bbox) - element.text = txt - element.orig = txt elif ( self.pipeline_options.vlm_options.response_format == ResponseFormat.MARKDOWN ): conv_res.document = self._turn_md_into_doc(conv_res) + elif ( + self.pipeline_options.vlm_options.response_format == ResponseFormat.HTML + ): + conv_res.document = self._turn_html_into_doc(conv_res) + else: raise RuntimeError( f"Unsupported VLM response format {self.pipeline_options.vlm_options.response_format}" @@ -192,23 +183,199 @@ class VlmPipeline(PaginatedPipeline): return conv_res - def _turn_md_into_doc(self, conv_res): - predicted_text = "" - for pg_idx, page in enumerate(conv_res.pages): + def _turn_dt_into_doc(self, conv_res) -> DoclingDocument: + doctags_list = [] + image_list = [] + for page in conv_res.pages: + predicted_doctags = "" + img = PILImage.new("RGB", (1, 1), "rgb(255,255,255)") if page.predictions.vlm_response: - predicted_text 
+= page.predictions.vlm_response.text + "\n\n" - response_bytes = BytesIO(predicted_text.encode("utf8")) - out_doc = InputDocument( - path_or_stream=response_bytes, - filename=conv_res.input.file.name, - format=InputFormat.MD, - backend=MarkdownDocumentBackend, + predicted_doctags = page.predictions.vlm_response.text + if page.image: + img = page.image + image_list.append(img) + doctags_list.append(predicted_doctags) + + doctags_list_c = cast(List[Union[Path, str]], doctags_list) + image_list_c = cast(List[Union[Path, PILImage.Image]], image_list) + doctags_doc = DocTagsDocument.from_doctags_and_image_pairs( + doctags_list_c, image_list_c ) - backend = MarkdownDocumentBackend( - in_doc=out_doc, - path_or_stream=response_bytes, + conv_res.document = DoclingDocument.load_from_doctags( + doctag_document=doctags_doc ) - return backend.convert() + + # If forced backend text, replace model predicted text with backend one + if page.size: + if self.force_backend_text: + scale = self.pipeline_options.images_scale + for element, _level in conv_res.document.iterate_items(): + if not isinstance(element, TextItem) or len(element.prov) == 0: + continue + crop_bbox = ( + element.prov[0] + .bbox.scaled(scale=scale) + .to_top_left_origin(page_height=page.size.height * scale) + ) + txt = self.extract_text_from_backend(page, crop_bbox) + element.text = txt + element.orig = txt + + return conv_res.document + + def _turn_md_into_doc(self, conv_res): + def _extract_markdown_code(text): + """ + Extracts text from markdown code blocks (enclosed in triple backticks). + If no code blocks are found, returns the original text. + + Args: + text (str): Input text that may contain markdown code blocks + + Returns: + str: Extracted code if code blocks exist, otherwise original text + """ + # Regex pattern to match content between triple backticks + # This handles multiline content and optional language specifier + pattern = r"^```(?:\w*\n)?(.*?)```(\n)*$" + + # Search with DOTALL flag to match across multiple lines + mtch = re.search(pattern, text, re.DOTALL) + + if mtch: + # Return only the content of the first capturing group + return mtch.group(1) + else: + # No code blocks found, return original text + return text + + for pg_idx, page in enumerate(conv_res.pages): + page_no = pg_idx + 1 # FIXME: might be incorrect + + predicted_text = "" + if page.predictions.vlm_response: + predicted_text = page.predictions.vlm_response.text + "\n\n" + + predicted_text = _extract_markdown_code(text=predicted_text) + + response_bytes = BytesIO(predicted_text.encode("utf8")) + out_doc = InputDocument( + path_or_stream=response_bytes, + filename=conv_res.input.file.name, + format=InputFormat.MD, + backend=MarkdownDocumentBackend, + ) + backend = MarkdownDocumentBackend( + in_doc=out_doc, + path_or_stream=response_bytes, + ) + page_doc = backend.convert() + + if page.image is not None: + pg_width = page.image.width + pg_height = page.image.height + else: + pg_width = 1 + pg_height = 1 + + conv_res.document.add_page( + page_no=page_no, + size=Size(width=pg_width, height=pg_height), + image=ImageRef.from_pil(image=page.image, dpi=72) + if page.image + else None, + ) + + for item, level in page_doc.iterate_items(): + item.prov = [ + ProvenanceItem( + page_no=pg_idx + 1, + bbox=BoundingBox( + t=0.0, b=0.0, l=0.0, r=0.0 + ), # FIXME: would be nice not to have to "fake" it + charspan=[0, 0], + ) + ] + conv_res.document.append_child_item(child=item) + + return conv_res.document + + def _turn_html_into_doc(self, conv_res): + def 
_extract_html_code(text): + """ + Extracts HTML content from markdown code blocks (enclosed in triple backticks). + If no code blocks are found, returns the original text. + + Args: + text (str): Input text that may contain markdown code blocks + + Returns: + str: Extracted code if code blocks exist, otherwise original text + """ + # Regex pattern to match content between triple backticks + # This handles multiline content and optional language specifier + pattern = r"^```(?:\w*\n)?(.*?)```(\n)*$" + + # Search with DOTALL flag to match across multiple lines + mtch = re.search(pattern, text, re.DOTALL) + + if mtch: + # Return only the content of the first capturing group + return mtch.group(1) + else: + # No code blocks found, return original text + return text + + for pg_idx, page in enumerate(conv_res.pages): + page_no = pg_idx + 1 # FIXME: might be incorrect + + predicted_text = "" + if page.predictions.vlm_response: + predicted_text = page.predictions.vlm_response.text + "\n\n" + + predicted_text = _extract_html_code(text=predicted_text) + + response_bytes = BytesIO(predicted_text.encode("utf8")) + out_doc = InputDocument( + path_or_stream=response_bytes, + filename=conv_res.input.file.name, + format=InputFormat.HTML, + backend=HTMLDocumentBackend, + ) + backend = HTMLDocumentBackend( + in_doc=out_doc, + path_or_stream=response_bytes, + ) + page_doc = backend.convert() + + if page.image is not None: + pg_width = page.image.width + pg_height = page.image.height + else: + pg_width = 1 + pg_height = 1 + + conv_res.document.add_page( + page_no=page_no, + size=Size(width=pg_width, height=pg_height), + image=ImageRef.from_pil(image=page.image, dpi=72) + if page.image + else None, + ) + + for item, level in page_doc.iterate_items(): + item.prov = [ + ProvenanceItem( + page_no=pg_idx + 1, + bbox=BoundingBox( + t=0.0, b=0.0, l=0.0, r=0.0 + ), # FIXME: would be nice not to have to "fake" it + charspan=[0, 0], + ) + ] + conv_res.document.append_child_item(child=item) + + return conv_res.document @classmethod def get_default_options(cls) -> VlmPipelineOptions: diff --git a/docling/utils/accelerator_utils.py b/docling/utils/accelerator_utils.py index 8c930250..09b6651e 100644 --- a/docling/utils/accelerator_utils.py +++ b/docling/utils/accelerator_utils.py @@ -1,13 +1,16 @@ import logging +from typing import List, Optional import torch -from docling.datamodel.pipeline_options import AcceleratorDevice +from docling.datamodel.accelerator_options import AcceleratorDevice _log = logging.getLogger(__name__) -def decide_device(accelerator_device: str) -> str: +def decide_device( + accelerator_device: str, supported_devices: Optional[List[AcceleratorDevice]] = None +) -> str: r""" Resolve the device based on the acceleration options and the available devices in the system.
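+ When `supported_devices` is provided, CUDA and MPS are only considered if they appear in that list.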
@@ -20,6 +23,18 @@ def decide_device(accelerator_device: str) -> str: has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available() has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available() + if supported_devices is not None: + if has_cuda and AcceleratorDevice.CUDA not in supported_devices: + _log.info( + f"Removing CUDA from available devices because it is not in {supported_devices=}" + ) + has_cuda = False + if has_mps and AcceleratorDevice.MPS not in supported_devices: + _log.info( + f"Removing MPS from available devices because it is not in {supported_devices=}" + ) + has_mps = False + if accelerator_device == AcceleratorDevice.AUTO.value: # Handle 'auto' if has_cuda: device = "cuda:0" diff --git a/docling/utils/model_downloader.py b/docling/utils/model_downloader.py index eb20f255..55383c03 100644 --- a/docling/utils/model_downloader.py +++ b/docling/utils/model_downloader.py @@ -4,18 +4,20 @@ from typing import Optional from docling.datamodel.pipeline_options import ( granite_picture_description, - smoldocling_vlm_conversion_options, - smoldocling_vlm_mlx_conversion_options, smolvlm_picture_description, ) from docling.datamodel.settings import settings +from docling.datamodel.vlm_model_specs import ( + SMOLDOCLING_MLX, + SMOLDOCLING_TRANSFORMERS, +) from docling.models.code_formula_model import CodeFormulaModel from docling.models.document_picture_classifier import DocumentPictureClassifier from docling.models.easyocr_model import EasyOcrModel -from docling.models.hf_vlm_model import HuggingFaceVlmModel from docling.models.layout_model import LayoutModel from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel from docling.models.table_structure_model import TableStructureModel +from docling.models.utils.hf_model_download import download_hf_model _log = logging.getLogger(__name__) @@ -75,7 +77,7 @@ def download_models( if with_smolvlm: _log.info("Downloading SmolVlm model...") - PictureDescriptionVlmModel.download_models( + download_hf_model( repo_id=smolvlm_picture_description.repo_id, local_dir=output_dir / smolvlm_picture_description.repo_cache_folder, force=force, @@ -84,26 +86,25 @@ def download_models( if with_smoldocling: _log.info("Downloading SmolDocling model...") - HuggingFaceVlmModel.download_models( - repo_id=smoldocling_vlm_conversion_options.repo_id, - local_dir=output_dir / smoldocling_vlm_conversion_options.repo_cache_folder, + download_hf_model( + repo_id=SMOLDOCLING_TRANSFORMERS.repo_id, + local_dir=output_dir / SMOLDOCLING_TRANSFORMERS.repo_cache_folder, force=force, progress=progress, ) if with_smoldocling_mlx: _log.info("Downloading SmolDocling MLX model...") - HuggingFaceVlmModel.download_models( - repo_id=smoldocling_vlm_mlx_conversion_options.repo_id, - local_dir=output_dir - / smoldocling_vlm_mlx_conversion_options.repo_cache_folder, + download_hf_model( + repo_id=SMOLDOCLING_MLX.repo_id, + local_dir=output_dir / SMOLDOCLING_MLX.repo_cache_folder, force=force, progress=progress, ) if with_granite_vision: _log.info("Downloading Granite Vision model...") - PictureDescriptionVlmModel.download_models( + download_hf_model( repo_id=granite_picture_description.repo_id, local_dir=output_dir / granite_picture_description.repo_cache_folder, force=force, diff --git a/docs/examples/compare_vlm_models.py b/docs/examples/compare_vlm_models.py new file mode 100644 index 00000000..f9bd2dcd --- /dev/null +++ b/docs/examples/compare_vlm_models.py @@ -0,0 +1,160 @@ +# Compare VLM models +# ================== +# +# This 
example runs the VLM pipeline with different vision-language models. +# Their runtime and output quality are compared. + +import sys +import time +from pathlib import Path + +from docling_core.types.doc import DocItemLabel, ImageRefMode +from docling_core.types.doc.document import DEFAULT_EXPORT_LABELS +from tabulate import tabulate + +from docling.datamodel import vlm_model_specs +from docling.datamodel.base_models import InputFormat +from docling.datamodel.pipeline_options import ( + VlmPipelineOptions, +) +from docling.datamodel.pipeline_options_vlm_model import InferenceFramework +from docling.document_converter import DocumentConverter, PdfFormatOption +from docling.pipeline.vlm_pipeline import VlmPipeline + + +def convert(sources: list[Path], converter: DocumentConverter): + model_id = pipeline_options.vlm_options.repo_id.replace("/", "_") + framework = pipeline_options.vlm_options.inference_framework + for source in sources: + print("================================================") + print("Processing...") + print(f"Source: {source}") + print("---") + print(f"Model: {model_id}") + print(f"Framework: {framework}") + print("================================================") + print("") + + res = converter.convert(source) + + print("") + + fname = f"{res.input.file.stem}-{model_id}-{framework}" + + inference_time = 0.0 + for i, page in enumerate(res.pages): + inference_time += page.predictions.vlm_response.generation_time + print("") + print( + f" ---------- Predicted page {i} in {pipeline_options.vlm_options.response_format} in {page.predictions.vlm_response.generation_time} [sec]:" + ) + print(page.predictions.vlm_response.text) + print(" ---------- ") + + print("===== Final output of the converted document =======") + + res.document.save_as_json( + out_path / f"{fname}.json", + image_mode=ImageRefMode.PLACEHOLDER, + ) + print(f" => produced {out_path / fname}.json") + + res.document.save_as_markdown( + out_path / f"{fname}.md", + image_mode=ImageRefMode.PLACEHOLDER, + ) + print(f" => produced {out_path / fname}.md") + + res.document.save_as_html( + out_path / f"{fname}.html", + image_mode=ImageRefMode.EMBEDDED, + labels=[*DEFAULT_EXPORT_LABELS, DocItemLabel.FOOTNOTE], + split_page_view=True, + ) + print(f" => produced {out_path / fname}.html") + + pg_num = res.document.num_pages() + print("") + print( + f"Total document prediction time: {inference_time:.2f} seconds, pages: {pg_num}" + ) + print("====================================================") + + return [ + source, + model_id, + str(framework), + pg_num, + inference_time, + ] + + +if __name__ == "__main__": + sources = [ + "tests/data/pdf/2305.03393v1-pg9.pdf", + ] + + out_path = Path("scratch") + out_path.mkdir(parents=True, exist_ok=True) + + ## Use VlmPipeline + pipeline_options = VlmPipelineOptions() + pipeline_options.generate_page_images = True + + ## On GPU systems, enable flash_attention_2 with CUDA: + # pipeline_options.accelerator_options.device = AcceleratorDevice.CUDA + # pipeline_options.accelerator_options.cuda_use_flash_attention2 = True + + vlm_models = [ + ## DocTags / SmolDocling models + vlm_model_specs.SMOLDOCLING_MLX, + vlm_model_specs.SMOLDOCLING_TRANSFORMERS, + ## Markdown models (using MLX framework) + vlm_model_specs.QWEN25_VL_3B_MLX, + vlm_model_specs.PIXTRAL_12B_MLX, + vlm_model_specs.GEMMA3_12B_MLX, + ## Markdown models (using Transformers framework) +
vlm_model_specs.GRANITE_VISION_TRANSFORMERS, + vlm_model_specs.PHI4_TRANSFORMERS, + vlm_model_specs.PIXTRAL_12B_TRANSFORMERS, + ] + + # Remove MLX models if not on Mac + if sys.platform != "darwin": + vlm_models = [ + m for m in vlm_models if m.inference_framework != InferenceFramework.MLX + ] + + rows = [] + for vlm_options in vlm_models: + pipeline_options.vlm_options = vlm_options + + ## Set up pipeline for PDF or image inputs + converter = DocumentConverter( + format_options={ + InputFormat.PDF: PdfFormatOption( + pipeline_cls=VlmPipeline, + pipeline_options=pipeline_options, + ), + InputFormat.IMAGE: PdfFormatOption( + pipeline_cls=VlmPipeline, + pipeline_options=pipeline_options, + ), + }, + ) + + row = convert(sources=sources, converter=converter) + rows.append(row) + + print( + tabulate( + rows, headers=["source", "model_id", "framework", "num_pages", "time"] + ) + ) + + print("see if memory gets released ...") + time.sleep(10) diff --git a/docs/examples/custom_convert.py b/docs/examples/custom_convert.py index 3b8ae6df..12dfacd5 100644 --- a/docs/examples/custom_convert.py +++ b/docs/examples/custom_convert.py @@ -3,10 +3,9 @@ import logging import time from pathlib import Path +from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import ( - AcceleratorDevice, - AcceleratorOptions, PdfPipelineOptions, ) from docling.document_converter import DocumentConverter, PdfFormatOption diff --git a/docs/examples/minimal_vlm_pipeline.py b/docs/examples/minimal_vlm_pipeline.py index 96288b08..666c0604 100644 --- a/docs/examples/minimal_vlm_pipeline.py +++ b/docs/examples/minimal_vlm_pipeline.py @@ -1,101 +1,46 @@ -import json -import time -from pathlib import Path - -from docling_core.types.doc import DocItemLabel, ImageRefMode -from docling_core.types.doc.document import DEFAULT_EXPORT_LABELS - +from docling.datamodel import vlm_model_specs from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import ( VlmPipelineOptions, - smoldocling_vlm_mlx_conversion_options, ) from docling.document_converter import DocumentConverter, PdfFormatOption from docling.pipeline.vlm_pipeline import VlmPipeline -sources = [ - # "tests/data/2305.03393v1-pg9-img.png", - "tests/data/pdf/2305.03393v1-pg9.pdf", -] +source = "https://arxiv.org/pdf/2501.17887" -## Use experimental VlmPipeline -pipeline_options = VlmPipelineOptions() -# If force_backend_text = True, text from backend will be used instead of generated text -pipeline_options.force_backend_text = False +###### USING SIMPLE DEFAULT VALUES +# - SmolDocling model +# - Using the transformers framework -## On GPU systems, enable flash_attention_2 with CUDA: -# pipeline_options.accelerator_options.device = AcceleratorDevice.CUDA -# pipeline_options.accelerator_options.cuda_use_flash_attention2 = True +converter = DocumentConverter( + format_options={ + InputFormat.PDF: PdfFormatOption( + pipeline_cls=VlmPipeline, + ), + } +) -## Pick a VLM model. We choose SmolDocling-256M by default -# pipeline_options.vlm_options = smoldocling_vlm_conversion_options +doc = converter.convert(source=source).document -## Pick a VLM model. 
Fast Apple Silicon friendly implementation for SmolDocling-256M via MLX -pipeline_options.vlm_options = smoldocling_vlm_mlx_conversion_options +print(doc.export_to_markdown()) -## Alternative VLM models: -# pipeline_options.vlm_options = granite_vision_vlm_conversion_options -## Set up pipeline for PDF or image inputs +###### USING MACOS MPS ACCELERATOR +# For more options see the compare_vlm_models.py example. + +pipeline_options = VlmPipelineOptions( + vlm_options=vlm_model_specs.SMOLDOCLING_MLX, +) + converter = DocumentConverter( format_options={ InputFormat.PDF: PdfFormatOption( pipeline_cls=VlmPipeline, pipeline_options=pipeline_options, ), - InputFormat.IMAGE: PdfFormatOption( - pipeline_cls=VlmPipeline, - pipeline_options=pipeline_options, - ), } ) -out_path = Path("scratch") -out_path.mkdir(parents=True, exist_ok=True) +doc = converter.convert(source=source).document -for source in sources: - start_time = time.time() - print("================================================") - print(f"Processing... {source}") - print("================================================") - print("") - - res = converter.convert(source) - - print("") - print(res.document.export_to_markdown()) - - for page in res.pages: - print("") - print("Predicted page in DOCTAGS:") - print(page.predictions.vlm_response.text) - - res.document.save_as_html( - filename=Path(f"{out_path}/{res.input.file.stem}.html"), - image_mode=ImageRefMode.REFERENCED, - labels=[*DEFAULT_EXPORT_LABELS, DocItemLabel.FOOTNOTE], - ) - - with (out_path / f"{res.input.file.stem}.json").open("w") as fp: - fp.write(json.dumps(res.document.export_to_dict())) - - res.document.save_as_json( - out_path / f"{res.input.file.stem}.json", - image_mode=ImageRefMode.PLACEHOLDER, - ) - - res.document.save_as_markdown( - out_path / f"{res.input.file.stem}.md", - image_mode=ImageRefMode.PLACEHOLDER, - ) - - pg_num = res.document.num_pages() - print("") - inference_time = time.time() - start_time - print( - f"Total document prediction time: {inference_time:.2f} seconds, pages: {pg_num}" - ) - -print("================================================") -print("done!") -print("================================================") +print(doc.export_to_markdown()) diff --git a/docs/examples/run_with_accelerator.py b/docs/examples/run_with_accelerator.py index a5380740..6b3ddc68 100644 --- a/docs/examples/run_with_accelerator.py +++ b/docs/examples/run_with_accelerator.py @@ -1,9 +1,8 @@ from pathlib import Path +from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import ( - AcceleratorDevice, - AcceleratorOptions, PdfPipelineOptions, ) from docling.datamodel.settings import settings diff --git a/docs/examples/translate.py b/docs/examples/translate.py index 229d5451..f2711a23 100644 --- a/docs/examples/translate.py +++ b/docs/examples/translate.py @@ -1,5 +1,4 @@ import logging -import time from pathlib import Path from docling_core.types.doc import ImageRefMode, TableItem, TextItem diff --git a/docs/examples/vlm_pipeline_api_model.py b/docs/examples/vlm_pipeline_api_model.py index 504cecc5..ec29e21c 100644 --- a/docs/examples/vlm_pipeline_api_model.py +++ b/docs/examples/vlm_pipeline_api_model.py @@ -7,10 +7,9 @@ from dotenv import load_dotenv from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import ( - ApiVlmOptions, - ResponseFormat, VlmPipelineOptions, ) +from 
docling.datamodel.pipeline_options_vlm_model import ApiVlmOptions, ResponseFormat from docling.document_converter import DocumentConverter, PdfFormatOption from docling.pipeline.vlm_pipeline import VlmPipeline diff --git a/docs/index.md b/docs/index.md index abb1b487..ad9ac80e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -27,7 +27,7 @@ Docling simplifies document processing, parsing diverse formats — including ad * 🔒 Local execution capabilities for sensitive data and air-gapped environments * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI * 🔍 Extensive OCR support for scanned PDFs and images -* 🥚 Support of Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) 🆕🔥 +* 🥚 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) 🔥 * 💻 Simple and convenient CLI ### Coming soon @@ -39,7 +39,7 @@ Docling simplifies document processing, parsing diverse formats — including ad ## Get started
- Concepts
Learn Docling fundamendals
+ Concepts
Learn Docling fundamentals
Examples
Try out recipes for various use cases, including conversion, RAG, and more
Integrations
Check out integrations with popular frameworks and tools
Reference
See more API details
diff --git a/docs/usage/vision_models.md b/docs/usage/vision_models.md new file mode 100644 index 00000000..ba3fc3eb --- /dev/null +++ b/docs/usage/vision_models.md @@ -0,0 +1,121 @@ + +The `VlmPipeline` in Docling allows converting documents end-to-end using a vision-language model. + +Docling supports vision-language models which output: + +- DocTags (e.g. [SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)), the preferred choice +- Markdown +- HTML + + +To run Docling with local models using the `VlmPipeline`: + +=== "CLI" + + ```bash + docling --pipeline vlm FILE + ``` + +=== "Python" + + See also the example [minimal_vlm_pipeline.py](./../examples/minimal_vlm_pipeline.py). + + ```python + from docling.datamodel.base_models import InputFormat + from docling.document_converter import DocumentConverter, PdfFormatOption + from docling.pipeline.vlm_pipeline import VlmPipeline + + converter = DocumentConverter( + format_options={ + InputFormat.PDF: PdfFormatOption( + pipeline_cls=VlmPipeline, + ), + } + ) + + doc = converter.convert(source="FILE").document + ``` + +## Available local models + +By default, the vision-language models run locally. +Docling allows choosing between the Hugging Face [Transformers](https://github.com/huggingface/transformers) framework and [MLX](https://github.com/Blaizzy/mlx-vlm) (for Apple devices with MPS acceleration). + +The following table reports the models currently available out-of-the-box. + +| Model instance | Model | Framework | Device | Num pages | Inference time (sec) | +| ---------------|------ | --------- | ------ | --------- | ---------------------| +| `vlm_model_specs.SMOLDOCLING_TRANSFORMERS` | [ds4sd/SmolDocling-256M-preview](https://huggingface.co/ds4sd/SmolDocling-256M-preview) | `Transformers/AutoModelForVision2Seq` | MPS | 1 | 102.212 | +| `vlm_model_specs.SMOLDOCLING_MLX` | [ds4sd/SmolDocling-256M-preview-mlx-bf16](https://huggingface.co/ds4sd/SmolDocling-256M-preview-mlx-bf16) | `MLX` | MPS | 1 | 6.15453 | +| `vlm_model_specs.QWEN25_VL_3B_MLX` | [mlx-community/Qwen2.5-VL-3B-Instruct-bf16](https://huggingface.co/mlx-community/Qwen2.5-VL-3B-Instruct-bf16) | `MLX` | MPS | 1 | 23.4951 | +| `vlm_model_specs.PIXTRAL_12B_MLX` | [mlx-community/pixtral-12b-bf16](https://huggingface.co/mlx-community/pixtral-12b-bf16) | `MLX` | MPS | 1 | 308.856 | +| `vlm_model_specs.GEMMA3_12B_MLX` | [mlx-community/gemma-3-12b-it-bf16](https://huggingface.co/mlx-community/gemma-3-12b-it-bf16) | `MLX` | MPS | 1 | 378.486 | +| `vlm_model_specs.GRANITE_VISION_TRANSFORMERS` | [ibm-granite/granite-vision-3.2-2b](https://huggingface.co/ibm-granite/granite-vision-3.2-2b) | `Transformers/AutoModelForVision2Seq` | MPS | 1 | 104.75 | +| `vlm_model_specs.PHI4_TRANSFORMERS` | [microsoft/Phi-4-multimodal-instruct](https://huggingface.co/microsoft/Phi-4-multimodal-instruct) | `Transformers/AutoModelForCausalLM` | CPU | 1 | 1175.67 | +| `vlm_model_specs.PIXTRAL_12B_TRANSFORMERS` | [mistral-community/pixtral-12b](https://huggingface.co/mistral-community/pixtral-12b) | `Transformers/AutoModelForVision2Seq` | CPU | 1 | 1828.21 | + +_Inference time is computed on a MacBook M3 Max using the example page `tests/data/pdf/2305.03393v1-pg9.pdf`.
The comparison is done with the example [compare_vlm_models.py](./../examples/compare_vlm_models.py)._ + +To choose the model, the code snippet above can be extended as follows: + +```python +from docling.datamodel.base_models import InputFormat +from docling.document_converter import DocumentConverter, PdfFormatOption +from docling.pipeline.vlm_pipeline import VlmPipeline +from docling.datamodel.pipeline_options import ( + VlmPipelineOptions, +) +from docling.datamodel import vlm_model_specs + +pipeline_options = VlmPipelineOptions( + vlm_options=vlm_model_specs.SMOLDOCLING_MLX, # <-- change the model here +) + +converter = DocumentConverter( + format_options={ + InputFormat.PDF: PdfFormatOption( + pipeline_cls=VlmPipeline, + pipeline_options=pipeline_options, + ), + } +) + +doc = converter.convert(source="FILE").document +``` + +### Other models + +Other models can be configured by directly providing the Hugging Face `repo_id`, the prompt, and a few more options. + +For example: + +```python +from docling.datamodel.accelerator_options import AcceleratorDevice +from docling.datamodel.pipeline_options import VlmPipelineOptions +from docling.datamodel.pipeline_options_vlm_model import ( + InferenceFramework, + InlineVlmOptions, + ResponseFormat, + TransformersModelType, +) + +pipeline_options = VlmPipelineOptions( + vlm_options=InlineVlmOptions( + repo_id="ibm-granite/granite-vision-3.2-2b", + prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!", + response_format=ResponseFormat.MARKDOWN, + inference_framework=InferenceFramework.TRANSFORMERS, + transformers_model_type=TransformersModelType.AUTOMODEL_VISION2SEQ, + supported_devices=[ + AcceleratorDevice.CPU, + AcceleratorDevice.CUDA, + AcceleratorDevice.MPS, + ], + scale=2.0, + temperature=0.0, + ) +) +``` + + +## Remote models + +In addition to local models, the `VlmPipeline` allows offloading the inference to a remote service hosting the models. +Many remote inference services can be used; the key requirement is that they offer an OpenAI-compatible API. This includes vLLM, Ollama, and others. + +More examples of how to connect to remote inference services can be found in the following example: + +- [vlm_pipeline_api_model.py](./../examples/vlm_pipeline_api_model.py) diff --git a/mkdocs.yml b/mkdocs.yml index 2e40158e..db8bf27e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -60,6 +60,7 @@ nav: - Usage: usage/index.md - Supported formats: usage/supported_formats.md - Enrichment features: usage/enrichments.md + - Vision models: usage/vision_models.md - FAQ: - FAQ: faq/index.md - Concepts: @@ -78,6 +79,7 @@ nav: - "Multi-format conversion": examples/run_with_formats.py - "VLM pipeline with SmolDocling": examples/minimal_vlm_pipeline.py - "VLM pipeline with remote model": examples/vlm_pipeline_api_model.py + - "VLM comparison": examples/compare_vlm_models.py - "Figure export": examples/export_figures.py - "Table export": examples/export_tables.py - "Multimodal export": examples/export_multimodal.py diff --git a/pyproject.toml b/pyproject.toml index c28c6588..6075a9fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "docling" -version = "2.34.0" # DO NOT EDIT, updated automatically +version = "2.35.0" # DO NOT EDIT, updated automatically description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
license = "MIT" keywords = [ diff --git a/tests/data/groundtruth/docling_v2/example_08.html.itxt b/tests/data/groundtruth/docling_v2/example_08.html.itxt new file mode 100644 index 00000000..505408e3 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/example_08.html.itxt @@ -0,0 +1,8 @@ +item-0 at level 0: unspecified: group _root_ + item-1 at level 1: section: group header-1 + item-2 at level 2: section_header: Pivot table with with 1 row header + item-3 at level 3: table with [6x4] + item-4 at level 2: section_header: Pivot table with 2 row headers + item-5 at level 3: table with [6x5] + item-6 at level 2: section_header: Equivalent pivot table + item-7 at level 3: table with [6x5] \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/example_08.html.json b/tests/data/groundtruth/docling_v2/example_08.html.json new file mode 100644 index 00000000..085be7ef --- /dev/null +++ b/tests/data/groundtruth/docling_v2/example_08.html.json @@ -0,0 +1,2008 @@ +{ + "schema_name": "DoclingDocument", + "version": "1.3.0", + "name": "example_08", + "origin": { + "mimetype": "text/html", + "binary_hash": 12799593797322619937, + "filename": "example_08.html" + }, + "furniture": { + "self_ref": "#/furniture", + "children": [], + "content_layer": "furniture", + "name": "_root_", + "label": "unspecified" + }, + "body": { + "self_ref": "#/body", + "children": [ + { + "$ref": "#/groups/0" + } + ], + "content_layer": "body", + "name": "_root_", + "label": "unspecified" + }, + "groups": [ + { + "self_ref": "#/groups/0", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/0" + }, + { + "$ref": "#/texts/1" + }, + { + "$ref": "#/texts/2" + } + ], + "content_layer": "body", + "name": "header-1", + "label": "section" + } + ], + "texts": [ + { + "self_ref": "#/texts/0", + "parent": { + "$ref": "#/groups/0" + }, + "children": [ + { + "$ref": "#/tables/0" + } + ], + "content_layer": "body", + "label": "section_header", + "prov": [], + "orig": "Pivot table with with 1 row header", + "text": "Pivot table with with 1 row header", + "level": 1 + }, + { + "self_ref": "#/texts/1", + "parent": { + "$ref": "#/groups/0" + }, + "children": [ + { + "$ref": "#/tables/1" + } + ], + "content_layer": "body", + "label": "section_header", + "prov": [], + "orig": "Pivot table with 2 row headers", + "text": "Pivot table with 2 row headers", + "level": 1 + }, + { + "self_ref": "#/texts/2", + "parent": { + "$ref": "#/groups/0" + }, + "children": [ + { + "$ref": "#/tables/2" + } + ], + "content_layer": "body", + "label": "section_header", + "prov": [], + "orig": "Equivalent pivot table", + "text": "Equivalent pivot table", + "level": 1 + } + ], + "pictures": [], + "tables": [ + { + "self_ref": "#/tables/0", + "parent": { + "$ref": "#/texts/0" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Year", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Month", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + 
"start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Cost", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 5, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "January", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$134", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$162", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "February", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$155", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "March", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$160", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$143", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "April", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$210", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$150", + "column_header": false, + "row_header": 
false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "May", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$280", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$120", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 6, + "num_cols": 4, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Year", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Month", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Cost", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 5, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "January", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$134", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$162", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 5, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "February", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + 
"row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$155", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 5, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "March", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$160", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$143", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 5, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "April", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$210", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 5, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "May", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$280", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$120", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/1", + "parent": { + "$ref": "#/texts/1" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + 
"start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Year", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Quarter", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Month", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Cost", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 6, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "January", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$134", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$162", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "February", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$155", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "March", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$160", + "column_header": false, + "row_header": 
false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$143", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "April", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$210", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "May", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$280", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$120", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 6, + "num_cols": 5, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Year", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Quarter", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Month", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Cost", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 6, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + 
"col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "January", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$134", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$162", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 6, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "February", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$155", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 6, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "March", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$160", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$143", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 6, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + 
"end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "April", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$210", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 6, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "May", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$280", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$120", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/2", + "parent": { + "$ref": "#/texts/2" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Year", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Quarter", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Month", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": 
false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Cost", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 7, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "January", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$134", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$162", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "February", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$155", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "March", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$160", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$143", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "April", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + 
"start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$210", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "May", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$280", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$120", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 6, + "num_cols": 5, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Year", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Quarter", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Month", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Cost", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 7, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "January", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$134", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + 
"text": "$162", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 7, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "February", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$155", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 7, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "March", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$160", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$143", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 7, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "April", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$210", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + 
"row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 7, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "May", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$280", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$120", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + } + ], + "key_value_items": [], + "form_items": [], + "pages": {} +} \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/example_08.html.md b/tests/data/groundtruth/docling_v2/example_08.html.md new file mode 100644 index 00000000..462a8101 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/example_08.html.md @@ -0,0 +1,29 @@ +## Pivot table with with 1 row header + +| Year | Month | Revenue | Cost | +|--------|----------|-----------|--------| +| 2025 | January | $134 | $162 | +| 2025 | February | $150 | $155 | +| 2025 | March | $160 | $143 | +| 2025 | April | $210 | $150 | +| 2025 | May | $280 | $120 | + +## Pivot table with 2 row headers + +| Year | Quarter | Month | Revenue | Cost | +|--------|-----------|----------|-----------|--------| +| 2025 | Q1 | January | $134 | $162 | +| 2025 | Q1 | February | $150 | $155 | +| 2025 | Q1 | March | $160 | $143 | +| 2025 | Q2 | April | $210 | $150 | +| 2025 | Q2 | May | $280 | $120 | + +## Equivalent pivot table + +| Year | Quarter | Month | Revenue | Cost | +|--------|-----------|----------|-----------|--------| +| 2025 | Q1 | January | $134 | $162 | +| 2025 | Q1 | February | $150 | $155 | +| 2025 | Q1 | March | $160 | $143 | +| 2025 | Q2 | April | $210 | $150 | +| 2025 | Q2 | May | $280 | $120 | \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/textbox.docx.itxt b/tests/data/groundtruth/docling_v2/textbox.docx.itxt new file mode 100644 index 00000000..2933724f --- /dev/null +++ b/tests/data/groundtruth/docling_v2/textbox.docx.itxt @@ -0,0 +1,94 @@ +item-0 at level 0: unspecified: group _root_ + item-1 at level 1: paragraph: Chiayi County Shuishang Township ... mentary School Affiliated Kindergarten + item-2 at level 1: paragraph: Infectious Disease Reporting Pro ... 
r the 113th Academic Year Kindergarten + item-3 at level 1: paragraph: + item-4 at level 1: section: group textbox + item-5 at level 2: paragraph: Student falls ill + item-6 at level 2: paragraph: + item-7 at level 2: paragraph: + item-8 at level 2: list: group list + item-9 at level 3: list_item: Suggested Reportable Symptoms: +* ... sh +* Blisters +* Headache +* Sore throat + item-10 at level 1: list_item: + item-11 at level 1: paragraph: + item-12 at level 1: paragraph: + item-13 at level 1: section: group textbox + item-14 at level 2: paragraph: If a caregiver suspects that wit ... the same suggested reportable symptoms + item-15 at level 1: paragraph: + item-16 at level 1: paragraph: + item-17 at level 1: paragraph: + item-18 at level 1: paragraph: + item-19 at level 1: section: group textbox + item-20 at level 2: paragraph: Yes + item-21 at level 1: paragraph: + item-22 at level 1: paragraph: + item-23 at level 1: section: group textbox + item-24 at level 2: paragraph:  A report must be submitted wi ... saster Prevention Information Network. + item-25 at level 2: paragraph:  A report must also be submitt ... d Infectious Disease Reporting System. + item-26 at level 2: paragraph: + item-27 at level 2: paragraph: + item-28 at level 1: paragraph: + item-29 at level 1: paragraph: + item-30 at level 1: paragraph: + item-31 at level 1: paragraph: + item-32 at level 1: paragraph: + item-33 at level 1: paragraph: + item-34 at level 1: section: group textbox + item-35 at level 2: paragraph: Health Bureau: + item-36 at level 2: paragraph: Upon receiving a report from the ... rt to the Centers for Disease Control. + item-37 at level 2: list: group list + item-38 at level 3: list_item: If necessary, provide health edu ... vidual to undergo specimen collection. + item-39 at level 3: list_item: Implement appropriate epidemic p ... the Communicable Disease Control Act. + item-40 at level 2: paragraph: + item-41 at level 2: paragraph: + item-42 at level 1: list: group list + item-43 at level 2: list_item: + item-44 at level 1: paragraph: + item-45 at level 1: section: group textbox + item-46 at level 2: paragraph: Department of Education: +Collabo ... vention measures at all school levels. + item-47 at level 1: paragraph: + item-48 at level 1: paragraph: + item-49 at level 1: paragraph: + item-50 at level 1: paragraph: + item-51 at level 1: paragraph: + item-52 at level 1: paragraph: + item-53 at level 1: paragraph: + item-54 at level 1: section: group textbox + item-55 at level 2: inline: group group + item-56 at level 3: paragraph: The Health Bureau will handle + item-57 at level 3: paragraph: reporting and specimen collection + item-58 at level 3: paragraph: . + item-59 at level 2: paragraph: + item-60 at level 2: paragraph: + item-61 at level 1: paragraph: + item-62 at level 1: paragraph: + item-63 at level 1: paragraph: + item-64 at level 1: section: group textbox + item-65 at level 2: paragraph: Whether the epidemic has eased. + item-66 at level 2: paragraph: + item-67 at level 2: paragraph: + item-68 at level 1: paragraph: + item-69 at level 1: section: group textbox + item-70 at level 2: paragraph: Whether the test results are pos ... legally designated infectious disease. 
+ item-71 at level 2: paragraph: No + item-72 at level 1: paragraph: + item-73 at level 1: paragraph: + item-74 at level 1: section: group textbox + item-75 at level 1: paragraph: + item-76 at level 1: section: group textbox + item-77 at level 1: paragraph: + item-78 at level 1: paragraph: + item-79 at level 1: section: group textbox + item-80 at level 2: paragraph: Case closed. + item-81 at level 2: paragraph: + item-82 at level 2: paragraph: + item-83 at level 2: paragraph: The Health Bureau will carry out ... ters for Disease Control if necessary. + item-84 at level 1: paragraph: + item-85 at level 1: section: group textbox + item-86 at level 1: paragraph: + item-87 at level 1: paragraph: + item-88 at level 1: paragraph: \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/textbox.docx.json b/tests/data/groundtruth/docling_v2/textbox.docx.json new file mode 100644 index 00000000..c7985b24 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/textbox.docx.json @@ -0,0 +1,1470 @@ +{ + "schema_name": "DoclingDocument", + "version": "1.3.0", + "name": "textbox", + "origin": { + "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "binary_hash": 830302052279341882, + "filename": "textbox.docx" + }, + "furniture": { + "self_ref": "#/furniture", + "children": [], + "content_layer": "furniture", + "name": "_root_", + "label": "unspecified" + }, + "body": { + "self_ref": "#/body", + "children": [ + { + "$ref": "#/texts/0" + }, + { + "$ref": "#/texts/1" + }, + { + "$ref": "#/texts/2" + }, + { + "$ref": "#/groups/0" + }, + { + "$ref": "#/texts/7" + }, + { + "$ref": "#/texts/8" + }, + { + "$ref": "#/texts/9" + }, + { + "$ref": "#/groups/2" + }, + { + "$ref": "#/texts/11" + }, + { + "$ref": "#/texts/12" + }, + { + "$ref": "#/texts/13" + }, + { + "$ref": "#/texts/14" + }, + { + "$ref": "#/groups/3" + }, + { + "$ref": "#/texts/16" + }, + { + "$ref": "#/texts/17" + }, + { + "$ref": "#/groups/4" + }, + { + "$ref": "#/texts/22" + }, + { + "$ref": "#/texts/23" + }, + { + "$ref": "#/texts/24" + }, + { + "$ref": "#/texts/25" + }, + { + "$ref": "#/texts/26" + }, + { + "$ref": "#/texts/27" + }, + { + "$ref": "#/groups/5" + }, + { + "$ref": "#/groups/7" + }, + { + "$ref": "#/texts/35" + }, + { + "$ref": "#/groups/8" + }, + { + "$ref": "#/texts/37" + }, + { + "$ref": "#/texts/38" + }, + { + "$ref": "#/texts/39" + }, + { + "$ref": "#/texts/40" + }, + { + "$ref": "#/texts/41" + }, + { + "$ref": "#/texts/42" + }, + { + "$ref": "#/texts/43" + }, + { + "$ref": "#/groups/9" + }, + { + "$ref": "#/texts/49" + }, + { + "$ref": "#/texts/50" + }, + { + "$ref": "#/texts/51" + }, + { + "$ref": "#/groups/11" + }, + { + "$ref": "#/texts/55" + }, + { + "$ref": "#/groups/12" + }, + { + "$ref": "#/texts/58" + }, + { + "$ref": "#/texts/59" + }, + { + "$ref": "#/groups/13" + }, + { + "$ref": "#/texts/60" + }, + { + "$ref": "#/groups/14" + }, + { + "$ref": "#/texts/61" + }, + { + "$ref": "#/texts/62" + }, + { + "$ref": "#/groups/15" + }, + { + "$ref": "#/texts/67" + }, + { + "$ref": "#/groups/16" + }, + { + "$ref": "#/texts/68" + }, + { + "$ref": "#/texts/69" + }, + { + "$ref": "#/texts/70" + } + ], + "content_layer": "body", + "name": "_root_", + "label": "unspecified" + }, + "groups": [ + { + "self_ref": "#/groups/0", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/3" + }, + { + "$ref": "#/texts/4" + }, + { + "$ref": "#/texts/5" + }, + { + "$ref": "#/groups/1" + } + ], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + 
{ + "self_ref": "#/groups/1", + "parent": { + "$ref": "#/groups/0" + }, + "children": [ + { + "$ref": "#/texts/6" + } + ], + "content_layer": "body", + "name": "list", + "label": "list" + }, + { + "self_ref": "#/groups/2", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/10" + } + ], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + { + "self_ref": "#/groups/3", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/15" + } + ], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + { + "self_ref": "#/groups/4", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/18" + }, + { + "$ref": "#/texts/19" + }, + { + "$ref": "#/texts/20" + }, + { + "$ref": "#/texts/21" + } + ], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + { + "self_ref": "#/groups/5", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/28" + }, + { + "$ref": "#/texts/29" + }, + { + "$ref": "#/groups/6" + }, + { + "$ref": "#/texts/32" + }, + { + "$ref": "#/texts/33" + } + ], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + { + "self_ref": "#/groups/6", + "parent": { + "$ref": "#/groups/5" + }, + "children": [ + { + "$ref": "#/texts/30" + }, + { + "$ref": "#/texts/31" + } + ], + "content_layer": "body", + "name": "list", + "label": "list" + }, + { + "self_ref": "#/groups/7", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/34" + } + ], + "content_layer": "body", + "name": "list", + "label": "list" + }, + { + "self_ref": "#/groups/8", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/36" + } + ], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + { + "self_ref": "#/groups/9", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/groups/10" + }, + { + "$ref": "#/texts/47" + }, + { + "$ref": "#/texts/48" + } + ], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + { + "self_ref": "#/groups/10", + "parent": { + "$ref": "#/groups/9" + }, + "children": [ + { + "$ref": "#/texts/44" + }, + { + "$ref": "#/texts/45" + }, + { + "$ref": "#/texts/46" + } + ], + "content_layer": "body", + "name": "group", + "label": "inline" + }, + { + "self_ref": "#/groups/11", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/52" + }, + { + "$ref": "#/texts/53" + }, + { + "$ref": "#/texts/54" + } + ], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + { + "self_ref": "#/groups/12", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/56" + }, + { + "$ref": "#/texts/57" + } + ], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + { + "self_ref": "#/groups/13", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + { + "self_ref": "#/groups/14", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + { + "self_ref": "#/groups/15", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/63" + }, + { + "$ref": "#/texts/64" + }, + { + "$ref": "#/texts/65" + }, + { + "$ref": "#/texts/66" + } + ], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + { + "self_ref": "#/groups/16", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": 
"body", + "name": "textbox", + "label": "section" + } + ], + "texts": [ + { + "self_ref": "#/texts/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Chiayi County Shuishang Township Nanjing Elementary School Affiliated Kindergarten", + "text": "Chiayi County Shuishang Township Nanjing Elementary School Affiliated Kindergarten", + "formatting": { + "bold": true, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/1", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Infectious Disease Reporting Procedure for the 113th Academic Year Kindergarten", + "text": "Infectious Disease Reporting Procedure for the 113th Academic Year Kindergarten", + "formatting": { + "bold": true, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/2", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/3", + "parent": { + "$ref": "#/groups/0" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Student falls ill", + "text": "Student falls ill", + "formatting": { + "bold": true, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/4", + "parent": { + "$ref": "#/groups/0" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/5", + "parent": { + "$ref": "#/groups/0" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/6", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [], + "orig": "Suggested Reportable Symptoms:\n* Fever\n* Cough\n* Diarrhea\n* Vomiting\n* Rash\n* Blisters\n* Headache\n* Sore throat", + "text": "Suggested Reportable Symptoms:\n* Fever\n* Cough\n* Diarrhea\n* Vomiting\n* Rash\n* Blisters\n* Headache\n* Sore throat", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + }, + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/7", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [], + "orig": "", + "text": "", + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/8", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/9", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/10", + "parent": { + "$ref": "#/groups/2" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "If a caregiver suspects that within one week, a fifth of the class (for classes with more than 15 students) or more than three students (for classes with 15 or fewer students)\nshow the same suggested reportable symptoms", + "text": "If a caregiver suspects that within one week, a fifth of the class (for classes with more than 15 students) or more than three 
students (for classes with 15 or fewer students)\nshow the same suggested reportable symptoms", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/11", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/12", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/13", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/14", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/15", + "parent": { + "$ref": "#/groups/3" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Yes", + "text": "Yes", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/16", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/17", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/18", + "parent": { + "$ref": "#/groups/4" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": " A report must be submitted within 24 hours via the Ministry of Education’s Campus Safety and Disaster Prevention Information Network.", + "text": " A report must be submitted within 24 hours via the Ministry of Education’s Campus Safety and Disaster Prevention Information Network.", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/19", + "parent": { + "$ref": "#/groups/4" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": " A report must also be submitted within 48 hours through Chiayi County’s School Suspected Infectious Disease Reporting System.", + "text": " A report must also be submitted within 48 hours through Chiayi County’s School Suspected Infectious Disease Reporting System.", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/20", + "parent": { + "$ref": "#/groups/4" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/21", + "parent": { + "$ref": "#/groups/4" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/22", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/23", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/24", + "parent": { + "$ref": "#/body" + }, + 
"children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/25", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/26", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/27", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/28", + "parent": { + "$ref": "#/groups/5" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Health Bureau:", + "text": "Health Bureau:", + "formatting": { + "bold": true, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/29", + "parent": { + "$ref": "#/groups/5" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Upon receiving a report from the kindergarten, conduct a preliminary assessment of the case, and depending on the situation and type of illness, carry out an epidemiological investigation and report to the Centers for Disease Control.", + "text": "Upon receiving a report from the kindergarten, conduct a preliminary assessment of the case, and depending on the situation and type of illness, carry out an epidemiological investigation and report to the Centers for Disease Control.", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/30", + "parent": { + "$ref": "#/groups/6" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [], + "orig": "If necessary, provide health education and important reminders at the kindergarten, or notify the individual to undergo specimen collection.", + "text": "If necessary, provide health education and important reminders at the kindergarten, or notify the individual to undergo specimen collection.", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + }, + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/31", + "parent": { + "$ref": "#/groups/6" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [], + "orig": "Implement appropriate epidemic prevention measures in accordance with the Communicable Disease Control Act.", + "text": "Implement appropriate epidemic prevention measures in accordance with the Communicable Disease Control Act.", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + }, + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/32", + "parent": { + "$ref": "#/groups/5" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/33", + "parent": { + "$ref": "#/groups/5" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/34", + "parent": { + "$ref": "#/groups/7" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [], + "orig": "", + "text": "", + "enumerated": false, + "marker": "-" + }, + { + "self_ref": 
"#/texts/35", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/36", + "parent": { + "$ref": "#/groups/8" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.", + "text": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/37", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/38", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/39", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/40", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/41", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/42", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/43", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/44", + "parent": { + "$ref": "#/groups/10" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "The Health Bureau will handle", + "text": "The Health Bureau will handle", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/45", + "parent": { + "$ref": "#/groups/10" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "reporting and specimen collection", + "text": "reporting and specimen collection", + "formatting": { + "bold": true, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/46", + "parent": { + "$ref": "#/groups/10" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": ".", + "text": ".", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/47", + "parent": { + "$ref": "#/groups/9" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/48", + "parent": { + "$ref": "#/groups/9" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + 
"orig": "", + "text": "" + }, + { + "self_ref": "#/texts/49", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/50", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/51", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/52", + "parent": { + "$ref": "#/groups/11" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Whether the epidemic has eased.", + "text": "Whether the epidemic has eased.", + "formatting": { + "bold": true, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/53", + "parent": { + "$ref": "#/groups/11" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/54", + "parent": { + "$ref": "#/groups/11" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/55", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/56", + "parent": { + "$ref": "#/groups/12" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Whether the test results are positive for a legally designated infectious disease.", + "text": "Whether the test results are positive for a legally designated infectious disease.", + "formatting": { + "bold": true, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/57", + "parent": { + "$ref": "#/groups/12" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "No", + "text": "No", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/58", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/59", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/60", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/61", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/62", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/63", + "parent": { + "$ref": "#/groups/15" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Case closed.", + "text": "Case closed.", + "formatting": { + "bold": true, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/64", + "parent": 
{ + "$ref": "#/groups/15" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/65", + "parent": { + "$ref": "#/groups/15" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/66", + "parent": { + "$ref": "#/groups/15" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.", + "text": "The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/67", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/68", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/69", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/70", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + } + ], + "pictures": [], + "tables": [], + "key_value_items": [], + "form_items": [], + "pages": {} +} \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/textbox.docx.md b/tests/data/groundtruth/docling_v2/textbox.docx.md new file mode 100644 index 00000000..829abad9 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/textbox.docx.md @@ -0,0 +1,46 @@ +**Chiayi County Shuishang Township Nanjing Elementary School Affiliated Kindergarten** + +**Infectious Disease Reporting Procedure for the 113th Academic Year Kindergarten** + +**Student falls ill** + +- Suggested Reportable Symptoms: +* Fever +* Cough +* Diarrhea +* Vomiting +* Rash +* Blisters +* Headache +* Sore throat + +If a caregiver suspects that within one week, a fifth of the class (for classes with more than 15 students) or more than three students (for classes with 15 or fewer students) +show the same suggested reportable symptoms + +Yes + + A report must be submitted within 24 hours via the Ministry of Education’s Campus Safety and Disaster Prevention Information Network. + + A report must also be submitted within 48 hours through Chiayi County’s School Suspected Infectious Disease Reporting System. + +**Health Bureau:** + +Upon receiving a report from the kindergarten, conduct a preliminary assessment of the case, and depending on the situation and type of illness, carry out an epidemiological investigation and report to the Centers for Disease Control. + +- If necessary, provide health education and important reminders at the kindergarten, or notify the individual to undergo specimen collection. +- Implement appropriate epidemic prevention measures in accordance with the Communicable Disease Control Act. 
+ +Department of Education: +Collaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels. + +The Health Bureau will handle **reporting and specimen collection** . + +**Whether the epidemic has eased.** + +**Whether the test results are positive for a legally designated infectious disease.** + +No + +**Case closed.** + +The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary. \ No newline at end of file diff --git a/tests/data/html/example_8.html b/tests/data/html/example_08.html similarity index 100% rename from tests/data/html/example_8.html rename to tests/data/html/example_08.html diff --git a/tests/test_backend_csv.py b/tests/test_backend_csv.py index d929ae19..f7b5d309 100644 --- a/tests/test_backend_csv.py +++ b/tests/test_backend_csv.py @@ -39,8 +39,15 @@ def test_e2e_valid_csv_conversions(): print(f"converting {csv_path}") gt_path = csv_path.parent.parent / "groundtruth" / "docling_v2" / csv_path.name - - conv_result: ConversionResult = converter.convert(csv_path) + if csv_path.stem in ( + "csv-too-few-columns", + "csv-too-many-columns", + "csv-inconsistent-header", + ): + with warns(UserWarning, match="Inconsistent column lengths"): + conv_result: ConversionResult = converter.convert(csv_path) + else: + conv_result: ConversionResult = converter.convert(csv_path) doc: DoclingDocument = conv_result.document diff --git a/tests/test_e2e_conversion.py b/tests/test_e2e_conversion.py index 5dc2e89a..93655527 100644 --- a/tests/test_e2e_conversion.py +++ b/tests/test_e2e_conversion.py @@ -1,9 +1,10 @@ from pathlib import Path from docling.backend.docling_parse_backend import DoclingParseDocumentBackend +from docling.datamodel.accelerator_options import AcceleratorDevice from docling.datamodel.base_models import InputFormat from docling.datamodel.document import ConversionResult -from docling.datamodel.pipeline_options import AcceleratorDevice, PdfPipelineOptions +from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.document_converter import DocumentConverter, PdfFormatOption from .test_data_gen_flag import GEN_TEST_DATA diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py index af6e62ce..b34824a1 100644 --- a/tests/test_e2e_ocr_conversion.py +++ b/tests/test_e2e_ocr_conversion.py @@ -3,10 +3,10 @@ from pathlib import Path from typing import List, Tuple from docling.backend.docling_parse_backend import DoclingParseDocumentBackend +from docling.datamodel.accelerator_options import AcceleratorDevice from docling.datamodel.base_models import InputFormat from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( - AcceleratorDevice, EasyOcrOptions, OcrMacOptions, OcrOptions, diff --git a/tests/test_input_doc.py b/tests/test_input_doc.py index 94a68873..d5e40f0f 100644 --- a/tests/test_input_doc.py +++ b/tests/test_input_doc.py @@ -132,6 +132,13 @@ def test_guess_format(tmp_path): doc_path = Path("./tests/data/html/wiki_duck.html") assert dci._guess_format(doc_path) == InputFormat.HTML + html_str = ( # HTML starting with a script + "<script></script>" + '<html><head></head><body></body></html>\n' + ) + stream = DocumentStream(name="lorem_ipsum", stream=BytesIO(f"{html_str}".encode())) + assert dci._guess_format(stream) == InputFormat.HTML + # Valid MD buf =
BytesIO(Path("./tests/data/md/wiki.md").open("rb").read()) stream = DocumentStream(name="wiki.md", stream=buf) diff --git a/tests/test_legacy_format_transform.py b/tests/test_legacy_format_transform.py index caef8ffc..73c73c5b 100644 --- a/tests/test_legacy_format_transform.py +++ b/tests/test_legacy_format_transform.py @@ -38,17 +38,15 @@ def get_converter(): def test_compare_legacy_output(test_doc_paths): converter = get_converter() - res = converter.convert_all(test_doc_paths, raises_on_error=True) - for conv_res in res: print(f"Results for {conv_res.input.file}") - print( - json.dumps( - conv_res.legacy_document.model_dump( - mode="json", by_alias=True, exclude_none=True + with pytest.warns(DeprecationWarning, match="Use document instead"): + print( + json.dumps( + conv_res.legacy_document.model_dump( + mode="json", by_alias=True, exclude_none=True + ) ) ) - ) - # assert res.legacy_output == res.legacy_output_transformed diff --git a/tests/test_options.py b/tests/test_options.py index 7addda4b..a0835d1c 100644 --- a/tests/test_options.py +++ b/tests/test_options.py @@ -7,11 +7,10 @@ from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend +from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.base_models import ConversionStatus, InputFormat, QualityGrade from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( - AcceleratorDevice, - AcceleratorOptions, PdfPipelineOptions, TableFormerMode, ) diff --git a/tests/verify_utils.py b/tests/verify_utils.py index 46a46ace..96cf9c3c 100644 --- a/tests/verify_utils.py +++ b/tests/verify_utils.py @@ -4,6 +4,7 @@ import warnings from pathlib import Path from typing import List, Optional +import pytest from docling_core.types.doc import ( DocItem, DoclingDocument, @@ -302,9 +303,8 @@ def verify_conversion_result_v1( ) doc_pred_pages: List[Page] = doc_result.pages - doc_pred: DsDocument = doc_result.legacy_document - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) + with pytest.warns(DeprecationWarning, match="Use document instead"): + doc_pred: DsDocument = doc_result.legacy_document doc_pred_md = doc_result.legacy_document.export_to_markdown() doc_pred_dt = doc_result.legacy_document.export_to_document_tokens() @@ -323,33 +323,33 @@ def verify_conversion_result_v1( if generate: # only used when re-generating truth pages_path.parent.mkdir(parents=True, exist_ok=True) - with open(pages_path, "w") as fw: + with open(pages_path, mode="w", encoding="utf-8") as fw: fw.write( json.dumps(doc_pred_pages, default=pydantic_encoder, indent=indent) ) json_path.parent.mkdir(parents=True, exist_ok=True) - with open(json_path, "w") as fw: + with open(json_path, mode="w", encoding="utf-8") as fw: fw.write(json.dumps(doc_pred, default=pydantic_encoder, indent=indent)) md_path.parent.mkdir(parents=True, exist_ok=True) - with open(md_path, "w") as fw: + with open(md_path, mode="w", encoding="utf-8") as fw: fw.write(doc_pred_md) dt_path.parent.mkdir(parents=True, exist_ok=True) - with open(dt_path, "w") as fw: + with open(dt_path, mode="w", encoding="utf-8") as fw: fw.write(doc_pred_dt) else: # default branch in test - with open(pages_path) as fr: + with open(pages_path, 
encoding="utf-8") as fr: doc_true_pages = PageList.validate_json(fr.read()) - with open(json_path) as fr: + with open(json_path, encoding="utf-8") as fr: doc_true: DsDocument = DsDocument.model_validate_json(fr.read()) - with open(md_path) as fr: + with open(md_path, encoding="utf-8") as fr: doc_true_md = fr.read() - with open(dt_path) as fr: + with open(dt_path, encoding="utf-8") as fr: doc_true_dt = fr.read() if not fuzzy: @@ -391,7 +391,7 @@ def verify_conversion_result_v2( doc_pred_pages: List[Page] = doc_result.pages doc_pred: DoclingDocument = doc_result.document doc_pred_md = doc_result.document.export_to_markdown() - doc_pred_dt = doc_result.document.export_to_document_tokens() + doc_pred_dt = doc_result.document.export_to_doctags() engine_suffix = "" if ocr_engine is None else f".{ocr_engine}" @@ -408,33 +408,33 @@ def verify_conversion_result_v2( if generate: # only used when re-generating truth pages_path.parent.mkdir(parents=True, exist_ok=True) - with open(pages_path, "w") as fw: + with open(pages_path, mode="w", encoding="utf-8") as fw: fw.write( json.dumps(doc_pred_pages, default=pydantic_encoder, indent=indent) ) json_path.parent.mkdir(parents=True, exist_ok=True) - with open(json_path, "w") as fw: + with open(json_path, mode="w", encoding="utf-8") as fw: fw.write(json.dumps(doc_pred, default=pydantic_encoder, indent=indent)) md_path.parent.mkdir(parents=True, exist_ok=True) - with open(md_path, "w") as fw: + with open(md_path, mode="w", encoding="utf-8") as fw: fw.write(doc_pred_md) dt_path.parent.mkdir(parents=True, exist_ok=True) - with open(dt_path, "w") as fw: + with open(dt_path, mode="w", encoding="utf-8") as fw: fw.write(doc_pred_dt) else: # default branch in test - with open(pages_path) as fr: + with open(pages_path, encoding="utf-8") as fr: doc_true_pages = PageList.validate_json(fr.read()) - with open(json_path) as fr: + with open(json_path, encoding="utf-8") as fr: doc_true: DoclingDocument = DoclingDocument.model_validate_json(fr.read()) - with open(md_path) as fr: + with open(md_path, encoding="utf-8") as fr: doc_true_md = fr.read() - with open(dt_path) as fr: + with open(dt_path, encoding="utf-8") as fr: doc_true_dt = fr.read() if not fuzzy: @@ -461,12 +461,12 @@ def verify_conversion_result_v2( def verify_document(pred_doc: DoclingDocument, gtfile: str, generate: bool = False): if not os.path.exists(gtfile) or generate: - with open(gtfile, "w") as fw: + with open(gtfile, mode="w", encoding="utf-8") as fw: json.dump(pred_doc.export_to_dict(), fw, ensure_ascii=False, indent=2) return True else: - with open(gtfile) as fr: + with open(gtfile, encoding="utf-8") as fr: true_doc = DoclingDocument.model_validate_json(fr.read()) return verify_docitems(pred_doc, true_doc, fuzzy=False) @@ -476,11 +476,11 @@ def verify_export(pred_text: str, gtfile: str, generate: bool = False) -> bool: file = Path(gtfile) if not file.exists() or generate: - with file.open("w") as fw: + with file.open(mode="w", encoding="utf-8") as fw: fw.write(pred_text) return True - with file.open("r") as fr: + with file.open(encoding="utf-8") as fr: true_text = fr.read() return pred_text == true_text diff --git a/uv.lock b/uv.lock index 498c2379..d986422e 100644 --- a/uv.lock +++ b/uv.lock @@ -775,7 +775,7 @@ wheels = [ [[package]] name = "docling" -version = "2.34.0" +version = "2.35.0" source = { editable = "." } dependencies = [ { name = "beautifulsoup4" },