docs: Describe examples (#2262)

* Update .py examples with clearer guidance, update out of date imports and calls Signed-off-by: Mingxuan Zhao <43148277+mingxzhao@users.noreply.github.com> * Fix minimal.py string error, fix ruff format error Signed-off-by: Mingxuan Zhao <43148277+mingxzhao@users.noreply.github.com> * fix more CI issues Signed-off-by: Mingxuan Zhao <43148277+mingxzhao@users.noreply.github.com> --------- Signed-off-by: Mingxuan Zhao <43148277+mingxzhao@users.noreply.github.com>
2025-12-08 12:48:28 +00:00 · 2025-09-16 10:00:38 -04:00
parent 0e95171dd6
commit ff351fd40c
21 changed files with 608 additions and 85 deletions
--- a/docs/examples/batch_convert.py
+++ b/docs/examples/batch_convert.py
@@ -1,3 +1,33 @@
+"""
+Batch convert multiple PDF files and export results in several formats.
+
+What this example does
+- Loads a small set of sample PDFs.
+- Runs the Docling PDF pipeline once per file.
+- Writes outputs to `scratch/` in multiple formats (JSON, HTML, Markdown, text, doctags, YAML).
+
+Prerequisites
+- Install Docling and dependencies as described in the repository README.
+- Ensure you can import `docling` from your Python environment.
+- YAML export requires `PyYAML` (`pip install pyyaml`).
+
+Input documents
+- By default, this example uses a few PDFs from `tests/data/pdf/` in the repo.
+- If you cloned without test data, or want to use your own files, edit
+  `input_doc_paths` below to point to PDFs on your machine.
+
+Output formats (controlled by flags)
+- `USE_V2 = True` enables the current Docling document exports (recommended).
+- `USE_LEGACY = False` keeps legacy Deep Search exports disabled.
+  You can set it to `True` if you need legacy formats for compatibility tests.
+
+Notes
+- Set `pipeline_options.generate_page_images = True` to include page images in HTML.
+- The script logs conversion progress and raises if any documents fail.
+- This example shows both helper methods like `save_as_*` and lower-level
+  `export_to_*` + manual file writes; outputs may overlap intentionally.
+"""
+
 import json
 import logging
 import time
@@ -15,6 +45,9 @@ from docling.document_converter import DocumentConverter, PdfFormatOption

 _log = logging.getLogger(__name__)

+# Export toggles:
+# - USE_V2 controls modern Docling document exports.
+# - USE_LEGACY enables legacy Deep Search exports for comparison or migration.
 USE_V2 = True
 USE_LEGACY = False

@@ -35,6 +68,9 @@ def export_documents(
            doc_filename = conv_res.input.file.stem

            if USE_V2:
+                # Recommended modern Docling exports. These helpers mirror the
+                # lower-level "export_to_*" methods used below, but handle
+                # common details like image handling.
                conv_res.document.save_as_json(
                    output_dir / f"{doc_filename}.json",
                    image_mode=ImageRefMode.PLACEHOLDER,
@@ -121,6 +157,9 @@ def export_documents(
 def main():
    logging.basicConfig(level=logging.INFO)

+    # Location of sample PDFs used by this example. If your checkout does not
+    # include test data, change `data_folder` or point `input_doc_paths` to
+    # your own files.
    data_folder = Path(__file__).parent / "../../tests/data"
    input_doc_paths = [
        data_folder / "pdf/2206.01062.pdf",
@@ -139,6 +178,8 @@ def main():
    # settings.debug.visualize_tables = True
    # settings.debug.visualize_cells = True

+    # Configure the PDF pipeline. Enabling page image generation improves HTML
+    # previews (embedded images) but adds processing time.
    pipeline_options = PdfPipelineOptions()
    pipeline_options.generate_page_images = True

@@ -152,11 +193,14 @@ def main():

    start_time = time.time()

+    # Convert all inputs. Set `raises_on_error=False` to keep processing other
+    # files even if one fails; errors are summarized after the run.
    conv_results = doc_converter.convert_all(
        input_doc_paths,
        raises_on_error=False,  # to let conversion run through all and examine results at the end
    )
-    success_count, partial_success_count, failure_count = export_documents(
+    # Write outputs to ./scratch and log a summary.
+    _success_count, _partial_success_count, failure_count = export_documents(
        conv_results, output_dir=Path("scratch")
    )

--- a/docs/examples/compare_vlm_models.py
+++ b/docs/examples/compare_vlm_models.py
@@ -1,8 +1,28 @@
-# Compare VLM models
-# ==================
+# %% [markdown]
+# Compare different VLM models by running the VLM pipeline and timing outputs.
 #
-# This example runs the VLM pipeline with different vision-language models.
-# Their runtime as well output quality is compared.
+# What this example does
+# - Iterates through a list of VLM model configurations and converts the same file.
+# - Prints per-page generation times and saves JSON/MD/HTML to `scratch/`.
+# - Summarizes total inference time and pages processed in a table.
+#
+# Requirements
+# - Install `tabulate` for pretty printing (`pip install tabulate`).
+#
+# Prerequisites
+# - Install Docling with VLM extras. Ensure models can be downloaded or are available.
+#
+# How to run
+# - From the repo root: `python docs/examples/compare_vlm_models.py`.
+# - Results are saved to `scratch/` with filenames including the model and framework.
+#
+# Notes
+# - MLX models are skipped automatically on non-macOS platforms.
+# - On CUDA systems, you can enable flash_attention_2 (see commented lines).
+# - Running multiple VLMs can be GPU/CPU intensive and time-consuming; ensure
+#   enough VRAM/system RAM and close other memory-heavy apps.
+
+# %%

 import json
 import sys
@@ -31,6 +51,8 @@ from docling.pipeline.vlm_pipeline import VlmPipeline


 def convert(sources: list[Path], converter: DocumentConverter):
+    # Note: this helper assumes a single-item `sources` list. It returns after
+    # processing the first source to keep runtime/output focused.
    model_id = pipeline_options.vlm_options.repo_id.replace("/", "_")
    framework = pipeline_options.vlm_options.inference_framework
    for source in sources:
@@ -61,6 +83,8 @@ def convert(sources: list[Path], converter: DocumentConverter):

        print("===== Final output of the converted document =======")

+        # Manual export for illustration. Below, `save_as_json()` writes the same
+        # JSON again; kept intentionally to show both approaches.
        with (out_path / f"{fname}.json").open("w") as fp:
            fp.write(json.dumps(res.document.export_to_dict()))

--- a/docs/examples/custom_convert.py
+++ b/docs/examples/custom_convert.py
@@ -1,3 +1,39 @@
+# %% [markdown]
+# Customize PDF conversion by toggling OCR/backends and pipeline options.
+#
+# What this example does
+# - Shows several alternative configurations for the Docling PDF pipeline.
+# - Lets you try OCR engines (EasyOCR, Tesseract, system OCR) or no OCR.
+# - Converts a single sample PDF and exports results to `scratch/`.
+#
+# Prerequisites
+# - Install Docling and its optional OCR backends per the docs.
+# - Ensure you can import `docling` from your Python environment.
+#
+# How to run
+# - From the repository root, run: `python docs/examples/custom_convert.py`.
+# - Outputs are written under `scratch/` next to where you run the script.
+#
+# Choosing a configuration
+# - Only one configuration block should be active at a time.
+# - Uncomment exactly one of the sections below to experiment.
+# - The file ships with "Docling Parse with EasyOCR" enabled as a sensible default.
+# - If you uncomment a backend or OCR option that is not imported above, also
+#   import its class, e.g.:
+#   - `from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend`
+#   - `from docling.datamodel.pipeline_options import TesseractOcrOptions, TesseractCliOcrOptions, OcrMacOptions`
+#
+# Input document
+# - Defaults to a single PDF from `tests/data/pdf/` in the repo.
+# - If you don't have the test data, update `input_doc_path` to a local PDF.
+#
+# Notes
+# - EasyOCR language: adjust `pipeline_options.ocr_options.lang` (e.g., ["en"], ["es"], ["en", "de"]).
+# - Accelerators: tune `AcceleratorOptions` to select CPU/GPU or threads.
+# - Exports: JSON, plain text, Markdown, and doctags are saved in `scratch/`.
+
+# %%
+
 import json
 import logging
 import time
@@ -21,9 +57,8 @@ def main():

    ###########################################################################

-    # The following sections contain a combination of PipelineOptions
-    # and PDF Backends for various configurations.
-    # Uncomment one section at the time to see the differences in the output.
+    # The sections below demo combinations of PdfPipelineOptions and backends.
+    # Tip: Uncomment exactly one section at a time to compare outputs.

    # PyPdfium without EasyOCR
    # --------------------
@@ -68,8 +103,10 @@ def main():
    #     }
    # )

-    # Docling Parse with EasyOCR
-    # ----------------------
+    # Docling Parse with EasyOCR (default)
+    # -------------------------------
+    # Enables OCR and table structure with EasyOCR, using automatic device
+    # selection via AcceleratorOptions. Adjust languages as needed.
    pipeline_options = PdfPipelineOptions()
    pipeline_options.do_ocr = True
    pipeline_options.do_table_structure = True
@@ -86,7 +123,7 @@ def main():
    )

    # Docling Parse with EasyOCR (CPU only)
-    # ----------------------
+    # -------------------------------------
    # pipeline_options = PdfPipelineOptions()
    # pipeline_options.do_ocr = True
    # pipeline_options.ocr_options.use_gpu = False  # <-- set this.
@@ -100,7 +137,7 @@ def main():
    # )

    # Docling Parse with Tesseract
-    # ----------------------
+    # ----------------------------
    # pipeline_options = PdfPipelineOptions()
    # pipeline_options.do_ocr = True
    # pipeline_options.do_table_structure = True
@@ -114,7 +151,7 @@ def main():
    # )

    # Docling Parse with Tesseract CLI
-    # ----------------------
+    # --------------------------------
    # pipeline_options = PdfPipelineOptions()
    # pipeline_options.do_ocr = True
    # pipeline_options.do_table_structure = True
@@ -127,8 +164,8 @@ def main():
    #     }
    # )

-    # Docling Parse with ocrmac(Mac only)
-    # ----------------------
+    # Docling Parse with ocrmac (macOS only)
+    # --------------------------------------
    # pipeline_options = PdfPipelineOptions()
    # pipeline_options.do_ocr = True
    # pipeline_options.do_table_structure = True
@@ -154,13 +191,13 @@ def main():
    output_dir.mkdir(parents=True, exist_ok=True)
    doc_filename = conv_result.input.file.stem

-    # Export Deep Search document JSON format:
+    # Export Docling document JSON format:
    with (output_dir / f"{doc_filename}.json").open("w", encoding="utf-8") as fp:
        fp.write(json.dumps(conv_result.document.export_to_dict()))

-    # Export Text format:
+    # Export Text format (plain text via Markdown export):
    with (output_dir / f"{doc_filename}.txt").open("w", encoding="utf-8") as fp:
-        fp.write(conv_result.document.export_to_text())
+        fp.write(conv_result.document.export_to_markdown(strict_text=True))

    # Export Markdown format:
    with (output_dir / f"{doc_filename}.md").open("w", encoding="utf-8") as fp:
--- a/docs/examples/develop_formula_understanding.py
+++ b/docs/examples/develop_formula_understanding.py
@@ -1,6 +1,21 @@
-# WARNING
-# This example demonstrates only how to develop a new enrichment model.
-# It does not run the actual formula understanding model.
+# %% [markdown]
+# Developing an enrichment model example (formula understanding: scaffold only).
+#
+# What this example does
+# - Shows how to define pipeline options, an enrichment model, and extend a pipeline.
+# - Displays cropped images of formula items and yields them back unchanged.
+#
+# Important
+# - This is a development scaffold; it does not run a real formula understanding model.
+#
+# How to run
+# - From the repo root: `python docs/examples/develop_formula_understanding.py`.
+#
+# Notes
+# - Set `do_formula_understanding=True` to enable the example enrichment stage.
+# - Extends `StandardPdfPipeline` and keeps the backend when enrichment is enabled.
+
+# %%

 import logging
 from collections.abc import Iterable
@@ -42,6 +57,8 @@ class ExampleFormulaUnderstandingEnrichmentModel(BaseItemAndImageEnrichmentModel
            return

        for enrich_element in element_batch:
+            # Opens a viewer window for each cropped formula image; comment this out
+            # when running headless or processing many items to avoid a flood of windows.
            enrich_element.image.show()

            yield enrich_element.item
--- a/docs/examples/develop_picture_enrichment.py
+++ b/docs/examples/develop_picture_enrichment.py
@@ -1,6 +1,21 @@
-# WARNING
-# This example demonstrates only how to develop a new enrichment model.
-# It does not run the actual picture classifier model.
+# %% [markdown]
+# Developing a picture enrichment model (classifier scaffold only).
+#
+# What this example does
+# - Demonstrates how to implement an enrichment model that annotates pictures.
+# - Adds a dummy PictureClassificationData entry to each PictureItem.
+#
+# Important
+# - This is a scaffold for development; it does not run a real classifier.
+#
+# How to run
+# - From the repo root: `python docs/examples/develop_picture_enrichment.py`.
+#
+# Notes
+# - Enables picture image generation and sets `images_scale` to improve crops.
+# - Extends `StandardPdfPipeline` with a custom enrichment stage.
+
+# %%

 import logging
 from collections.abc import Iterable
@@ -43,7 +58,7 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
            assert isinstance(element, PictureItem)

            # uncomment this to interactively visualize the image
-            # element.get_image(doc).show()
+            # element.get_image(doc).show()  # may block; avoid in headless runs

            element.annotations.append(
                PictureClassificationData(
--- a/docs/examples/enrich_doclingdocument.py
+++ b/docs/examples/enrich_doclingdocument.py
@@ -1,6 +1,26 @@
-## Enrich DoclingDocument
-# This example allows to run Docling enrichment models on documents which have been already converted
-# and stored as serialized DoclingDocument JSON files.
+# %% [markdown]
+# Enrich an existing DoclingDocument JSON with a custom model (post-conversion).
+#
+# What this example does
+# - Loads a previously converted DoclingDocument from JSON (no reconversion).
+# - Uses a backend to crop images for items and runs an enrichment model in batches.
+# - Prints a few example annotations to stdout.
+#
+# Prerequisites
+# - A DoclingDocument JSON produced by another conversion (path configured below).
+# - Install Docling and dependencies for the chosen enrichment model.
+# - Ensure the JSON and the referenced PDF match (same document/version), so
+#   provenance bounding boxes line up for accurate cropping.
+#
+# How to run
+# - From the repo root: `python docs/examples/enrich_doclingdocument.py`.
+# - Adjust `input_doc_path` and `input_pdf_path` if your data is elsewhere.
+#
+# Notes
+# - `BATCH_SIZE` controls how many elements are passed to the model at once.
+# - `prepare_element()` crops context around elements based on the model's expansion.
+
+# %%

 ### Load modules

@@ -24,6 +44,7 @@ from docling.utils.utils import chunkify
 ### Define batch size used for processing

 BATCH_SIZE = 4
+# Trade-off: larger batches improve throughput but increase memory usage.

 ### From DocItem to the model inputs
 # The following function is responsible for taking an item and applying the required pre-processing for the model.
--- a/docs/examples/export_figures.py
+++ b/docs/examples/export_figures.py
@@ -1,3 +1,29 @@
+# %% [markdown]
+# Export page, figure, and table images from a PDF and save rich outputs.
+#
+# What this example does
+# - Converts a PDF, keeps page/element images, and writes them to `scratch/`.
+# - Exports Markdown and HTML with either embedded or referenced images.
+#
+# Prerequisites
+# - Install Docling and image dependencies. Pillow is used for image saves
+#   (`pip install pillow`) if not already available via Docling's deps.
+# - Ensure you can import `docling` from your Python environment.
+#
+# How to run
+# - From the repo root: `python docs/examples/export_figures.py`.
+# - Outputs (PNG, MD, HTML) are written to `scratch/`.
+#
+# Key options
+# - `IMAGE_RESOLUTION_SCALE`: increase to render higher-resolution images (e.g., 2.0).
+# - `PdfPipelineOptions.generate_page_images`/`generate_picture_images`: preserve images for export.
+# - `ImageRefMode`: choose `EMBEDDED` or `REFERENCED` when saving Markdown/HTML.
+#
+# Input document
+# - Defaults to `tests/data/pdf/2206.01062.pdf`. Change `input_doc_path` as needed.
+
+# %%
+
 import logging
 import time
 from pathlib import Path
@@ -20,12 +46,9 @@ def main():
    input_doc_path = data_folder / "pdf/2206.01062.pdf"
    output_dir = Path("scratch")

-    # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
-    # will destroy them for cleaning up memory.
-    # This is done by setting PdfPipelineOptions.images_scale, which also defines the scale of images.
-    # scale=1 correspond of a standard 72 DPI image
-    # The PdfPipelineOptions.generate_* are the selectors for the document elements which will be enriched
-    # with the image field
+    # Keep page/element images so they can be exported. The `images_scale` controls
+    # the rendered image resolution (scale=1 ~ 72 DPI). The `generate_*` toggles
+    # decide which elements are enriched with images.
    pipeline_options = PdfPipelineOptions()
    pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
    pipeline_options.generate_page_images = True
--- a/docs/examples/export_multimodal.py
+++ b/docs/examples/export_multimodal.py
@@ -1,3 +1,34 @@
+# %% [markdown]
+# Export multimodal page data (image bytes, text, segments) to a Parquet file.
+#
+# What this example does
+# - Converts a PDF and assembles per-page multimodal records: image, cells, text, segments.
+# - Normalizes records to a pandas DataFrame and writes a timestamped `.parquet` in `scratch/`.
+#
+# Prerequisites
+# - Install Docling and `pandas`. Optional: `datasets` and `Pillow` for the commented demo.
+#
+# How to run
+# - From the repo root: `python docs/examples/export_multimodal.py`.
+# - Output parquet is written to `scratch/`.
+#
+# Key options
+# - `IMAGE_RESOLUTION_SCALE`: page rendering scale (1 ~ 72 DPI).
+# - `PdfPipelineOptions.generate_page_images`: keep page images for export.
+#
+# Requirements
+# - Writing Parquet requires an engine such as `pyarrow` or `fastparquet`
+#   (`pip install pyarrow` is the most common choice).
+#
+# Input document
+# - Defaults to `tests/data/pdf/2206.01062.pdf`. Change `input_doc_path` as needed.
+#
+# Notes
+# - The commented block at the bottom shows how to load the Parquet with HF Datasets
+#   and reconstruct images from raw bytes.
+
+# %%
+
 import datetime
 import logging
 import time
@@ -23,10 +54,8 @@ def main():
    input_doc_path = data_folder / "pdf/2206.01062.pdf"
    output_dir = Path("scratch")

-    # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
-    # will destroy them for cleaning up memory.
-    # This is done by setting AssembleOptions.images_scale, which also defines the scale of images.
-    # scale=1 correspond of a standard 72 DPI image
+    # Keep page images so they can be exported to the multimodal rows.
+    # Use PdfPipelineOptions.images_scale to control the render scale (1 ~ 72 DPI).
    pipeline_options = PdfPipelineOptions()
    pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
    pipeline_options.generate_page_images = True
--- a/docs/examples/export_tables.py
+++ b/docs/examples/export_tables.py
@@ -1,3 +1,27 @@
+# %% [markdown]
+# Extract tables from a PDF and export them as CSV and HTML.
+#
+# What this example does
+# - Converts a PDF and iterates detected tables.
+# - Prints each table as Markdown to stdout, and saves CSV/HTML to `scratch/`.
+#
+# Prerequisites
+# - Install Docling and `pandas`.
+#
+# How to run
+# - From the repo root: `python docs/examples/export_tables.py`.
+# - Outputs are written to `scratch/`.
+#
+# Input document
+# - Defaults to `tests/data/pdf/2206.01062.pdf`. Change `input_doc_path` as needed.
+#
+# Notes
+# - `table.export_to_dataframe()` returns a pandas DataFrame for convenient export/processing.
+# - Printing via `DataFrame.to_markdown()` may require the optional `tabulate` package
+#   (`pip install tabulate`). If unavailable, skip the print or use `to_csv()`.
+
+# %%
+
 import logging
 import time
 from pathlib import Path
@@ -32,12 +56,12 @@ def main():
        print(f"## Table {table_ix}")
        print(table_df.to_markdown())

-        # Save the table as csv
+        # Save the table as CSV
        element_csv_filename = output_dir / f"{doc_filename}-table-{table_ix + 1}.csv"
        _log.info(f"Saving CSV table to {element_csv_filename}")
        table_df.to_csv(element_csv_filename)

-        # Save the table as html
+        # Save the table as HTML
        element_html_filename = output_dir / f"{doc_filename}-table-{table_ix + 1}.html"
        _log.info(f"Saving HTML table to {element_html_filename}")
        with element_html_filename.open("w") as fp:
--- a/docs/examples/full_page_ocr.py
+++ b/docs/examples/full_page_ocr.py
@@ -1,3 +1,31 @@
+# %% [markdown]
+# Force full-page OCR on a PDF using different OCR backends.
+#
+# What this example does
+# - Enables full-page OCR and table structure extraction for a sample PDF.
+# - Demonstrates how to switch between OCR backends via `ocr_options`.
+#
+# Prerequisites
+# - Install Docling and the desired OCR backend's dependencies (Tesseract, EasyOCR,
+#   RapidOCR, or macOS OCR).
+#
+# How to run
+# - From the repo root: `python docs/examples/full_page_ocr.py`.
+# - The script prints Markdown text to stdout.
+#
+# Choosing an OCR backend
+# - Uncomment one `ocr_options = ...` line below. Exactly one should be active.
+# - `force_full_page_ocr=True` processes each page purely via OCR (often slower
+#   than hybrid detection). Use when layout extraction is unreliable or the PDF
+#   contains scanned pages.
+# - If you switch OCR backends, ensure the corresponding option class is imported,
+#   e.g., `EasyOcrOptions`, `TesseractOcrOptions`, `OcrMacOptions`, `RapidOcrOptions`.
+#
+# Input document
+# - Defaults to `tests/data/pdf/2206.01062.pdf`. Change `input_doc_path` as needed.
+
+# %%
+
 from pathlib import Path

 from docling.datamodel.base_models import InputFormat
@@ -17,7 +45,8 @@ def main():
    pipeline_options.do_table_structure = True
    pipeline_options.table_structure_options.do_cell_matching = True

-    # Any of the OCR options can be used:EasyOcrOptions, TesseractOcrOptions, TesseractCliOcrOptions, OcrMacOptions(Mac only), RapidOcrOptions
+    # Any of the OCR options can be used: EasyOcrOptions, TesseractOcrOptions,
+    # TesseractCliOcrOptions, OcrMacOptions (macOS only), RapidOcrOptions
    # ocr_options = EasyOcrOptions(force_full_page_ocr=True)
    # ocr_options = TesseractOcrOptions(force_full_page_ocr=True)
    # ocr_options = OcrMacOptions(force_full_page_ocr=True)
--- a/docs/examples/inspect_picture_content.py
+++ b/docs/examples/inspect_picture_content.py
@@ -1,12 +1,30 @@
+# %% [markdown]
+# Inspect the contents associated with each picture in a converted document.
+#
+# What this example does
+# - Converts a PDF and iterates over each PictureItem.
+# - Prints the caption and the textual items contained within the picture region.
+#
+# How to run
+# - From the repo root: `python docs/examples/inspect_picture_content.py`.
+#
+# Notes
+# - Uncomment `picture.get_image(doc).show()` to visually inspect each picture.
+# - Adjust `source` to point to a different PDF if desired.
+
+# %%
+
 from docling_core.types.doc import TextItem

 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.pipeline_options import PdfPipelineOptions
 from docling.document_converter import DocumentConverter, PdfFormatOption

+# Change this to a local path if desired
 source = "tests/data/pdf/amt_handbook_sample.pdf"

 pipeline_options = PdfPipelineOptions()
+# Higher scale yields sharper crops when inspecting picture content.
 pipeline_options.images_scale = 2
 pipeline_options.generate_page_images = True

@@ -19,7 +37,7 @@ result = doc_converter.convert(source)
 doc = result.document

 for picture in doc.pictures:
-    # picture.get_image(doc).show() # display the picture
+    # picture.get_image(doc).show()  # display the picture
    print(picture.caption_text(doc), " contains these elements:")

    for item, level in doc.iterate_items(root=picture, traverse_pictures=True):
--- a/docs/examples/minimal.py
+++ b/docs/examples/minimal.py
@@ -1,9 +1,32 @@
+# %% [markdown]
+# Simple conversion: one document to Markdown
+# ==========================================
+#
+# What this example does
+# - Converts a single source (URL or local file path) to a unified Docling
+#   document and prints Markdown to stdout.
+#
+# Requirements
+# - Python 3.9+
+# - Install Docling: `pip install docling`
+#
+# How to run
+# - Use the default sample URL: `python docs/examples/minimal.py`
+# - To use your own file or URL, edit the `source` variable below.
+#
+# Notes
+# - The converter auto-detects supported formats (PDF, DOCX, HTML, PPTX, images, etc.).
+# - For batch processing or saving outputs to files, see `docs/examples/batch_convert.py`.
+
 from docling.document_converter import DocumentConverter

-source = "https://arxiv.org/pdf/2408.09869"  # document per local path or URL
+# Change this to a local path or another URL if desired.
+# Note: using the default URL requires network access; if offline, provide a
+# local file path instead (e.g., "/path/to/file.pdf").
+source = "https://arxiv.org/pdf/2408.09869"

 converter = DocumentConverter()
-doc = converter.convert(source).document
+result = converter.convert(source)

-print(doc.export_to_markdown())
-# output: ## Docling Technical Report [...]"
+# Print Markdown to stdout.
+print(result.document.export_to_markdown())
--- a/docs/examples/minimal_asr_pipeline.py
+++ b/docs/examples/minimal_asr_pipeline.py
@@ -1,3 +1,28 @@
+# %% [markdown]
+# Minimal ASR pipeline example: transcribe an audio file to Markdown text.
+#
+# What this example does
+# - Configures the ASR pipeline with a default model spec and converts one audio file.
+# - Prints the recognized speech segments in Markdown with timestamps.
+#
+# Prerequisites
+# - Install Docling with ASR extras and any audio dependencies (ffmpeg, etc.).
+# - Ensure your environment can download or access the configured ASR model.
+# - Some formats require ffmpeg codecs; install ffmpeg and ensure it's on PATH.
+#
+# How to run
+# - From the repository root, run: `python docs/examples/minimal_asr_pipeline.py`.
+# - The script prints the transcription to stdout.
+#
+# Customizing the model
+# - Edit `get_asr_converter()` to switch `asr_model_specs` (e.g., language or model size).
+# - Keep `InputFormat.AUDIO` and `AsrPipeline` unchanged for a minimal setup.
+#
+# Input audio
+# - Defaults to `tests/data/audio/sample_10s.mp3`. Update `audio_path` to your own file if needed.
+
+# %%
+
 from pathlib import Path

 from docling_core.types.doc import DoclingDocument
@@ -11,7 +36,11 @@ from docling.pipeline.asr_pipeline import AsrPipeline


 def get_asr_converter():
-    """Create a DocumentConverter configured for ASR with whisper_turbo model."""
+    """Create a DocumentConverter configured for ASR with a default model.
+
+    Uses `asr_model_specs.WHISPER_TURBO` by default. You can swap in another
+    model spec from `docling.datamodel.asr_model_specs` to experiment.
+    """
    pipeline_options = AsrPipelineOptions()
    pipeline_options.asr_options = asr_model_specs.WHISPER_TURBO

@@ -27,7 +56,7 @@ def get_asr_converter():


 def asr_pipeline_conversion(audio_path: Path) -> DoclingDocument:
-    """ASR pipeline conversion using whisper_turbo"""
+    """Run the ASR pipeline and return a `DoclingDocument` transcript."""
    # Check if the test audio file exists
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"

--- a/docs/examples/minimal_vlm_pipeline.py
+++ b/docs/examples/minimal_vlm_pipeline.py
@@ -1,3 +1,25 @@
+# %% [markdown]
+# Minimal VLM pipeline example: convert a PDF using a vision-language model.
+#
+# What this example does
+# - Runs the VLM-powered pipeline on a PDF (by URL) and prints Markdown output.
+# - Shows two setups: default (Transformers/SmolDocling) and macOS MPS/MLX.
+#
+# Prerequisites
+# - Install Docling with VLM extras and the appropriate backend (Transformers or MLX).
+# - Ensure your environment can download model weights (e.g., from Hugging Face).
+#
+# How to run
+# - From the repository root, run: `python docs/examples/minimal_vlm_pipeline.py`.
+# - The script prints the converted Markdown to stdout.
+#
+# Notes
+# - `source` may be a local path or a URL to a PDF.
+# - The second section demonstrates macOS MPS acceleration via MLX (`vlm_model_specs.SMOLDOCLING_MLX`).
+# - For more configurations and model comparisons, see `docs/examples/compare_vlm_models.py`.
+
+# %%
+
 from docling.datamodel import vlm_model_specs
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.pipeline_options import (
@@ -6,6 +28,7 @@ from docling.datamodel.pipeline_options import (
 from docling.document_converter import DocumentConverter, PdfFormatOption
 from docling.pipeline.vlm_pipeline import VlmPipeline

+# Convert a public arXiv PDF; replace with a local path if preferred.
 source = "https://arxiv.org/pdf/2501.17887"

 ###### USING SIMPLE DEFAULT VALUES
@@ -26,7 +49,8 @@ print(doc.export_to_markdown())


 ###### USING MACOS MPS ACCELERATOR
-# For more options see the compare_vlm_models.py example.
+# Demonstrates using MLX on macOS with MPS acceleration (macOS only).
+# For more options see the `compare_vlm_models.py` example.

 pipeline_options = VlmPipelineOptions(
    vlm_options=vlm_model_specs.SMOLDOCLING_MLX,
--- a/docs/examples/pictures_description_api.py
+++ b/docs/examples/pictures_description_api.py
@@ -1,3 +1,27 @@
+# %% [markdown]
+# Describe pictures using a remote VLM API (vLLM, LM Studio, or watsonx.ai).
+#
+# What this example does
+# - Configures `PictureDescriptionApiOptions` for local or cloud providers.
+# - Converts a PDF, then prints each picture's caption and annotations.
+#
+# Prerequisites
+# - Install Docling and `python-dotenv` if loading env vars from a `.env` file.
+# - For local providers: ensure vLLM or LM Studio is running.
+# - For watsonx.ai: set `WX_API_KEY` and `WX_PROJECT_ID` in the environment.
+#
+# How to run
+# - From the repo root: `python docs/examples/pictures_description_api.py`.
+# - Uncomment exactly one provider config; `enable_remote_services=True` is already set.
+#
+# Notes
+# - vLLM default endpoint: `http://localhost:8000/v1/chat/completions`.
+# - LM Studio default endpoint: `http://localhost:1234/v1/chat/completions`.
+# - Calling remote APIs sends page images/text to the provider; review privacy and
+#   costs. For local testing, LM Studio runs everything on your machine.
+
+# %%
+
 import logging
 import os
 from pathlib import Path
--- a/docs/examples/rapidocr_with_custom_models.py
+++ b/docs/examples/rapidocr_with_custom_models.py
@@ -1,25 +1,46 @@
+# %% [markdown]
+# Use RapidOCR with custom ONNX models to OCR a PDF page and print Markdown.
+#
+# What this example does
+# - Downloads the `RapidAI/RapidOCR` models via ModelScope's `snapshot_download`.
+# - Configures `RapidOcrOptions` with explicit det/rec/cls model paths.
+# - Runs the PDF pipeline with RapidOCR and prints Markdown output.
+#
+# Prerequisites
+# - Install Docling, `modelscope`, and have network access to download models.
+# - Ensure your environment can import `docling` and `modelscope`.
+#
+# How to run
+# - From the repo root: `python docs/examples/rapidocr_with_custom_models.py`.
+# - The script prints the recognized text as Markdown to stdout.
+#
+# Notes
+# - The default `source` points to an arXiv PDF URL; replace with a local path if desired.
+# - Model paths are derived from the downloaded snapshot directory.
+# - ModelScope caches downloads (typically under `~/.cache/modelscope`); set a proxy
+#   or pre-download models if running in a restricted network environment.
+
+# %%
+
 import os

 from modelscope import snapshot_download

+from docling.datamodel.base_models import InputFormat
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import PdfPipelineOptions, RapidOcrOptions
-from docling.document_converter import (
-    ConversionResult,
-    DocumentConverter,
-    InputFormat,
-    PdfFormatOption,
-)
+from docling.document_converter import DocumentConverter, PdfFormatOption


 def main():
    # Source document to convert
    source = "https://arxiv.org/pdf/2408.09869v4"

-    # Download RappidOCR models from HuggingFace
+    # Download RapidOCR models via ModelScope (`snapshot_download`)
    print("Downloading RapidOCR models")
    download_path = snapshot_download(repo_id="RapidAI/RapidOCR")

-    # Setup RapidOcrOptions for english detection
+    # Setup RapidOcrOptions for English detection
    det_model_path = os.path.join(
        download_path, "onnx", "PP-OCRv5", "det", "ch_PP-OCRv5_server_det.onnx"
    )
--- a/docs/examples/run_with_accelerator.py
+++ b/docs/examples/run_with_accelerator.py
@@ -1,3 +1,22 @@
+# %% [markdown]
+# Run conversion with an explicit accelerator configuration (CPU/MPS/CUDA).
+#
+# What this example does
+# - Shows how to select the accelerator device and thread count.
+# - Enables OCR and table structure to exercise compute paths, and prints timings.
+#
+# How to run
+# - From the repo root: `python docs/examples/run_with_accelerator.py`.
+# - Toggle the commented `AcceleratorOptions` examples to try AUTO/MPS/CUDA.
+#
+# Notes
+# - EasyOCR does not support `cuda:N` device selection (defaults to `cuda:0`).
+# - `settings.debug.profile_pipeline_timings = True` prints profiling details.
+# - `AcceleratorDevice.MPS` is macOS-only; `CUDA` requires a compatible GPU and
+#   CUDA-enabled PyTorch build. CPU mode works everywhere.
+
+# %%
+
 from pathlib import Path

 from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
--- a/docs/examples/run_with_formats.py
+++ b/docs/examples/run_with_formats.py
@@ -1,3 +1,32 @@
+# %% [markdown]
+# Run conversion across multiple input formats and customize handling per type.
+#
+# What this example does
+# - Demonstrates converting a mixed list of files (PDF, DOCX, PPTX, HTML, images, etc.).
+# - Shows how to restrict `allowed_formats` and override `format_options` per format.
+# - Writes results (Markdown, JSON, YAML) to `scratch/`.
+#
+# Prerequisites
+# - Install Docling and any format-specific dependencies (e.g., for DOCX/PPTX parsing).
+# - Ensure you can import `docling` from your Python environment.
+# - YAML export requires `PyYAML` (`pip install pyyaml`).
+#
+# How to run
+# - From the repository root, run: `python docs/examples/run_with_formats.py`.
+# - Outputs are written under `scratch/` next to where you run the script.
+# - If `scratch/` does not exist, create it before running.
+#
+# Customizing inputs
+# - Update `input_paths` to include or remove files on your machine.
+# - Non-whitelisted formats are ignored (see `allowed_formats`).
+#
+# Notes
+# - `allowed_formats`: explicit whitelist of formats that will be processed.
+# - `format_options`: per-format pipeline/backend overrides. Everything is optional; defaults exist.
+# - Exports: per input, writes `<stem>.md`, `<stem>.json`, and `<stem>.yaml` in `scratch/`.
+
+# %%
+
 import json
 import logging
 from pathlib import Path
@@ -34,39 +63,41 @@ def main():

    ## to customize use:

-    doc_converter = (
-        DocumentConverter(  # all of the below is optional, has internal defaults.
-            allowed_formats=[
-                InputFormat.PDF,
-                InputFormat.IMAGE,
-                InputFormat.DOCX,
-                InputFormat.HTML,
-                InputFormat.PPTX,
-                InputFormat.ASCIIDOC,
-                InputFormat.CSV,
-                InputFormat.MD,
-            ],  # whitelist formats, non-matching files are ignored.
-            format_options={
-                InputFormat.PDF: PdfFormatOption(
-                    pipeline_cls=StandardPdfPipeline, backend=PyPdfiumDocumentBackend
-                ),
-                InputFormat.DOCX: WordFormatOption(
-                    pipeline_cls=SimplePipeline  # , backend=MsWordDocumentBackend
-                ),
-            },
-        )
+    # Below we explicitly whitelist formats and override behavior for some of them.
+    # You can omit this block and use the defaults (see above) for a quick start.
+    doc_converter = DocumentConverter(  # all of the below is optional, has internal defaults.
+        allowed_formats=[
+            InputFormat.PDF,
+            InputFormat.IMAGE,
+            InputFormat.DOCX,
+            InputFormat.HTML,
+            InputFormat.PPTX,
+            InputFormat.ASCIIDOC,
+            InputFormat.CSV,
+            InputFormat.MD,
+        ],  # whitelist formats, non-matching files are ignored.
+        format_options={
+            InputFormat.PDF: PdfFormatOption(
+                pipeline_cls=StandardPdfPipeline, backend=PyPdfiumDocumentBackend
+            ),
+            InputFormat.DOCX: WordFormatOption(
+                pipeline_cls=SimplePipeline  # or set a backend, e.g., MsWordDocumentBackend
+                # If you change the backend, remember to import it, e.g.:
+                #   from docling.backend.msword_backend import MsWordDocumentBackend
+            ),
+        },
    )

    conv_results = doc_converter.convert_all(input_paths)

    for res in conv_results:
-        out_path = Path("scratch")
+        out_path = Path("scratch")  # ensure this directory exists before running
        print(
            f"Document {res.input.file.name} converted."
            f"\nSaved markdown output to: {out_path!s}"
        )
        _log.debug(res.document._export_to_indented_text(max_text_len=16))
-        # Export Docling document format to markdowndoc:
+        # Export Docling document to Markdown:
        with (out_path / f"{res.input.file.stem}.md").open("w") as fp:
            fp.write(res.document.export_to_markdown())

--- a/docs/examples/tesseract_lang_detection.py
+++ b/docs/examples/tesseract_lang_detection.py
@@ -1,3 +1,22 @@
+# %% [markdown]
+# Detect language automatically with Tesseract OCR and force full-page OCR.
+#
+# What this example does
+# - Configures Tesseract (CLI in this snippet) with `lang=["auto"]`.
+# - Forces full-page OCR and prints the recognized text as Markdown.
+#
+# How to run
+# - From the repo root: `python docs/examples/tesseract_lang_detection.py`.
+# - Ensure Tesseract CLI (or library) is installed and on PATH.
+#
+# Notes
+# - You can switch to `TesseractOcrOptions` instead of `TesseractCliOcrOptions`.
+# - Language packs must be installed; set `TESSDATA_PREFIX` if Tesseract
+#   cannot find language data. Using `lang=["auto"]` requires traineddata
+#   that supports script/language detection on your system.
+
+# %%
+
 from pathlib import Path

 from docling.datamodel.base_models import InputFormat
--- a/docs/examples/translate.py
+++ b/docs/examples/translate.py
@@ -1,3 +1,23 @@
+# %% [markdown]
+# Translate extracted text content and regenerate Markdown with embedded images.
+#
+# What this example does
+# - Converts a PDF and saves original Markdown with embedded images.
+# - Translates text elements and table cell contents, then saves a translated Markdown.
+#
+# Prerequisites
+# - Install Docling. Add a translation library of your choice inside `translate()`.
+#
+# How to run
+# - From the repo root: `python docs/examples/translate.py`.
+# - The script writes original and translated Markdown to `scratch/`.
+#
+# Notes
+# - `translate()` is a placeholder; integrate your preferred translation API/client.
+# - Image generation is enabled to preserve embedded images in the output.
+
+# %%
+
 import logging
 from pathlib import Path

@@ -32,7 +52,7 @@ def main():

    data_folder = Path(__file__).parent / "../../tests/data"
    input_doc_path = data_folder / "pdf/2206.01062.pdf"
-    output_dir = Path("scratch")
+    output_dir = Path("scratch")  # ensure this directory exists before saving

    # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
    # will destroy them for cleaning up memory.
@@ -56,6 +76,7 @@ def main():
    doc_filename = conv_res.input.file.name

    # Save markdown with embedded pictures in original text
+    # Tip: create the `scratch/` folder first or adjust `output_dir`.
    md_filename = output_dir / f"{doc_filename}-with-images-orig.md"
    conv_doc.save_as_markdown(md_filename, image_mode=ImageRefMode.EMBEDDED)

--- a/docs/examples/vlm_pipeline_api_model.py
+++ b/docs/examples/vlm_pipeline_api_model.py
@@ -1,3 +1,31 @@
+# %% [markdown]
+# Use the VLM pipeline with remote API models (LM Studio, Ollama, watsonx.ai).
+#
+# What this example does
+# - Shows how to configure `ApiVlmOptions` for different VLM providers.
+# - Converts a single PDF page using the VLM pipeline and prints Markdown.
+#
+# Prerequisites
+# - Install Docling with VLM extras and `python-dotenv` if using environment files.
+# - For local APIs: run LM Studio (HTTP server) or Ollama locally.
+# - For cloud APIs: set required environment variables (see below).
+# - Requires `requests` for the HTTP calls made to the provider endpoints.
+#
+# How to run
+# - From the repo root: `python docs/examples/vlm_pipeline_api_model.py`.
+# - The script prints the converted Markdown to stdout.
+#
+# Choosing a provider
+# - Uncomment exactly one `pipeline_options.vlm_options = ...` block below.
+# - Keep `enable_remote_services=True` to permit calling remote APIs.
+#
+# Notes
+# - LM Studio default endpoint: `http://localhost:1234/v1/chat/completions`.
+# - Ollama default endpoint: `http://localhost:11434/v1/chat/completions`.
+# - watsonx.ai requires `WX_API_KEY` and `WX_PROJECT_ID` in env/`.env`.
+
+# %%
+
 import json
 import logging
 import os
@@ -170,14 +198,16 @@ def main():
    data_folder = Path(__file__).parent / "../../tests/data"
    input_doc_path = data_folder / "pdf/2305.03393v1-pg9.pdf"

+    # Configure the VLM pipeline. Enabling remote services allows HTTP calls to
+    # locally hosted APIs (LM Studio, Ollama) or cloud services.
    pipeline_options = VlmPipelineOptions(
-        enable_remote_services=True  # <-- this is required!
+        enable_remote_services=True  # required when calling remote VLM endpoints
    )

    # The ApiVlmOptions() allows to interface with APIs supporting
    # the multi-modal chat interface. Here follow a few example on how to configure those.

-    # One possibility is self-hosting model, e.g. via LM Studio, Ollama or others.
+    # One possibility is self-hosting the model, e.g., via LM Studio or Ollama.

    # Example using the SmolDocling model with LM Studio:
    # (uncomment the following lines)
@@ -208,8 +238,9 @@ def main():
    #     prompt="OCR the full page to markdown.",
    # )

-    # Another possibility is using online services, e.g. watsonx.ai.
-    # Using requires setting the env variables WX_API_KEY and WX_PROJECT_ID.
+    # Another possibility is using online services, e.g., watsonx.ai.
+    # Using watsonx.ai requires setting env variables WX_API_KEY and WX_PROJECT_ID
+    # (see the notes at the top of this file). You can use a .env file as well.
    # (uncomment the following lines)
    # pipeline_options.vlm_options = watsonx_vlm_options(
    #     model="ibm/granite-vision-3-2-2b", prompt="OCR the full page to markdown."