docs: Describe examples (#2262)

* Update .py examples with clearer guidance, update out of date imports and calls

  Signed-off-by: Mingxuan Zhao <43148277+mingxzhao@users.noreply.github.com>

* Fix minimal.py string error, fix ruff format error

  Signed-off-by: Mingxuan Zhao <43148277+mingxzhao@users.noreply.github.com>

* fix more CI issues

  Signed-off-by: Mingxuan Zhao <43148277+mingxzhao@users.noreply.github.com>

---------

Signed-off-by: Mingxuan Zhao <43148277+mingxzhao@users.noreply.github.com>
2025-12-08 20:58:11 +00:00 · 2025-09-16 10:00:38 -04:00
parent 0e95171dd6
commit ff351fd40c
21 changed files with 608 additions and 85 deletions
--- a/docs/examples/minimal_asr_pipeline.py
+++ b/docs/examples/minimal_asr_pipeline.py
@@ -1,3 +1,28 @@
+# %% [markdown]
+# Minimal ASR pipeline example: transcribe an audio file to Markdown text.
+#
+# What this example does
+# - Configures the ASR pipeline with a default model spec and converts one audio file.
+# - Prints the recognized speech segments in Markdown with timestamps.
+#
+# Prerequisites
+# - Install Docling with ASR extras and any audio dependencies (ffmpeg, etc.).
+# - Ensure your environment can download or access the configured ASR model.
+# - Some formats require ffmpeg codecs; install ffmpeg and ensure it's on PATH.
+#
+# How to run
+# - From the repository root, run: `python docs/examples/minimal_asr_pipeline.py`.
+# - The script prints the transcription to stdout.
+#
+# Customizing the model
+# - Edit `get_asr_converter()` to assign a different spec from `asr_model_specs` to `pipeline_options.asr_options` (e.g., another language or model size).
+# - Keep `InputFormat.AUDIO` and `AsrPipeline` unchanged for a minimal setup.
+#
+# Input audio
+# - Defaults to `tests/data/audio/sample_10s.mp3`. Update `audio_path` to your own file if needed.
+
+# %%
+
 from pathlib import Path

 from docling_core.types.doc import DoclingDocument
@@ -11,7 +36,11 @@ from docling.pipeline.asr_pipeline import AsrPipeline


 def get_asr_converter():
-    """Create a DocumentConverter configured for ASR with whisper_turbo model."""
+    """Create a DocumentConverter configured for ASR with a default model.
+
+    Uses `asr_model_specs.WHISPER_TURBO` by default. You can swap in another
+    model spec from `docling.datamodel.asr_model_specs` to experiment.
+    """
    pipeline_options = AsrPipelineOptions()
    pipeline_options.asr_options = asr_model_specs.WHISPER_TURBO

@@ -27,7 +56,7 @@ def get_asr_converter():


 def asr_pipeline_conversion(audio_path: Path) -> DoclingDocument:
-    """ASR pipeline conversion using whisper_turbo"""
+    """Run the ASR pipeline and return a `DoclingDocument` transcript."""
    # Check if the test audio file exists
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"