added minimal_asr_pipeline

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter Staar 2025-06-23 15:44:14 +02:00
parent 602eaf9682
commit 9afc2c7673

49
docs/examples/minimal_asr_pipeline.py vendored Normal file
View File

@ -0,0 +1,49 @@
from pathlib import Path
from docling.datamodel import asr_model_specs
from docling.datamodel.base_models import ConversionStatus, InputFormat
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import AsrPipelineOptions
from docling.document_converter import AudioFormatOption, DocumentConverter
from docling.pipeline.asr_pipeline import AsrPipeline
def get_asr_converter():
    """Build a DocumentConverter that routes audio input through the ASR pipeline.

    The pipeline options are left at their defaults except for the ASR model,
    which is set to ``asr_model_specs.WHISPER_TURBO``.

    Returns:
        A ``DocumentConverter`` whose ``InputFormat.AUDIO`` entry uses
        ``AsrPipeline`` with the options described above.
    """
    asr_pipeline_options = AsrPipelineOptions()
    asr_pipeline_options.asr_options = asr_model_specs.WHISPER_TURBO

    # Bind the pipeline class and its options to the audio input format.
    audio_format_option = AudioFormatOption(
        pipeline_cls=AsrPipeline,
        pipeline_options=asr_pipeline_options,
    )

    return DocumentConverter(
        format_options={InputFormat.AUDIO: audio_format_option},
    )
def asr_pipeline_conversion(audio_path: Path) -> "DoclingDocument":
    """Transcribe an audio file with the ASR pipeline and return the document.

    Args:
        audio_path: Location of the audio file to convert.

    Returns:
        The ``document`` attribute of the conversion result.

    Raises:
        AssertionError: If ``audio_path`` does not exist, or if the conversion
            finishes with a status other than ``ConversionStatus.SUCCESS``.
    """
    # NOTE(review): the return annotation is quoted because `DoclingDocument`
    # is not imported in this module; an unquoted annotation is evaluated at
    # definition time and raises NameError. Presumably the type lives in
    # docling_core -- confirm and add the import for type checkers.

    # The checks below raise explicitly instead of using `assert` so they are
    # not stripped under `python -O`. AssertionError and the original messages
    # are kept so existing callers observe the same failures.
    if not audio_path.exists():
        raise AssertionError(f"Test audio file not found: {audio_path}")

    converter = get_asr_converter()

    # Convert the audio file
    result: ConversionResult = converter.convert(audio_path)

    # Verify conversion was successful
    if result.status != ConversionStatus.SUCCESS:
        raise AssertionError(f"Conversion failed with status: {result.status}")

    return result.document
if __name__ == "__main__":
    # Placeholder path: replace it with a real .wav/.mp3 file before running.
    audio_path = Path("<audio-file.wav/mp3>")

    # Run the ASR conversion, then print the resulting document as Markdown.
    doc = asr_pipeline_conversion(audio_path=audio_path)
    markdown = doc.export_to_markdown()
    print(markdown)