docling/docs/examples/minimal_asr_pipeline.py
Peter Staar 9afc2c7673 added minimal_asr_pipeline
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
2025-06-23 15:44:14 +02:00

50 lines
1.6 KiB
Python
Vendored

from pathlib import Path
from docling.datamodel import asr_model_specs
from docling.datamodel.base_models import ConversionStatus, InputFormat
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import AsrPipelineOptions
from docling.document_converter import AudioFormatOption, DocumentConverter
from docling.pipeline.asr_pipeline import AsrPipeline
def get_asr_converter():
    """Build a DocumentConverter whose audio inputs run through the ASR pipeline.

    The pipeline options are created with their defaults and then pointed at
    the ``WHISPER_TURBO`` model spec from ``asr_model_specs``.

    Returns:
        A ``DocumentConverter`` with a single format option registered for
        ``InputFormat.AUDIO``.
    """
    asr_pipeline_options = AsrPipelineOptions()
    asr_pipeline_options.asr_options = asr_model_specs.WHISPER_TURBO

    # Only the audio format is configured here; it is bound to AsrPipeline.
    audio_format_option = AudioFormatOption(
        pipeline_cls=AsrPipeline,
        pipeline_options=asr_pipeline_options,
    )
    return DocumentConverter(
        format_options={InputFormat.AUDIO: audio_format_option},
    )
def asr_pipeline_conversion(audio_path: Path) -> "DoclingDocument":
    """Convert an audio file with the whisper_turbo ASR pipeline.

    Args:
        audio_path: Location of the audio file to convert.

    Returns:
        The ``document`` attribute of the successful conversion result.

    Raises:
        AssertionError: If ``audio_path`` does not exist, or if the conversion
            ends with a status other than ``ConversionStatus.SUCCESS``.
    """
    # NOTE: the return annotation is quoted on purpose. ``DoclingDocument`` is
    # not imported in the visible part of this file, and an unquoted annotation
    # is evaluated when the ``def`` statement runs, which raises NameError as
    # soon as the module is imported or executed.

    # Explicit raises instead of ``assert`` so the checks are not stripped
    # under ``python -O``; AssertionError is kept so the exception type seen
    # by existing callers does not change.
    if not audio_path.exists():
        raise AssertionError(f"Test audio file not found: {audio_path}")

    converter = get_asr_converter()

    # Run the conversion on the audio file.
    result: ConversionResult = converter.convert(audio_path)

    # Anything other than SUCCESS is treated as a failure.
    if not (result.status == ConversionStatus.SUCCESS):
        raise AssertionError(f"Conversion failed with status: {result.status}")

    return result.document
if __name__ == "__main__":
    # Placeholder path: point this at a real audio file before running.
    audio_path = Path("<audio-file.wav/mp3>")
    # Convert the audio, then print the resulting document as Markdown.
    markdown = asr_pipeline_conversion(audio_path=audio_path).export_to_markdown()
    print(markdown)