From b5f7fef29b4bbb51393a212041011ec5950aba28 Mon Sep 17 00:00:00 2001
From: pixiake
Date: Fri, 10 Oct 2025 15:37:15 +0800
Subject: [PATCH] fix: AsrPipeline to handle absolute paths and BytesIO streams correctly (#2407)

Fix AsrPipeline to handle absolute paths and BytesIO streams correctly

Signed-off-by: pixiake
Co-authored-by: pixiake
---
 docling/pipeline/asr_pipeline.py | 34 +++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/docling/pipeline/asr_pipeline.py b/docling/pipeline/asr_pipeline.py
index d043f9bb..611d49ee 100644
--- a/docling/pipeline/asr_pipeline.py
+++ b/docling/pipeline/asr_pipeline.py
@@ -1,6 +1,7 @@
 import logging
 import os
 import re
+import tempfile
 from io import BytesIO
 from pathlib import Path
 from typing import List, Optional, Union, cast
@@ -147,7 +148,25 @@ class _NativeWhisperModel:
         self.word_timestamps = asr_options.word_timestamps
 
     def run(self, conv_res: ConversionResult) -> ConversionResult:
-        audio_path: Path = Path(conv_res.input.file).resolve()
+        # Access the file path from the backend, similar to how other pipelines handle it
+        path_or_stream = conv_res.input._backend.path_or_stream
+
+        # Handle both Path and BytesIO inputs
+        temp_file_path: Optional[Path] = None
+
+        if isinstance(path_or_stream, BytesIO):
+            # For BytesIO, write to a temporary file since whisper requires a file path
+            suffix = Path(conv_res.input.file.name).suffix or ".wav"
+            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
+                tmp_file.write(path_or_stream.getvalue())
+                temp_file_path = Path(tmp_file.name)
+            audio_path = temp_file_path
+        elif isinstance(path_or_stream, Path):
+            audio_path = path_or_stream
+        else:
+            raise RuntimeError(
+                f"ASR pipeline requires a file path or BytesIO stream, but got {type(path_or_stream)}"
+            )
 
         try:
             conversation = self.transcribe(audio_path)
@@ -172,9 +191,18 @@ class _NativeWhisperModel:
 
         except Exception as exc:
             _log.error(f"Audio tranciption has an error: {exc}")
+            conv_res.status = ConversionStatus.FAILURE
+            return conv_res
 
-        conv_res.status = ConversionStatus.FAILURE
-        return conv_res
+        finally:
+            # Clean up temporary file if created
+            if temp_file_path is not None and temp_file_path.exists():
+                try:
+                    temp_file_path.unlink()
+                except Exception as e:
+                    _log.warning(
+                        f"Failed to delete temporary file {temp_file_path}: {e}"
+                    )
 
     def transcribe(self, fpath: Path) -> list[_ConversationItem]:
         result = self.model.transcribe(