fix: AsrPipeline to handle absolute paths and BytesIO streams correctly (#2407)

Fix AsrPipeline to handle absolute paths and BytesIO streams correctly

Signed-off-by: pixiake <guofeng@spader-ai.com>
Co-authored-by: pixiake <guofeng@spader-ai.com>
2025-12-08 20:58:11 +00:00 · 2025-10-10 15:37:15 +08:00
parent f2854b2e1d
commit b5f7fef29b
1 changed files with 31 additions and 3 deletions
--- a/docling/pipeline/asr_pipeline.py
+++ b/docling/pipeline/asr_pipeline.py
@@ -1,6 +1,7 @@
 import logging
 import os
 import re
 import tempfile
 from io import BytesIO
 from pathlib import Path
 from typing import List, Optional, Union, cast
@@ -147,7 +148,25 @@ class _NativeWhisperModel:
            self.word_timestamps = asr_options.word_timestamps
    def run(self, conv_res: ConversionResult) -> ConversionResult:
-        audio_path: Path = Path(conv_res.input.file).resolve()
+        # Access the file path from the backend, similar to how other pipelines handle it
        path_or_stream = conv_res.input._backend.path_or_stream
        # Handle both Path and BytesIO inputs
        temp_file_path: Optional[Path] = None
        if isinstance(path_or_stream, BytesIO):
            # For BytesIO, write to a temporary file since whisper requires a file path
            suffix = Path(conv_res.input.file.name).suffix or ".wav"
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                tmp_file.write(path_or_stream.getvalue())
                temp_file_path = Path(tmp_file.name)
            audio_path = temp_file_path
        elif isinstance(path_or_stream, Path):
            audio_path = path_or_stream
        else:
            raise RuntimeError(
                f"ASR pipeline requires a file path or BytesIO stream, but got {type(path_or_stream)}"
            )
        try:
            conversation = self.transcribe(audio_path)
@@ -172,10 +191,19 @@ class _NativeWhisperModel:
        except Exception as exc:
            _log.error(f"Audio tranciption has an error: {exc}")
            conv_res.status = ConversionStatus.FAILURE
            return conv_res
        finally:
            # Clean up temporary file if created
            if temp_file_path is not None and temp_file_path.exists():
                try:
                    temp_file_path.unlink()
                except Exception as e:
                    _log.warning(
                        f"Failed to delete temporary file {temp_file_path}: {e}"
                    )
    def transcribe(self, fpath: Path) -> list[_ConversationItem]:
        result = self.model.transcribe(
            str(fpath), verbose=self.verbose, word_timestamps=self.word_timestamps