fix: AsrPipeline to handle absolute paths and BytesIO streams correctly (#2407)

Fix AsrPipeline to handle absolute paths and BytesIO streams correctly

Signed-off-by: pixiake <guofeng@spader-ai.com>
Co-authored-by: pixiake <guofeng@spader-ai.com>
pixiake
2025-10-10 15:37:15 +08:00
committed by GitHub
parent f2854b2e1d
commit b5f7fef29b
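
Not part of the commit: a minimal usage sketch of the code path this fix enables, feeding in-memory audio to the ASR pipeline as a DocumentStream instead of a filesystem path. The AudioFormatOption, AsrPipelineOptions, and asr_model_specs.WHISPER_TINY names follow docling's published ASR example and are assumptions here, not something this commit introduces; "sample.wav" is a hypothetical local file.

from io import BytesIO
from pathlib import Path

from docling.datamodel import asr_model_specs
from docling.datamodel.base_models import DocumentStream, InputFormat
from docling.datamodel.pipeline_options import AsrPipelineOptions
from docling.document_converter import AudioFormatOption, DocumentConverter
from docling.pipeline.asr_pipeline import AsrPipeline

# Configure a converter that routes audio inputs through AsrPipeline
# (names taken from docling's ASR example; assumed, not added by this commit).
pipeline_options = AsrPipelineOptions()
pipeline_options.asr_options = asr_model_specs.WHISPER_TINY
converter = DocumentConverter(
    format_options={
        InputFormat.AUDIO: AudioFormatOption(
            pipeline_cls=AsrPipeline,
            pipeline_options=pipeline_options,
        )
    }
)

# In-memory audio via DocumentStream: before this fix the pipeline resolved
# conv_res.input.file against the working directory and failed for streams;
# now the bytes are written to a temporary file and transcribed.
wav_bytes = Path("sample.wav").read_bytes()
result = converter.convert(DocumentStream(name="sample.wav", stream=BytesIO(wav_bytes)))
print(result.status)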


@@ -1,6 +1,7 @@
import logging
import os
import re
import tempfile
from io import BytesIO
from pathlib import Path
from typing import List, Optional, Union, cast
@@ -147,7 +148,25 @@ class _NativeWhisperModel:
        self.word_timestamps = asr_options.word_timestamps

    def run(self, conv_res: ConversionResult) -> ConversionResult:
        audio_path: Path = Path(conv_res.input.file).resolve()
        # Access the file path from the backend, similar to how other pipelines handle it
        path_or_stream = conv_res.input._backend.path_or_stream

        # Handle both Path and BytesIO inputs
        temp_file_path: Optional[Path] = None
        if isinstance(path_or_stream, BytesIO):
            # For BytesIO, write to a temporary file since whisper requires a file path
            suffix = Path(conv_res.input.file.name).suffix or ".wav"
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                tmp_file.write(path_or_stream.getvalue())
                temp_file_path = Path(tmp_file.name)
            audio_path = temp_file_path
        elif isinstance(path_or_stream, Path):
            audio_path = path_or_stream
        else:
            raise RuntimeError(
                f"ASR pipeline requires a file path or BytesIO stream, but got {type(path_or_stream)}"
            )

        try:
            conversation = self.transcribe(audio_path)
@@ -172,9 +191,18 @@ class _NativeWhisperModel:
        except Exception as exc:
            _log.error(f"Audio transcription has an error: {exc}")
            conv_res.status = ConversionStatus.FAILURE
            return conv_res
        finally:
            # Clean up temporary file if created
            if temp_file_path is not None and temp_file_path.exists():
                try:
                    temp_file_path.unlink()
                except Exception as e:
                    _log.warning(
                        f"Failed to delete temporary file {temp_file_path}: {e}"
                    )

    def transcribe(self, fpath: Path) -> list[_ConversationItem]:
        result = self.model.transcribe(
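
Not part of the commit: a standalone sketch of the temporary-file pattern used in the hunks above, showing why delete=False plus an explicit unlink() in finally is used rather than letting NamedTemporaryFile delete itself. The transcription call only accepts a file path, so the file must remain on disk (and be re-openable, which matters on Windows) after the write handle is closed; fake_transcribe is a hypothetical stand-in for whisper's file-based API.

import tempfile
from io import BytesIO
from pathlib import Path
from typing import Optional


def fake_transcribe(fpath: Path) -> str:
    # Hypothetical stand-in for a model call that only accepts a file path.
    return f"transcribed {fpath.name} ({fpath.stat().st_size} bytes)"


def transcribe_stream(stream: BytesIO, suffix: str = ".wav") -> str:
    temp_file_path: Optional[Path] = None
    try:
        # delete=False keeps the file on disk after the handle is closed, so a
        # separate reader can open it by path; it is removed explicitly below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file.write(stream.getvalue())
            temp_file_path = Path(tmp_file.name)
        return fake_transcribe(temp_file_path)
    finally:
        if temp_file_path is not None and temp_file_path.exists():
            temp_file_path.unlink()


print(transcribe_stream(BytesIO(b"RIFF....WAVEfmt ")))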