fix: AsrPipeline to handle absolute paths and BytesIO streams correctly (#2407)

Fix AsrPipeline to handle absolute paths and BytesIO streams correctly

Signed-off-by: pixiake <guofeng@spader-ai.com>
Co-authored-by: pixiake <guofeng@spader-ai.com>
pixiake
2025-10-10 15:37:15 +08:00
committed by GitHub
parent f2854b2e1d
commit b5f7fef29b
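
Not part of the commit: a minimal usage sketch of the code path this fix enables, feeding in-memory audio to the ASR pipeline as a DocumentStream instead of a filesystem path. The AudioFormatOption, AsrPipelineOptions, and asr_model_specs.WHISPER_TINY names follow docling's published ASR example and are assumptions here, not something this commit introduces; "sample.wav" is a hypothetical local file.

from io import BytesIO
from pathlib import Path

from docling.datamodel import asr_model_specs
from docling.datamodel.base_models import DocumentStream, InputFormat
from docling.datamodel.pipeline_options import AsrPipelineOptions
from docling.document_converter import AudioFormatOption, DocumentConverter
from docling.pipeline.asr_pipeline import AsrPipeline

# Configure a converter that routes audio inputs through AsrPipeline
# (names taken from docling's ASR example; assumed, not added by this commit).
pipeline_options = AsrPipelineOptions()
pipeline_options.asr_options = asr_model_specs.WHISPER_TINY
converter = DocumentConverter(
    format_options={
        InputFormat.AUDIO: AudioFormatOption(
            pipeline_cls=AsrPipeline,
            pipeline_options=pipeline_options,
        )
    }
)

# In-memory audio via DocumentStream: before this fix the pipeline resolved
# conv_res.input.file against the working directory and failed for streams;
# now the bytes are written to a temporary file and transcribed.
wav_bytes = Path("sample.wav").read_bytes()
result = converter.convert(DocumentStream(name="sample.wav", stream=BytesIO(wav_bytes)))
print(result.status)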


@@ -1,6 +1,7 @@
import logging
import os
import re
import tempfile
from io import BytesIO
from pathlib import Path
from typing import List, Optional, Union, cast
@@ -147,7 +148,25 @@ class _NativeWhisperModel:
        self.word_timestamps = asr_options.word_timestamps

    def run(self, conv_res: ConversionResult) -> ConversionResult:
        audio_path: Path = Path(conv_res.input.file).resolve()
        # Access the file path from the backend, similar to how other pipelines handle it
        path_or_stream = conv_res.input._backend.path_or_stream

        # Handle both Path and BytesIO inputs
        temp_file_path: Optional[Path] = None
        if isinstance(path_or_stream, BytesIO):
            # For BytesIO, write to a temporary file since whisper requires a file path
            suffix = Path(conv_res.input.file.name).suffix or ".wav"
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                tmp_file.write(path_or_stream.getvalue())
                temp_file_path = Path(tmp_file.name)
            audio_path = temp_file_path
        elif isinstance(path_or_stream, Path):
            audio_path = path_or_stream
        else:
            raise RuntimeError(
                f"ASR pipeline requires a file path or BytesIO stream, but got {type(path_or_stream)}"
            )

        try:
            conversation = self.transcribe(audio_path)
@@ -172,9 +191,18 @@ class _NativeWhisperModel:
        except Exception as exc:
            _log.error(f"Audio transcription has an error: {exc}")
            conv_res.status = ConversionStatus.FAILURE
            return conv_res
        finally:
            # Clean up temporary file if created
            if temp_file_path is not None and temp_file_path.exists():
                try:
                    temp_file_path.unlink()
                except Exception as e:
                    _log.warning(
                        f"Failed to delete temporary file {temp_file_path}: {e}"
                    )

    def transcribe(self, fpath: Path) -> list[_ConversationItem]:
        result = self.model.transcribe(
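
Not part of the commit: a standalone sketch of the temporary-file pattern used in the hunks above, showing why delete=False plus an explicit unlink() in finally is used rather than letting NamedTemporaryFile delete itself. The transcription call only accepts a file path, so the file must remain on disk (and be re-openable, which matters on Windows) after the write handle is closed; fake_transcribe is a hypothetical stand-in for whisper's file-based API.

import tempfile
from io import BytesIO
from pathlib import Path
from typing import Optional


def fake_transcribe(fpath: Path) -> str:
    # Hypothetical stand-in for a model call that only accepts a file path.
    return f"transcribed {fpath.name} ({fpath.stat().st_size} bytes)"


def transcribe_stream(stream: BytesIO, suffix: str = ".wav") -> str:
    temp_file_path: Optional[Path] = None
    try:
        # delete=False keeps the file on disk after the handle is closed, so a
        # separate reader can open it by path; it is removed explicitly below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file.write(stream.getvalue())
            temp_file_path = Path(tmp_file.name)
        return fake_transcribe(temp_file_path)
    finally:
        if temp_file_path is not None and temp_file_path.exists():
            temp_file_path.unlink()


print(transcribe_stream(BytesIO(b"RIFF....WAVEfmt ")))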