fix: AsrPipeline to handle absolute paths and BytesIO streams correctly (#2407)

Fix AsrPipeline to handle absolute paths and BytesIO streams correctly

Signed-off-by: pixiake <guofeng@spader-ai.com>
Co-authored-by: pixiake <guofeng@spader-ai.com>
This commit is contained in:
pixiake
2025-10-10 15:37:15 +08:00
committed by GitHub
parent f2854b2e1d
commit b5f7fef29b

View File

@@ -1,6 +1,7 @@
import logging import logging
import os import os
import re import re
import tempfile
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import List, Optional, Union, cast from typing import List, Optional, Union, cast
@@ -147,7 +148,25 @@ class _NativeWhisperModel:
self.word_timestamps = asr_options.word_timestamps self.word_timestamps = asr_options.word_timestamps
def run(self, conv_res: ConversionResult) -> ConversionResult: def run(self, conv_res: ConversionResult) -> ConversionResult:
audio_path: Path = Path(conv_res.input.file).resolve() # Access the file path from the backend, similar to how other pipelines handle it
path_or_stream = conv_res.input._backend.path_or_stream
# Handle both Path and BytesIO inputs
temp_file_path: Optional[Path] = None
if isinstance(path_or_stream, BytesIO):
# For BytesIO, write to a temporary file since whisper requires a file path
suffix = Path(conv_res.input.file.name).suffix or ".wav"
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
tmp_file.write(path_or_stream.getvalue())
temp_file_path = Path(tmp_file.name)
audio_path = temp_file_path
elif isinstance(path_or_stream, Path):
audio_path = path_or_stream
else:
raise RuntimeError(
f"ASR pipeline requires a file path or BytesIO stream, but got {type(path_or_stream)}"
)
try: try:
conversation = self.transcribe(audio_path) conversation = self.transcribe(audio_path)
@@ -172,10 +191,19 @@ class _NativeWhisperModel:
except Exception as exc: except Exception as exc:
_log.error(f"Audio tranciption has an error: {exc}") _log.error(f"Audio tranciption has an error: {exc}")
conv_res.status = ConversionStatus.FAILURE conv_res.status = ConversionStatus.FAILURE
return conv_res return conv_res
finally:
# Clean up temporary file if created
if temp_file_path is not None and temp_file_path.exists():
try:
temp_file_path.unlink()
except Exception as e:
_log.warning(
f"Failed to delete temporary file {temp_file_path}: {e}"
)
def transcribe(self, fpath: Path) -> list[_ConversationItem]: def transcribe(self, fpath: Path) -> list[_ConversationItem]:
result = self.model.transcribe( result = self.model.transcribe(
str(fpath), verbose=self.verbose, word_timestamps=self.word_timestamps str(fpath), verbose=self.verbose, word_timestamps=self.word_timestamps