Mirror of https://github.com/DS4SD/docling.git (synced 2025-12-08 20:58:11 +00:00)
fix: AsrPipeline to handle absolute paths and BytesIO streams correctly (#2407)
Fix AsrPipeline to handle absolute paths and BytesIO streams correctly

Signed-off-by: pixiake <guofeng@spader-ai.com>
Co-authored-by: pixiake <guofeng@spader-ai.com>
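With this fix the ASR pipeline accepts in-memory audio as well as filesystem paths. Below is a minimal sketch of how a caller might exercise both, assuming the ASR setup shown in docling's example scripts (InputFormat.AUDIO, AudioFormatOption, AsrPipeline, AsrPipelineOptions, asr_model_specs.WHISPER_TURBO); these names come from the examples, not from this commit, and may differ across docling versions. The file name meeting.wav is hypothetical.

from io import BytesIO
from pathlib import Path

from docling.datamodel import asr_model_specs
from docling.datamodel.base_models import DocumentStream, InputFormat
from docling.datamodel.pipeline_options import AsrPipelineOptions
from docling.document_converter import AudioFormatOption, DocumentConverter
from docling.pipeline.asr_pipeline import AsrPipeline

# Route audio inputs through the ASR pipeline (model spec name assumed from the examples).
pipeline_options = AsrPipelineOptions()
pipeline_options.asr_options = asr_model_specs.WHISPER_TURBO
converter = DocumentConverter(
    format_options={
        InputFormat.AUDIO: AudioFormatOption(
            pipeline_cls=AsrPipeline,
            pipeline_options=pipeline_options,
        )
    }
)

# Absolute path input: resolved and transcribed directly.
result_from_path = converter.convert(Path("meeting.wav").resolve())

# In-memory input: wrap the raw bytes in a DocumentStream; the pipeline now spools
# the BytesIO to a temporary file before handing it to whisper.
raw = Path("meeting.wav").read_bytes()
result_from_stream = converter.convert(DocumentStream(name="meeting.wav", stream=BytesIO(raw)))

print(result_from_stream.document.export_to_markdown())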
@@ -1,6 +1,7 @@
 import logging
 import os
 import re
+import tempfile
 from io import BytesIO
 from pathlib import Path
 from typing import List, Optional, Union, cast
@@ -147,7 +148,25 @@ class _NativeWhisperModel:
         self.word_timestamps = asr_options.word_timestamps
 
     def run(self, conv_res: ConversionResult) -> ConversionResult:
-        audio_path: Path = Path(conv_res.input.file).resolve()
+        # Access the file path from the backend, similar to how other pipelines handle it
+        path_or_stream = conv_res.input._backend.path_or_stream
+
+        # Handle both Path and BytesIO inputs
+        temp_file_path: Optional[Path] = None
+
+        if isinstance(path_or_stream, BytesIO):
+            # For BytesIO, write to a temporary file since whisper requires a file path
+            suffix = Path(conv_res.input.file.name).suffix or ".wav"
+            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
+                tmp_file.write(path_or_stream.getvalue())
+                temp_file_path = Path(tmp_file.name)
+            audio_path = temp_file_path
+        elif isinstance(path_or_stream, Path):
+            audio_path = path_or_stream
+        else:
+            raise RuntimeError(
+                f"ASR pipeline requires a file path or BytesIO stream, but got {type(path_or_stream)}"
+            )
 
         try:
             conversation = self.transcribe(audio_path)
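The heart of the change is the BytesIO branch: as the in-diff comment notes, whisper needs a real file path, so an in-memory stream is spooled to a NamedTemporaryFile whose suffix is taken from the original file name (falling back to .wav). Here is the same technique in isolation, with a hypothetical as_local_path helper so it can be run outside docling:

import tempfile
from io import BytesIO
from pathlib import Path
from typing import Union


def as_local_path(source: Union[Path, BytesIO], name: str = "audio.wav") -> tuple[Path, bool]:
    """Return a filesystem path for ``source`` plus a flag saying whether it is temporary.

    Illustrative helper: a BytesIO stream is written to a NamedTemporaryFile whose
    suffix mirrors the original file name, so downstream tools that rely on the
    extension (or receive the path on their command line) still work.
    """
    if isinstance(source, Path):
        return source.resolve(), False
    suffix = Path(name).suffix or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(source.getvalue())
    return Path(tmp.name), True


# Example: the caller is responsible for deleting the temporary copy afterwards.
path, is_temp = as_local_path(BytesIO(b"RIFF....WAVEfmt "), name="clip.wav")
try:
    print(path, is_temp)
finally:
    if is_temp:
        path.unlink(missing_ok=True)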
@@ -172,9 +191,18 @@ class _NativeWhisperModel:
 
         except Exception as exc:
             _log.error(f"Audio tranciption has an error: {exc}")
+            conv_res.status = ConversionStatus.FAILURE
+            return conv_res
 
-            conv_res.status = ConversionStatus.FAILURE
-        return conv_res
+        finally:
+            # Clean up temporary file if created
+            if temp_file_path is not None and temp_file_path.exists():
+                try:
+                    temp_file_path.unlink()
+                except Exception as e:
+                    _log.warning(
+                        f"Failed to delete temporary file {temp_file_path}: {e}"
+                    )
 
     def transcribe(self, fpath: Path) -> list[_ConversationItem]:
         result = self.model.transcribe(
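The new finally block guarantees the temporary copy is removed whether transcription succeeds or fails; a return inside the except block still runs finally before the method exits. The same guarantee can also be packaged as a context manager so that dispatch and cleanup live in one place. This is only a design-note sketch with a hypothetical local_audio_path helper, not what the commit implements:

import tempfile
from contextlib import contextmanager
from io import BytesIO
from pathlib import Path
from typing import Iterator, Union


@contextmanager
def local_audio_path(source: Union[Path, BytesIO], name: str = "audio.wav") -> Iterator[Path]:
    """Yield a real file path for ``source`` and remove any temporary copy on exit."""
    if isinstance(source, Path):
        yield source.resolve()
        return
    suffix = Path(name).suffix or ".wav"
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    try:
        tmp.write(source.getvalue())
        tmp.close()
        yield Path(tmp.name)
    finally:
        Path(tmp.name).unlink(missing_ok=True)


# Usage: the temporary file only exists for the duration of the block.
with local_audio_path(BytesIO(b"\x00" * 16), name="clip.mp3") as audio_path:
    print(audio_path.suffix)  # ".mp3"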