diff --git a/docling/pipeline/asr_pipeline.py b/docling/pipeline/asr_pipeline.py
index 611d49ee..18bc5e89 100644
--- a/docling/pipeline/asr_pipeline.py
+++ b/docling/pipeline/asr_pipeline.py
@@ -186,7 +186,6 @@ class _NativeWhisperModel:
                     label=DocItemLabel.TEXT, text=citem.to_string()
                 )
 
-            conv_res.status = ConversionStatus.SUCCESS
             return conv_res
 
         except Exception as exc:
@@ -249,9 +248,32 @@ class AsrPipeline(BasePipeline):
         else:
             _log.error(f"No model support for {self.pipeline_options.asr_options}")
 
+    def _has_text(self, document: "DoclingDocument") -> bool:
+        """Return True if the document contains any transcribed text.
+
+        A transcription is considered non-empty if the ``.texts`` list
+        contains at least one item with non-whitespace content.
+        """
+        if not document or not document.texts:
+            return False
+        return any(item.text and item.text.strip() for item in document.texts)
+
     def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
-        status = ConversionStatus.SUCCESS
-        return status
+        """Determine the final status of the ASR conversion.
+
+        Returns FAILURE if the result is already marked as failed or carries
+        errors, PARTIAL_SUCCESS if no non-whitespace text was transcribed,
+        and SUCCESS otherwise.
+        """
+        if conv_res.status == ConversionStatus.FAILURE or conv_res.errors:
+            return ConversionStatus.FAILURE
+        if not self._has_text(conv_res.document):
+            _log.warning(
+                "ASR conversion resulted in an empty document. "
+                f"File: {conv_res.input.file.name}"
+            )
+            return ConversionStatus.PARTIAL_SUCCESS
+        return ConversionStatus.SUCCESS
 
     @classmethod
     def get_default_options(cls) -> AsrPipelineOptions:
diff --git a/tests/data/audio/silent_1s.wav b/tests/data/audio/silent_1s.wav
new file mode 100644
index 00000000..24d262ae
Binary files /dev/null and b/tests/data/audio/silent_1s.wav differ
diff --git a/tests/test_asr_pipeline.py b/tests/test_asr_pipeline.py
index 595cd608..8a68bdc3 100644
--- a/tests/test_asr_pipeline.py
+++ b/tests/test_asr_pipeline.py
@@ -57,3 +57,29 @@ def test_asr_pipeline_conversion(test_audio_path):
     print(f"Transcribed text from {test_audio_path.name}:")
     for i, text_item in enumerate(texts):
         print(f" {i + 1}: {text_item.text}")
+
+
+@pytest.fixture
+def silent_audio_path():
+    """Fixture to provide the path to a silent audio file."""
+    path = Path("./tests/data/audio/silent_1s.wav")
+    if not path.exists():
+        pytest.skip("Silent audio file for testing not found at " + str(path))
+    return path
+
+
+def test_asr_pipeline_with_silent_audio(silent_audio_path):
+    """
+    Test that the ASR pipeline correctly handles silent audio files
+    by returning a PARTIAL_SUCCESS status.
+    """
+    converter = get_asr_converter()
+    doc_result: ConversionResult = converter.convert(silent_audio_path)
+
+    # Silent input yields no transcribed text, so PARTIAL_SUCCESS is expected.
+    assert doc_result.status == ConversionStatus.PARTIAL_SUCCESS, (
+        f"Status should be PARTIAL_SUCCESS for silent audio, but got {doc_result.status}"
+    )
+    assert len(doc_result.document.texts) == 0, (
+        "Document should contain zero text items"
+    )