Rename to NoOpBackend, add test for ASR pipeline

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-06-23 13:03:44 +02:00
parent 01706beea4
commit d54cea02b9
7 changed files with 18 additions and 16 deletions

View File

@ -10,16 +10,16 @@ from docling.datamodel.document import InputDocument
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
class DummyBackend(AbstractDocumentBackend): class NoOpBackend(AbstractDocumentBackend):
""" """
A dummy backend that only validates input existence. A no-op backend that only validates input existence.
Used e.g. for audio files where actual processing is handled by the ASR pipeline. Used e.g. for audio files where actual processing is handled by the ASR pipeline.
""" """
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]): def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
super().__init__(in_doc, path_or_stream) super().__init__(in_doc, path_or_stream)
_log.debug(f"DummyBackend initialized for: {path_or_stream}") _log.debug(f"NoOpBackend initialized for: {path_or_stream}")
# Validate input # Validate input
try: try:
@ -36,7 +36,7 @@ class DummyBackend(AbstractDocumentBackend):
else: else:
self.valid = False self.valid = False
except Exception as e: except Exception as e:
_log.error(f"DummyBackend validation failed: {e}") _log.error(f"NoOpBackend validation failed: {e}")
self.valid = False self.valid = False
def is_valid(self) -> bool: def is_valid(self) -> bool:

View File

@ -26,7 +26,6 @@ from rich.console import Console
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
from docling.backend.dummy_backend import DummyBackend
from docling.backend.pdf_backend import PdfDocumentBackend from docling.backend.pdf_backend import PdfDocumentBackend
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions

View File

@ -106,7 +106,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
], ],
InputFormat.XML_USPTO: ["application/xml", "text/plain"], InputFormat.XML_USPTO: ["application/xml", "text/plain"],
InputFormat.JSON_DOCLING: ["application/json"], InputFormat.JSON_DOCLING: ["application/json"],
InputFormat.AUDIO: ["audio/wav", "audio/x-wav"], InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
} }
MimeTypeToFormat: dict[str, list[InputFormat]] = { MimeTypeToFormat: dict[str, list[InputFormat]] = {

View File

@ -17,8 +17,8 @@ class BaseAsrOptions(BaseModel):
class InferenceAsrFramework(str, Enum): class InferenceAsrFramework(str, Enum):
MLX = "mlx" # MLX = "mlx" # disabled for now
TRANSFORMERS = "transformers" # TRANSFORMERS = "transformers" # disabled for now
WHISPER = "whisper" WHISPER = "whisper"

View File

@ -13,13 +13,13 @@ from docling.backend.abstract_backend import AbstractDocumentBackend
from docling.backend.asciidoc_backend import AsciiDocBackend from docling.backend.asciidoc_backend import AsciiDocBackend
from docling.backend.csv_backend import CsvDocumentBackend from docling.backend.csv_backend import CsvDocumentBackend
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
from docling.backend.dummy_backend import DummyBackend
from docling.backend.html_backend import HTMLDocumentBackend from docling.backend.html_backend import HTMLDocumentBackend
from docling.backend.json.docling_json_backend import DoclingJSONBackend from docling.backend.json.docling_json_backend import DoclingJSONBackend
from docling.backend.md_backend import MarkdownDocumentBackend from docling.backend.md_backend import MarkdownDocumentBackend
from docling.backend.msexcel_backend import MsExcelDocumentBackend from docling.backend.msexcel_backend import MsExcelDocumentBackend
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
from docling.backend.msword_backend import MsWordDocumentBackend from docling.backend.msword_backend import MsWordDocumentBackend
from docling.backend.noop_backend import NoOpBackend
from docling.backend.xml.jats_backend import JatsDocumentBackend from docling.backend.xml.jats_backend import JatsDocumentBackend
from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
from docling.datamodel.base_models import ( from docling.datamodel.base_models import (
@ -122,7 +122,7 @@ class PdfFormatOption(FormatOption):
class AudioFormatOption(FormatOption): class AudioFormatOption(FormatOption):
pipeline_cls: Type = AsrPipeline pipeline_cls: Type = AsrPipeline
backend: Type[AbstractDocumentBackend] = DummyBackend backend: Type[AbstractDocumentBackend] = NoOpBackend
def _get_default_option(format: InputFormat) -> FormatOption: def _get_default_option(format: InputFormat) -> FormatOption:
@ -163,7 +163,7 @@ def _get_default_option(format: InputFormat) -> FormatOption:
InputFormat.JSON_DOCLING: FormatOption( InputFormat.JSON_DOCLING: FormatOption(
pipeline_cls=SimplePipeline, backend=DoclingJSONBackend pipeline_cls=SimplePipeline, backend=DoclingJSONBackend
), ),
InputFormat.AUDIO: FormatOption(pipeline_cls=AsrPipeline, backend=DummyBackend), InputFormat.AUDIO: FormatOption(pipeline_cls=AsrPipeline, backend=NoOpBackend),
} }
if (options := format_to_default_options.get(format)) is not None: if (options := format_to_default_options.get(format)) is not None:
return options return options

View File

@ -15,7 +15,7 @@ from docling_core.types.doc.labels import DocItemLabel
from pydantic import BaseModel, Field, validator from pydantic import BaseModel, Field, validator
from docling.backend.abstract_backend import AbstractDocumentBackend from docling.backend.abstract_backend import AbstractDocumentBackend
from docling.backend.dummy_backend import DummyBackend from docling.backend.noop_backend import NoOpBackend
# from pydub import AudioSegment # type: ignore # from pydub import AudioSegment # type: ignore
# from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline # from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
@ -24,6 +24,7 @@ from docling.datamodel.accelerator_options import (
) )
from docling.datamodel.base_models import ( from docling.datamodel.base_models import (
ConversionStatus, ConversionStatus,
FormatToMimeType,
) )
from docling.datamodel.document import ConversionResult, InputDocument from docling.datamodel.document import ConversionResult, InputDocument
from docling.datamodel.pipeline_options import ( from docling.datamodel.pipeline_options import (
@ -154,15 +155,17 @@ class _NativeWhisperModel:
# Ensure we have a proper DoclingDocument # Ensure we have a proper DoclingDocument
origin = DocumentOrigin( origin = DocumentOrigin(
filename=conv_res.input.file.name or "audio.wav", filename=conv_res.input.file.name or "audio.wav",
mimetype="audio/wav", mimetype="audio/x-wav",
binary_hash=conv_res.input.document_hash, binary_hash=conv_res.input.document_hash,
) )
conv_res.document = DoclingDocument( conv_res.document = DoclingDocument(
name=conv_res.input.file.stem or "audio.wav", origin=origin name=conv_res.input.file.stem or "audio.wav", origin=origin
) )
for _ in conversation: for citem in conversation:
conv_res.document.add_text(label=DocItemLabel.TEXT, text=_.to_string()) conv_res.document.add_text(
label=DocItemLabel.TEXT, text=citem.to_string()
)
conv_res.status = ConversionStatus.SUCCESS conv_res.status = ConversionStatus.SUCCESS
return conv_res return conv_res
@ -247,4 +250,4 @@ class AsrPipeline(BasePipeline):
@classmethod @classmethod
def is_backend_supported(cls, backend: AbstractDocumentBackend): def is_backend_supported(cls, backend: AbstractDocumentBackend):
return isinstance(backend, DummyBackend) return isinstance(backend, NoOpBackend)

BIN
tests/data/audio/sample_10s.mp3 vendored Normal file

Binary file not shown.