mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
Rename to NoOpBackend, add test for ASR pipeline
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
01706beea4
commit
d54cea02b9
@ -10,16 +10,16 @@ from docling.datamodel.document import InputDocument
|
|||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class DummyBackend(AbstractDocumentBackend):
|
class NoOpBackend(AbstractDocumentBackend):
|
||||||
"""
|
"""
|
||||||
A dummy backend that only validates input existence.
|
A no-op backend that only validates input existence.
|
||||||
Used e.g. for audio files where actual processing is handled by the ASR pipeline.
|
Used e.g. for audio files where actual processing is handled by the ASR pipeline.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
|
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
|
||||||
super().__init__(in_doc, path_or_stream)
|
super().__init__(in_doc, path_or_stream)
|
||||||
|
|
||||||
_log.debug(f"DummyBackend initialized for: {path_or_stream}")
|
_log.debug(f"NoOpBackend initialized for: {path_or_stream}")
|
||||||
|
|
||||||
# Validate input
|
# Validate input
|
||||||
try:
|
try:
|
||||||
@ -36,7 +36,7 @@ class DummyBackend(AbstractDocumentBackend):
|
|||||||
else:
|
else:
|
||||||
self.valid = False
|
self.valid = False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_log.error(f"DummyBackend validation failed: {e}")
|
_log.error(f"NoOpBackend validation failed: {e}")
|
||||||
self.valid = False
|
self.valid = False
|
||||||
|
|
||||||
def is_valid(self) -> bool:
|
def is_valid(self) -> bool:
|
@ -26,7 +26,6 @@ from rich.console import Console
|
|||||||
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
||||||
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
|
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
|
||||||
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
|
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
|
||||||
from docling.backend.dummy_backend import DummyBackend
|
|
||||||
from docling.backend.pdf_backend import PdfDocumentBackend
|
from docling.backend.pdf_backend import PdfDocumentBackend
|
||||||
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
||||||
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||||
|
@ -106,7 +106,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
|
|||||||
],
|
],
|
||||||
InputFormat.XML_USPTO: ["application/xml", "text/plain"],
|
InputFormat.XML_USPTO: ["application/xml", "text/plain"],
|
||||||
InputFormat.JSON_DOCLING: ["application/json"],
|
InputFormat.JSON_DOCLING: ["application/json"],
|
||||||
InputFormat.AUDIO: ["audio/wav", "audio/x-wav"],
|
InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
|
||||||
}
|
}
|
||||||
|
|
||||||
MimeTypeToFormat: dict[str, list[InputFormat]] = {
|
MimeTypeToFormat: dict[str, list[InputFormat]] = {
|
||||||
|
@ -17,8 +17,8 @@ class BaseAsrOptions(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class InferenceAsrFramework(str, Enum):
|
class InferenceAsrFramework(str, Enum):
|
||||||
MLX = "mlx"
|
# MLX = "mlx" # disabled for now
|
||||||
TRANSFORMERS = "transformers"
|
# TRANSFORMERS = "transformers" # disabled for now
|
||||||
WHISPER = "whisper"
|
WHISPER = "whisper"
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,13 +13,13 @@ from docling.backend.abstract_backend import AbstractDocumentBackend
|
|||||||
from docling.backend.asciidoc_backend import AsciiDocBackend
|
from docling.backend.asciidoc_backend import AsciiDocBackend
|
||||||
from docling.backend.csv_backend import CsvDocumentBackend
|
from docling.backend.csv_backend import CsvDocumentBackend
|
||||||
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
|
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
|
||||||
from docling.backend.dummy_backend import DummyBackend
|
|
||||||
from docling.backend.html_backend import HTMLDocumentBackend
|
from docling.backend.html_backend import HTMLDocumentBackend
|
||||||
from docling.backend.json.docling_json_backend import DoclingJSONBackend
|
from docling.backend.json.docling_json_backend import DoclingJSONBackend
|
||||||
from docling.backend.md_backend import MarkdownDocumentBackend
|
from docling.backend.md_backend import MarkdownDocumentBackend
|
||||||
from docling.backend.msexcel_backend import MsExcelDocumentBackend
|
from docling.backend.msexcel_backend import MsExcelDocumentBackend
|
||||||
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
|
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
|
||||||
from docling.backend.msword_backend import MsWordDocumentBackend
|
from docling.backend.msword_backend import MsWordDocumentBackend
|
||||||
|
from docling.backend.noop_backend import NoOpBackend
|
||||||
from docling.backend.xml.jats_backend import JatsDocumentBackend
|
from docling.backend.xml.jats_backend import JatsDocumentBackend
|
||||||
from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
|
from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
|
||||||
from docling.datamodel.base_models import (
|
from docling.datamodel.base_models import (
|
||||||
@ -122,7 +122,7 @@ class PdfFormatOption(FormatOption):
|
|||||||
|
|
||||||
class AudioFormatOption(FormatOption):
|
class AudioFormatOption(FormatOption):
|
||||||
pipeline_cls: Type = AsrPipeline
|
pipeline_cls: Type = AsrPipeline
|
||||||
backend: Type[AbstractDocumentBackend] = DummyBackend
|
backend: Type[AbstractDocumentBackend] = NoOpBackend
|
||||||
|
|
||||||
|
|
||||||
def _get_default_option(format: InputFormat) -> FormatOption:
|
def _get_default_option(format: InputFormat) -> FormatOption:
|
||||||
@ -163,7 +163,7 @@ def _get_default_option(format: InputFormat) -> FormatOption:
|
|||||||
InputFormat.JSON_DOCLING: FormatOption(
|
InputFormat.JSON_DOCLING: FormatOption(
|
||||||
pipeline_cls=SimplePipeline, backend=DoclingJSONBackend
|
pipeline_cls=SimplePipeline, backend=DoclingJSONBackend
|
||||||
),
|
),
|
||||||
InputFormat.AUDIO: FormatOption(pipeline_cls=AsrPipeline, backend=DummyBackend),
|
InputFormat.AUDIO: FormatOption(pipeline_cls=AsrPipeline, backend=NoOpBackend),
|
||||||
}
|
}
|
||||||
if (options := format_to_default_options.get(format)) is not None:
|
if (options := format_to_default_options.get(format)) is not None:
|
||||||
return options
|
return options
|
||||||
|
@ -15,7 +15,7 @@ from docling_core.types.doc.labels import DocItemLabel
|
|||||||
from pydantic import BaseModel, Field, validator
|
from pydantic import BaseModel, Field, validator
|
||||||
|
|
||||||
from docling.backend.abstract_backend import AbstractDocumentBackend
|
from docling.backend.abstract_backend import AbstractDocumentBackend
|
||||||
from docling.backend.dummy_backend import DummyBackend
|
from docling.backend.noop_backend import NoOpBackend
|
||||||
|
|
||||||
# from pydub import AudioSegment # type: ignore
|
# from pydub import AudioSegment # type: ignore
|
||||||
# from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
|
# from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
|
||||||
@ -24,6 +24,7 @@ from docling.datamodel.accelerator_options import (
|
|||||||
)
|
)
|
||||||
from docling.datamodel.base_models import (
|
from docling.datamodel.base_models import (
|
||||||
ConversionStatus,
|
ConversionStatus,
|
||||||
|
FormatToMimeType,
|
||||||
)
|
)
|
||||||
from docling.datamodel.document import ConversionResult, InputDocument
|
from docling.datamodel.document import ConversionResult, InputDocument
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
@ -154,15 +155,17 @@ class _NativeWhisperModel:
|
|||||||
# Ensure we have a proper DoclingDocument
|
# Ensure we have a proper DoclingDocument
|
||||||
origin = DocumentOrigin(
|
origin = DocumentOrigin(
|
||||||
filename=conv_res.input.file.name or "audio.wav",
|
filename=conv_res.input.file.name or "audio.wav",
|
||||||
mimetype="audio/wav",
|
mimetype="audio/x-wav",
|
||||||
binary_hash=conv_res.input.document_hash,
|
binary_hash=conv_res.input.document_hash,
|
||||||
)
|
)
|
||||||
conv_res.document = DoclingDocument(
|
conv_res.document = DoclingDocument(
|
||||||
name=conv_res.input.file.stem or "audio.wav", origin=origin
|
name=conv_res.input.file.stem or "audio.wav", origin=origin
|
||||||
)
|
)
|
||||||
|
|
||||||
for _ in conversation:
|
for citem in conversation:
|
||||||
conv_res.document.add_text(label=DocItemLabel.TEXT, text=_.to_string())
|
conv_res.document.add_text(
|
||||||
|
label=DocItemLabel.TEXT, text=citem.to_string()
|
||||||
|
)
|
||||||
|
|
||||||
conv_res.status = ConversionStatus.SUCCESS
|
conv_res.status = ConversionStatus.SUCCESS
|
||||||
return conv_res
|
return conv_res
|
||||||
@ -247,4 +250,4 @@ class AsrPipeline(BasePipeline):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def is_backend_supported(cls, backend: AbstractDocumentBackend):
|
def is_backend_supported(cls, backend: AbstractDocumentBackend):
|
||||||
return isinstance(backend, DummyBackend)
|
return isinstance(backend, NoOpBackend)
|
||||||
|
BIN
tests/data/audio/sample_10s.mp3
vendored
Normal file
BIN
tests/data/audio/sample_10s.mp3
vendored
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user