diff --git a/docling/backend/dummy_backend.py b/docling/backend/noop_backend.py
similarity index 86%
rename from docling/backend/dummy_backend.py
rename to docling/backend/noop_backend.py
index 87552aed..e4ae6d20 100644
--- a/docling/backend/dummy_backend.py
+++ b/docling/backend/noop_backend.py
@@ -10,16 +10,16 @@ from docling.datamodel.document import InputDocument
 _log = logging.getLogger(__name__)
 
 
-class DummyBackend(AbstractDocumentBackend):
+class NoOpBackend(AbstractDocumentBackend):
     """
-    A dummy backend that only validates input existence.
+    A no-op backend that only validates input existence.
     Used e.g. for audio files where actual processing is handled by the ASR pipeline.
     """
 
     def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
         super().__init__(in_doc, path_or_stream)
 
-        _log.debug(f"DummyBackend initialized for: {path_or_stream}")
+        _log.debug(f"NoOpBackend initialized for: {path_or_stream}")
 
         # Validate input
         try:
@@ -36,7 +36,7 @@ class DummyBackend(AbstractDocumentBackend):
             else:
                 self.valid = False
         except Exception as e:
-            _log.error(f"DummyBackend validation failed: {e}")
+            _log.error(f"NoOpBackend validation failed: {e}")
             self.valid = False
 
     def is_valid(self) -> bool:
diff --git a/docling/cli/main.py b/docling/cli/main.py
index ddf355f0..ae275ea9 100644
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@@ -26,7 +26,6 @@ from rich.console import Console
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
 from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
-from docling.backend.dummy_backend import DummyBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
 from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py
index 85501a5a..f76a066c 100644
--- a/docling/datamodel/base_models.py
+++ b/docling/datamodel/base_models.py
@@ -106,7 +106,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
     ],
     InputFormat.XML_USPTO: ["application/xml", "text/plain"],
     InputFormat.JSON_DOCLING: ["application/json"],
-    InputFormat.AUDIO: ["audio/wav", "audio/x-wav"],
+    InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
 }
 
 MimeTypeToFormat: dict[str, list[InputFormat]] = {
diff --git a/docling/datamodel/pipeline_options_asr_model.py b/docling/datamodel/pipeline_options_asr_model.py
index e892254c..20e2e453 100644
--- a/docling/datamodel/pipeline_options_asr_model.py
+++ b/docling/datamodel/pipeline_options_asr_model.py
@@ -17,8 +17,8 @@ class BaseAsrOptions(BaseModel):
 
 
 class InferenceAsrFramework(str, Enum):
-    MLX = "mlx"
-    TRANSFORMERS = "transformers"
+    # MLX = "mlx" # disabled for now
+    # TRANSFORMERS = "transformers" # disabled for now
     WHISPER = "whisper"
 
 
diff --git a/docling/document_converter.py b/docling/document_converter.py
index 5cae12e2..1a0a9d75 100644
--- a/docling/document_converter.py
+++ b/docling/document_converter.py
@@ -13,13 +13,13 @@ from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.asciidoc_backend import AsciiDocBackend
 from docling.backend.csv_backend import CsvDocumentBackend
 from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
-from docling.backend.dummy_backend import DummyBackend
 from docling.backend.html_backend import HTMLDocumentBackend
 from docling.backend.json.docling_json_backend import DoclingJSONBackend
 from docling.backend.md_backend import MarkdownDocumentBackend
 from docling.backend.msexcel_backend import MsExcelDocumentBackend
 from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
 from docling.backend.msword_backend import MsWordDocumentBackend
+from docling.backend.noop_backend import NoOpBackend
 from docling.backend.xml.jats_backend import JatsDocumentBackend
 from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
 from docling.datamodel.base_models import (
@@ -122,7 +122,7 @@ class PdfFormatOption(FormatOption):
 
 class AudioFormatOption(FormatOption):
     pipeline_cls: Type = AsrPipeline
-    backend: Type[AbstractDocumentBackend] = DummyBackend
+    backend: Type[AbstractDocumentBackend] = NoOpBackend
 
 
 def _get_default_option(format: InputFormat) -> FormatOption:
@@ -163,7 +163,7 @@ def _get_default_option(format: InputFormat) -> FormatOption:
         InputFormat.JSON_DOCLING: FormatOption(
             pipeline_cls=SimplePipeline, backend=DoclingJSONBackend
         ),
-        InputFormat.AUDIO: FormatOption(pipeline_cls=AsrPipeline, backend=DummyBackend),
+        InputFormat.AUDIO: FormatOption(pipeline_cls=AsrPipeline, backend=NoOpBackend),
     }
     if (options := format_to_default_options.get(format)) is not None:
         return options
diff --git a/docling/pipeline/asr_pipeline.py b/docling/pipeline/asr_pipeline.py
index 65b053f5..94fa6341 100644
--- a/docling/pipeline/asr_pipeline.py
+++ b/docling/pipeline/asr_pipeline.py
@@ -15,7 +15,7 @@ from docling_core.types.doc.labels import DocItemLabel
 from pydantic import BaseModel, Field, validator
 
 from docling.backend.abstract_backend import AbstractDocumentBackend
-from docling.backend.dummy_backend import DummyBackend
+from docling.backend.noop_backend import NoOpBackend
 
 # from pydub import AudioSegment  # type: ignore
 # from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
@@ -24,6 +24,7 @@ from docling.datamodel.accelerator_options import (
 )
 from docling.datamodel.base_models import (
     ConversionStatus,
+    FormatToMimeType,
 )
 from docling.datamodel.document import ConversionResult, InputDocument
 from docling.datamodel.pipeline_options import (
@@ -154,15 +155,17 @@ class _NativeWhisperModel:
             # Ensure we have a proper DoclingDocument
             origin = DocumentOrigin(
                 filename=conv_res.input.file.name or "audio.wav",
-                mimetype="audio/wav",
+                mimetype="audio/x-wav",
                 binary_hash=conv_res.input.document_hash,
             )
             conv_res.document = DoclingDocument(
                 name=conv_res.input.file.stem or "audio.wav", origin=origin
             )
 
-            for _ in conversation:
-                conv_res.document.add_text(label=DocItemLabel.TEXT, text=_.to_string())
+            for citem in conversation:
+                conv_res.document.add_text(
+                    label=DocItemLabel.TEXT, text=citem.to_string()
+                )
 
             conv_res.status = ConversionStatus.SUCCESS
             return conv_res
@@ -247,4 +250,4 @@ class AsrPipeline(BasePipeline):
 
     @classmethod
     def is_backend_supported(cls, backend: AbstractDocumentBackend):
-        return isinstance(backend, DummyBackend)
+        return isinstance(backend, NoOpBackend)
diff --git a/tests/data/audio/sample_10s.mp3 b/tests/data/audio/sample_10s.mp3
new file mode 100644
index 00000000..93a7ec73
Binary files /dev/null and b/tests/data/audio/sample_10s.mp3 differ