diff --git a/docling/backend/abstract_backend.py b/docling/backend/abstract_backend.py
index 5bfc02a2..b47b11cd 100644
--- a/docling/backend/abstract_backend.py
+++ b/docling/backend/abstract_backend.py
@@ -13,6 +13,7 @@ if TYPE_CHECKING:
 class AbstractDocumentBackend(ABC):
     @abstractmethod
     def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
+        self.file = in_doc.file
         self.path_or_stream = path_or_stream
         self.document_hash = in_doc.document_hash
         self.input_format = in_doc.format
diff --git a/docling/backend/asciidoc_backend.py b/docling/backend/asciidoc_backend.py
index 5ece3813..c9d2fc52 100644
--- a/docling/backend/asciidoc_backend.py
+++ b/docling/backend/asciidoc_backend.py
@@ -1,4 +1,5 @@
 import logging
+import os
 import re
 from io import BytesIO
 from pathlib import Path
@@ -67,21 +68,13 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
         Parses the ASCII into a structured document model.
         """
 
-        fname = ""
-        if isinstance(self.path_or_stream, Path):
-            fname = self.path_or_stream.name
-
         origin = DocumentOrigin(
-            filename=fname,
+            filename=self.file.name or "file",
             mimetype="text/asciidoc",
             binary_hash=self.document_hash,
         )
-        if len(fname) > 0:
-            docname = Path(fname).stem
-        else:
-            docname = "stream"
 
-        doc = DoclingDocument(name=docname, origin=origin)
+        doc = DoclingDocument(name=self.file.stem or "file", origin=origin)
 
         doc = self._parse(doc)
 
@@ -138,9 +131,9 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
             # Lists
             elif self._is_list_item(line):
 
-                print("line: ", line)
+                _log.debug(f"line: {line}")
                 item = self._parse_list_item(line)
-                print("parsed list-item: ", item)
+                _log.debug(f"parsed list-item: {item}")
 
                 level = self._get_current_level(parents)
 
@@ -160,9 +153,9 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
 
                 elif in_list and item["indent"] < indents[level]:
 
-                    print(item["indent"], " => ", indents[level])
+                    # print(item["indent"], " => ", indents[level])
                     while item["indent"] < indents[level]:
-                        print(item["indent"], " => ", indents[level])
+                        # print(item["indent"], " => ", indents[level])
                         parents[level] = None
                         indents[level] = None
                         level -= 1
@@ -217,7 +210,6 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
                 caption_data = []
 
                 item = self._parse_picture(line)
-                print(item)
 
                 size = None
                 if "width" in item and "height" in item:
@@ -355,7 +347,7 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
             # Fallback if no match
             return {
                 "type": "list_item",
-                "marker": item_marker,
+                "marker": "-",
                 "text": line,
                 "numbered": False,
                 "indent": 0,
diff --git a/docling/backend/html_backend.py b/docling/backend/html_backend.py
index bd098c95..7bae3463 100644
--- a/docling/backend/html_backend.py
+++ b/docling/backend/html_backend.py
@@ -7,6 +7,7 @@ from bs4 import BeautifulSoup
 from docling_core.types.doc import (
     DocItemLabel,
     DoclingDocument,
+    DocumentOrigin,
     GroupLabel,
     TableCell,
     TableData,
@@ -66,7 +67,13 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
 
     def convert(self) -> DoclingDocument:
         # access self.path_or_stream to load stuff
-        doc = DoclingDocument(name="dummy")
+        origin = DocumentOrigin(
+            filename=self.file.name or "file",
+            mimetype="text/html",
+            binary_hash=self.document_hash,
+        )
+
+        doc = DoclingDocument(name=self.file.stem or "file", origin=origin)
         _log.debug("Trying to convert HTML...")
 
         if self.is_valid():
diff --git a/docling/backend/md_backend.py b/docling/backend/md_backend.py
index 5f326065..0f51b052 100644
--- a/docling/backend/md_backend.py
+++ b/docling/backend/md_backend.py
@@ -237,21 +237,13 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
     def convert(self) -> DoclingDocument:
         _log.debug("converting Markdown...")
 
-        fname = ""
-        if isinstance(self.path_or_stream, Path):
-            fname = self.path_or_stream.name
-
         origin = DocumentOrigin(
-            filename=fname,
+            filename=self.file.name or "file",
             mimetype="text/markdown",
             binary_hash=self.document_hash,
         )
-        if len(fname) > 0:
-            docname = Path(fname).stem
-        else:
-            docname = "stream"
 
-        doc = DoclingDocument(name=docname, origin=origin)
+        doc = DoclingDocument(name=self.file.stem or "file", origin=origin)
 
         if self.is_valid():
             # Parse the markdown into an abstract syntax tree (AST)
diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py
index 0adebb15..0544cc9c 100644
--- a/docling/backend/mspowerpoint_backend.py
+++ b/docling/backend/mspowerpoint_backend.py
@@ -83,21 +83,14 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
         # Parses the PPTX into a structured document model.
         # origin = DocumentOrigin(filename=self.path_or_stream.name, mimetype=next(iter(FormatToMimeType.get(InputFormat.PPTX))), binary_hash=self.document_hash)
 
-        fname = ""
-        if isinstance(self.path_or_stream, Path):
-            fname = self.path_or_stream.name
-
         origin = DocumentOrigin(
-            filename=fname,
+            filename=self.file.name or "file",
             mimetype="application/vnd.ms-powerpoint",
             binary_hash=self.document_hash,
         )
-        if len(fname) > 0:
-            docname = Path(fname).stem
-        else:
-            docname = "stream"
+
         doc = DoclingDocument(
-            name=docname, origin=origin
+            name=self.file.stem or "file", origin=origin
         )  # must add origin information
         doc = self.walk_linear(self.pptx_obj, doc)
 
diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py
index 5b5420f9..5b166d5b 100644
--- a/docling/backend/msword_backend.py
+++ b/docling/backend/msword_backend.py
@@ -85,20 +85,13 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
     def convert(self) -> DoclingDocument:
         # Parses the DOCX into a structured document model.
 
-        fname = ""
-        if isinstance(self.path_or_stream, Path):
-            fname = self.path_or_stream.name
-
         origin = DocumentOrigin(
-            filename=fname,
+            filename=self.file.name or "file",
             mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
             binary_hash=self.document_hash,
         )
-        if len(fname) > 0:
-            docname = Path(fname).stem
-        else:
-            docname = "stream"
-        doc = DoclingDocument(name=docname, origin=origin)
+
+        doc = DoclingDocument(name=self.file.stem or "file", origin=origin)
         if self.is_valid():
             assert self.docx_obj is not None
             doc = self.walk_linear(self.docx_obj.element.body, self.docx_obj, doc)
diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py
index f80056c8..a82d86a5 100644
--- a/docling/datamodel/base_models.py
+++ b/docling/datamodel/base_models.py
@@ -51,27 +51,27 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
     InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"],
 }
 
-FormatToMimeType: Dict[InputFormat, Set[str]] = {
-    InputFormat.DOCX: {
+FormatToMimeType: Dict[InputFormat, List[str]] = {  # ordered: [0] is the primary type
+    InputFormat.DOCX: [
         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
         "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
-    },
-    InputFormat.PPTX: {
+    ],
+    InputFormat.PPTX: [
         "application/vnd.openxmlformats-officedocument.presentationml.template",
         "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
         "application/vnd.openxmlformats-officedocument.presentationml.presentation",
-    },
-    InputFormat.HTML: {"text/html", "application/xhtml+xml"},
-    InputFormat.IMAGE: {
+    ],
+    InputFormat.HTML: ["text/html", "application/xhtml+xml"],
+    InputFormat.IMAGE: [
         "image/png",
         "image/jpeg",
         "image/tiff",
         "image/gif",
         "image/bmp",
-    },
-    InputFormat.PDF: {"application/pdf"},
-    InputFormat.ASCIIDOC: {"text/asciidoc"},
-    InputFormat.MD: {"text/markdown", "text/x-markdown"},
+    ],
+    InputFormat.PDF: ["application/pdf"],
+    InputFormat.ASCIIDOC: ["text/asciidoc"],
+    InputFormat.MD: ["text/markdown", "text/x-markdown"],
 }
 MimeTypeToFormat = {
     mime: fmt for fmt, mimes in FormatToMimeType.items() for mime in mimes
diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py
index be213bc5..e1ecf17f 100644
--- a/docling/datamodel/document.py
+++ b/docling/datamodel/document.py
@@ -45,6 +45,8 @@ from docling.datamodel.base_models import (
     ConversionStatus,
     DocumentStream,
     ErrorItem,
+    FormatToExtensions,
+    FormatToMimeType,
     InputFormat,
     MimeTypeToFormat,
     Page,
@@ -480,28 +482,48 @@ class _DocumentConversionInput(BaseModel):
             else:
                 raise RuntimeError(f"Unexpected obj type in iterator: {type(obj)}")
 
-    def _guess_format(self, obj):
-        content = None
+    def _guess_format(self, obj: Union[Path, DocumentStream]):
+        content = b""  # bytes passed to _detect_html_xhtml; empty if never read
+        format = None  # NOTE(review): shadows the builtin; reassigned before return
+        # Resolution order: filetype guess, file extension, HTML/XHTML sniff.
         if isinstance(obj, Path):
             mime = filetype.guess_mime(str(obj))
             if mime is None:
+                ext = obj.suffix[1:]  # suffix without its leading "."
+                mime = self._mime_from_extension(ext)
+            if mime is None:  # extension unknown too: sample the file content
                 with obj.open("rb") as f:
                     content = f.read(1024)  # Read first 1KB
 
         elif isinstance(obj, DocumentStream):
-            obj.stream.seek(0)
             content = obj.stream.read(8192)
             obj.stream.seek(0)
             mime = filetype.guess_mime(content)
+            if mime is None:  # content gave no match: try the stream name's extension
+                ext = (  # "" when the name has no dot or starts with one
+                    obj.name.rsplit(".", 1)[-1]
+                    if ("." in obj.name and not obj.name.startswith("."))
+                    else ""
+                )
+                mime = self._mime_from_extension(ext)
 
-        if mime is None:
-            mime = self._detect_html_xhtml(content)
-        if mime is None:
-            mime = "text/markdown"
+        mime = mime or self._detect_html_xhtml(content)  # fall back to content sniff
+        mime = mime or "text/plain"  # presumably maps to no format (None) — TODO confirm
 
         format = MimeTypeToFormat.get(mime)
         return format
 
+    def _mime_from_extension(self, ext):
+        """Map a file extension to its primary MIME type, or None if unknown."""
+        # Lowercase first so "MD" or "HTML" match the lowercase extension tables.
+        ext = (ext or "").lower()
+        # Same precedence as the previous if/elif chain: AsciiDoc, HTML, Markdown.
+        for fmt in (InputFormat.ASCIIDOC, InputFormat.HTML, InputFormat.MD):
+            if ext in FormatToExtensions[fmt]:
+                # The first entry of each MIME list is treated as the primary type.
+                return FormatToMimeType[fmt][0]
+        return None
+
     def _detect_html_xhtml(self, content):
         content_str = content.decode("ascii", errors="ignore").lower()
         # Remove XML comments
diff --git a/docs/examples/run_with_formats.py b/docs/examples/run_with_formats.py
index bb3d6722..80384f6d 100644
--- a/docs/examples/run_with_formats.py
+++ b/docs/examples/run_with_formats.py
@@ -1,11 +1,13 @@
 import json
 import logging
+from io import BytesIO
 from pathlib import Path
 
 import yaml
 
+from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
-from docling.datamodel.base_models import InputFormat
+from docling.datamodel.base_models import DocumentStream, InputFormat
 from docling.document_converter import (
     DocumentConverter,
     PdfFormatOption,
@@ -19,18 +21,24 @@ _log = logging.getLogger(__name__)
 
 def main():
     input_paths = [
-        Path("README.md"),
         Path("tests/data/wiki_duck.html"),
         Path("tests/data/word_sample.docx"),
+        Path("tests/data/word_nested.docx"),
         Path("tests/data/lorem_ipsum.docx"),
         Path("tests/data/powerpoint_sample.pptx"),
         Path("tests/data/2305.03393v1-pg9-img.png"),
         Path("tests/data/2206.01062.pdf"),
         Path("tests/data/test_01.asciidoc"),
-        Path("tests/data/test_01.asciidoc"),
+        Path("tests/data/test_02.asciidoc"),
         Path("README.md"),
     ]
 
+    # To read from bytes instead:
+    # docs = [
+    #    DocumentStream(name=f.name, stream=BytesIO(f.read_bytes()))
+    #    for f in input_paths
+    # ]
+
     ## for defaults use:
     # doc_converter = DocumentConverter()
 
@@ -49,7 +57,8 @@ def main():
             ],  # whitelist formats, non-matching files are ignored.
             format_options={
                 InputFormat.PDF: PdfFormatOption(
-                    pipeline_cls=StandardPdfPipeline, backend=PyPdfiumDocumentBackend
+                    pipeline_cls=StandardPdfPipeline,
+                    backend=DoclingParseDocumentBackend,
                 ),
                 InputFormat.DOCX: WordFormatOption(
                     pipeline_cls=SimplePipeline  # , backend=MsWordDocumentBackend
@@ -59,6 +68,7 @@ def main():
     )
 
     conv_results = doc_converter.convert_all(input_paths)
+    # conv_results = doc_converter.convert_all(docs)
 
     for res in conv_results:
         out_path = Path("scratch")