fix(markdown): set the correct discriminator in md backend options (#2501)

Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com>
2025-12-08 12:48:28 +00:00 · 2025-10-21 14:30:48 +02:00
parent a30e6a7614
commit 4227fcc3e1
2 changed files with 33 additions and 4 deletions
--- a/docling/backend/md_backend.py
+++ b/docling/backend/md_backend.py
@@ -536,6 +536,11 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
    def supported_formats(cls) -> set[InputFormat]:
        return {InputFormat.MD}

+    @classmethod
+    @override
+    def get_default_options(cls) -> MarkdownBackendOptions:
+        return MarkdownBackendOptions()
+
    def convert(self) -> DoclingDocument:
        _log.debug("converting Markdown...")

@@ -587,17 +592,24 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
                self._html_blocks = 0
                # delegate to HTML backend
                stream = BytesIO(bytes(html_str, encoding="utf-8"))
+                md_options = cast(MarkdownBackendOptions, self.options)
+                html_options = HTMLBackendOptions(
+                    enable_local_fetch=md_options.enable_local_fetch,
+                    enable_remote_fetch=md_options.enable_remote_fetch,
+                    fetch_images=md_options.fetch_images,
+                    source_uri=md_options.source_uri,
+                )
                in_doc = InputDocument(
                    path_or_stream=stream,
                    format=InputFormat.HTML,
                    backend=html_backend_cls,
                    filename=self.file.name,
-                    backend_options=self.options,
+                    backend_options=html_options,
                )
                html_backend_obj = html_backend_cls(
                    in_doc=in_doc,
                    path_or_stream=stream,
-                    options=cast(HTMLBackendOptions, self.options),
+                    options=html_options,
                )
                doc = html_backend_obj.convert()
        else:
--- a/docling/datamodel/backend_options.py
+++ b/docling/datamodel/backend_options.py
@@ -44,10 +44,27 @@ class HTMLBackendOptions(BaseBackendOptions):
    )


-class MarkdownBackendOptions(HTMLBackendOptions):
+class MarkdownBackendOptions(BaseBackendOptions):
    """Options specific to the Markdown backend."""

+    kind: Literal["md"] = Field("md", exclude=True, repr=False)
+    fetch_images: bool = Field(
+        False,
+        description=(
+            "Whether the backend should access remote or local resources to parse "
+            "images in the markdown document."
+        ),
+    )
+    source_uri: Optional[Union[AnyUrl, PurePath]] = Field(
+        None,
+        description=(
+            "The URI that originates the markdown document. If provided, the backend "
+            "will use it to resolve relative paths in the markdown document."
+        ),
+    )
+

 BackendOptions = Annotated[
-    Union[DeclarativeBackendOptions, HTMLBackendOptions], Field(discriminator="kind")
+    Union[DeclarativeBackendOptions, HTMLBackendOptions, MarkdownBackendOptions],
+    Field(discriminator="kind"),
 ]