fix(markdown): set the correct discriminator in md backend options (#2501)

Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com>
This commit is contained in:
Cesar Berrospi Ramis
2025-10-21 14:30:48 +02:00
committed by GitHub
parent a30e6a7614
commit 4227fcc3e1
2 changed files with 33 additions and 4 deletions

View File

@@ -536,6 +536,11 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
def supported_formats(cls) -> set[InputFormat]:
return {InputFormat.MD}
# Return the default options object for the Markdown backend: a
# MarkdownBackendOptions constructed with no arguments, so every field takes
# its declared default.
@classmethod
@override
def get_default_options(cls) -> MarkdownBackendOptions:
return MarkdownBackendOptions()
def convert(self) -> DoclingDocument:
_log.debug("converting Markdown...")
@@ -587,17 +592,24 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
self._html_blocks = 0
# delegate to HTML backend
stream = BytesIO(bytes(html_str, encoding="utf-8"))
md_options = cast(MarkdownBackendOptions, self.options)
html_options = HTMLBackendOptions(
enable_local_fetch=md_options.enable_local_fetch,
enable_remote_fetch=md_options.enable_remote_fetch,
fetch_images=md_options.fetch_images,
source_uri=md_options.source_uri,
)
in_doc = InputDocument(
path_or_stream=stream,
format=InputFormat.HTML,
backend=html_backend_cls,
filename=self.file.name,
backend_options=self.options,
backend_options=html_options,
)
html_backend_obj = html_backend_cls(
in_doc=in_doc,
path_or_stream=stream,
options=cast(HTMLBackendOptions, self.options),
options=html_options,
)
doc = html_backend_obj.convert()
else:

View File

@@ -44,10 +44,27 @@ class HTMLBackendOptions(BaseBackendOptions):
)
class MarkdownBackendOptions(HTMLBackendOptions):
class MarkdownBackendOptions(BaseBackendOptions):
"""Options specific to the Markdown backend."""
# Discriminator tag for this options model: the BackendOptions union is
# declared with Field(discriminator="kind"), so "md" identifies this class.
# exclude=True / repr=False are passed to Field; presumably (pydantic Field
# semantics) this keeps the tag out of serialized output and repr -- confirm.
kind: Literal["md"] = Field("md", exclude=True, repr=False)
# Defaults to False.
fetch_images: bool = Field(
False,
description=(
"Whether the backend should access remote or local resources to parse "
"images in the markdown document."
),
)
# Defaults to None; accepts either an AnyUrl or a PurePath.
# NOTE(review): the Markdown backend also reads enable_local_fetch and
# enable_remote_fetch from these options; they are not declared here, so they
# are presumably inherited from BaseBackendOptions -- confirm.
source_uri: Optional[Union[AnyUrl, PurePath]] = Field(
None,
description=(
"The URI that originates the markdown document. If provided, the backend "
"will use it to resolve relative paths in the markdown document."
),
)
BackendOptions = Annotated[
Union[DeclarativeBackendOptions, HTMLBackendOptions], Field(discriminator="kind")
Union[DeclarativeBackendOptions, HTMLBackendOptions, MarkdownBackendOptions],
Field(discriminator="kind"),
]