fix(markdown): set the correct discriminator in md backend options (#2501)

Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com>
This commit is contained in:
Cesar Berrospi Ramis
2025-10-21 14:30:48 +02:00
committed by GitHub
parent a30e6a7614
commit 4227fcc3e1
2 changed files with 33 additions and 4 deletions

View File

@@ -536,6 +536,11 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
def supported_formats(cls) -> set[InputFormat]: def supported_formats(cls) -> set[InputFormat]:
return {InputFormat.MD} return {InputFormat.MD}
@classmethod
@override
def get_default_options(cls) -> MarkdownBackendOptions:
return MarkdownBackendOptions()
def convert(self) -> DoclingDocument: def convert(self) -> DoclingDocument:
_log.debug("converting Markdown...") _log.debug("converting Markdown...")
@@ -587,17 +592,24 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
self._html_blocks = 0 self._html_blocks = 0
# delegate to HTML backend # delegate to HTML backend
stream = BytesIO(bytes(html_str, encoding="utf-8")) stream = BytesIO(bytes(html_str, encoding="utf-8"))
md_options = cast(MarkdownBackendOptions, self.options)
html_options = HTMLBackendOptions(
enable_local_fetch=md_options.enable_local_fetch,
enable_remote_fetch=md_options.enable_remote_fetch,
fetch_images=md_options.fetch_images,
source_uri=md_options.source_uri,
)
in_doc = InputDocument( in_doc = InputDocument(
path_or_stream=stream, path_or_stream=stream,
format=InputFormat.HTML, format=InputFormat.HTML,
backend=html_backend_cls, backend=html_backend_cls,
filename=self.file.name, filename=self.file.name,
backend_options=self.options, backend_options=html_options,
) )
html_backend_obj = html_backend_cls( html_backend_obj = html_backend_cls(
in_doc=in_doc, in_doc=in_doc,
path_or_stream=stream, path_or_stream=stream,
options=cast(HTMLBackendOptions, self.options), options=html_options,
) )
doc = html_backend_obj.convert() doc = html_backend_obj.convert()
else: else:

View File

@@ -44,10 +44,27 @@ class HTMLBackendOptions(BaseBackendOptions):
) )
class MarkdownBackendOptions(HTMLBackendOptions): class MarkdownBackendOptions(BaseBackendOptions):
"""Options specific to the Markdown backend.""" """Options specific to the Markdown backend."""
kind: Literal["md"] = Field("md", exclude=True, repr=False)
fetch_images: bool = Field(
False,
description=(
"Whether the backend should access remote or local resources to parse "
"images in the markdown document."
),
)
source_uri: Optional[Union[AnyUrl, PurePath]] = Field(
None,
description=(
"The URI that originates the markdown document. If provided, the backend "
"will use it to resolve relative paths in the markdown document."
),
)
BackendOptions = Annotated[ BackendOptions = Annotated[
Union[DeclarativeBackendOptions, HTMLBackendOptions], Field(discriminator="kind") Union[DeclarativeBackendOptions, HTMLBackendOptions, MarkdownBackendOptions],
Field(discriminator="kind"),
] ]