mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
fix(markdown): set the correct discriminator in md backend options (#2501)
Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
a30e6a7614
commit
4227fcc3e1
@@ -536,6 +536,11 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
|
||||
def supported_formats(cls) -> set[InputFormat]:
|
||||
return {InputFormat.MD}
|
||||
|
||||
@classmethod
|
||||
@override
|
||||
def get_default_options(cls) -> MarkdownBackendOptions:
|
||||
return MarkdownBackendOptions()
|
||||
|
||||
def convert(self) -> DoclingDocument:
|
||||
_log.debug("converting Markdown...")
|
||||
|
||||
@@ -587,17 +592,24 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
|
||||
self._html_blocks = 0
|
||||
# delegate to HTML backend
|
||||
stream = BytesIO(bytes(html_str, encoding="utf-8"))
|
||||
md_options = cast(MarkdownBackendOptions, self.options)
|
||||
html_options = HTMLBackendOptions(
|
||||
enable_local_fetch=md_options.enable_local_fetch,
|
||||
enable_remote_fetch=md_options.enable_remote_fetch,
|
||||
fetch_images=md_options.fetch_images,
|
||||
source_uri=md_options.source_uri,
|
||||
)
|
||||
in_doc = InputDocument(
|
||||
path_or_stream=stream,
|
||||
format=InputFormat.HTML,
|
||||
backend=html_backend_cls,
|
||||
filename=self.file.name,
|
||||
backend_options=self.options,
|
||||
backend_options=html_options,
|
||||
)
|
||||
html_backend_obj = html_backend_cls(
|
||||
in_doc=in_doc,
|
||||
path_or_stream=stream,
|
||||
options=cast(HTMLBackendOptions, self.options),
|
||||
options=html_options,
|
||||
)
|
||||
doc = html_backend_obj.convert()
|
||||
else:
|
||||
|
||||
@@ -44,10 +44,27 @@ class HTMLBackendOptions(BaseBackendOptions):
|
||||
)
|
||||
|
||||
|
||||
class MarkdownBackendOptions(HTMLBackendOptions):
|
||||
class MarkdownBackendOptions(BaseBackendOptions):
|
||||
"""Options specific to the Markdown backend."""
|
||||
|
||||
kind: Literal["md"] = Field("md", exclude=True, repr=False)
|
||||
fetch_images: bool = Field(
|
||||
False,
|
||||
description=(
|
||||
"Whether the backend should access remote or local resources to parse "
|
||||
"images in the markdown document."
|
||||
),
|
||||
)
|
||||
source_uri: Optional[Union[AnyUrl, PurePath]] = Field(
|
||||
None,
|
||||
description=(
|
||||
"The URI that originates the markdown document. If provided, the backend "
|
||||
"will use it to resolve relative paths in the markdown document."
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
BackendOptions = Annotated[
|
||||
Union[DeclarativeBackendOptions, HTMLBackendOptions], Field(discriminator="kind")
|
||||
Union[DeclarativeBackendOptions, HTMLBackendOptions, MarkdownBackendOptions],
|
||||
Field(discriminator="kind"),
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user