mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
fix(markdown): set the correct discriminator in md backend options (#2501)
Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
a30e6a7614
commit
4227fcc3e1
@@ -536,6 +536,11 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
def supported_formats(cls) -> set[InputFormat]:
|
def supported_formats(cls) -> set[InputFormat]:
|
||||||
return {InputFormat.MD}
|
return {InputFormat.MD}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
@override
|
||||||
|
def get_default_options(cls) -> MarkdownBackendOptions:
|
||||||
|
return MarkdownBackendOptions()
|
||||||
|
|
||||||
def convert(self) -> DoclingDocument:
|
def convert(self) -> DoclingDocument:
|
||||||
_log.debug("converting Markdown...")
|
_log.debug("converting Markdown...")
|
||||||
|
|
||||||
@@ -587,17 +592,24 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self._html_blocks = 0
|
self._html_blocks = 0
|
||||||
# delegate to HTML backend
|
# delegate to HTML backend
|
||||||
stream = BytesIO(bytes(html_str, encoding="utf-8"))
|
stream = BytesIO(bytes(html_str, encoding="utf-8"))
|
||||||
|
md_options = cast(MarkdownBackendOptions, self.options)
|
||||||
|
html_options = HTMLBackendOptions(
|
||||||
|
enable_local_fetch=md_options.enable_local_fetch,
|
||||||
|
enable_remote_fetch=md_options.enable_remote_fetch,
|
||||||
|
fetch_images=md_options.fetch_images,
|
||||||
|
source_uri=md_options.source_uri,
|
||||||
|
)
|
||||||
in_doc = InputDocument(
|
in_doc = InputDocument(
|
||||||
path_or_stream=stream,
|
path_or_stream=stream,
|
||||||
format=InputFormat.HTML,
|
format=InputFormat.HTML,
|
||||||
backend=html_backend_cls,
|
backend=html_backend_cls,
|
||||||
filename=self.file.name,
|
filename=self.file.name,
|
||||||
backend_options=self.options,
|
backend_options=html_options,
|
||||||
)
|
)
|
||||||
html_backend_obj = html_backend_cls(
|
html_backend_obj = html_backend_cls(
|
||||||
in_doc=in_doc,
|
in_doc=in_doc,
|
||||||
path_or_stream=stream,
|
path_or_stream=stream,
|
||||||
options=cast(HTMLBackendOptions, self.options),
|
options=html_options,
|
||||||
)
|
)
|
||||||
doc = html_backend_obj.convert()
|
doc = html_backend_obj.convert()
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -44,10 +44,27 @@ class HTMLBackendOptions(BaseBackendOptions):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MarkdownBackendOptions(HTMLBackendOptions):
|
class MarkdownBackendOptions(BaseBackendOptions):
|
||||||
"""Options specific to the Markdown backend."""
|
"""Options specific to the Markdown backend."""
|
||||||
|
|
||||||
|
kind: Literal["md"] = Field("md", exclude=True, repr=False)
|
||||||
|
fetch_images: bool = Field(
|
||||||
|
False,
|
||||||
|
description=(
|
||||||
|
"Whether the backend should access remote or local resources to parse "
|
||||||
|
"images in the markdown document."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
source_uri: Optional[Union[AnyUrl, PurePath]] = Field(
|
||||||
|
None,
|
||||||
|
description=(
|
||||||
|
"The URI that originates the markdown document. If provided, the backend "
|
||||||
|
"will use it to resolve relative paths in the markdown document."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
BackendOptions = Annotated[
|
BackendOptions = Annotated[
|
||||||
Union[DeclarativeBackendOptions, HTMLBackendOptions], Field(discriminator="kind")
|
Union[DeclarativeBackendOptions, HTMLBackendOptions, MarkdownBackendOptions],
|
||||||
|
Field(discriminator="kind"),
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user