diff --git a/docling/backend/md_backend.py b/docling/backend/md_backend.py index a33dcdfa..e2a754f1 100644 --- a/docling/backend/md_backend.py +++ b/docling/backend/md_backend.py @@ -536,6 +536,11 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): def supported_formats(cls) -> set[InputFormat]: return {InputFormat.MD} + @classmethod + @override + def get_default_options(cls) -> MarkdownBackendOptions: + return MarkdownBackendOptions() + def convert(self) -> DoclingDocument: _log.debug("converting Markdown...") @@ -587,17 +592,24 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): self._html_blocks = 0 # delegate to HTML backend stream = BytesIO(bytes(html_str, encoding="utf-8")) + md_options = cast(MarkdownBackendOptions, self.options) + html_options = HTMLBackendOptions( + enable_local_fetch=md_options.enable_local_fetch, + enable_remote_fetch=md_options.enable_remote_fetch, + fetch_images=md_options.fetch_images, + source_uri=md_options.source_uri, + ) in_doc = InputDocument( path_or_stream=stream, format=InputFormat.HTML, backend=html_backend_cls, filename=self.file.name, - backend_options=self.options, + backend_options=html_options, ) html_backend_obj = html_backend_cls( in_doc=in_doc, path_or_stream=stream, - options=cast(HTMLBackendOptions, self.options), + options=html_options, ) doc = html_backend_obj.convert() else: diff --git a/docling/datamodel/backend_options.py b/docling/datamodel/backend_options.py index 5aaa0721..99fcfb63 100644 --- a/docling/datamodel/backend_options.py +++ b/docling/datamodel/backend_options.py @@ -44,10 +44,27 @@ class HTMLBackendOptions(BaseBackendOptions): ) -class MarkdownBackendOptions(HTMLBackendOptions): +class MarkdownBackendOptions(BaseBackendOptions): """Options specific to the Markdown backend.""" + kind: Literal["md"] = Field("md", exclude=True, repr=False) + fetch_images: bool = Field( + False, + description=( + "Whether the backend should access remote or local resources to parse " + "images in the markdown document." + ), + ) + source_uri: Optional[Union[AnyUrl, PurePath]] = Field( + None, + description=( + "The URI that originates the markdown document. If provided, the backend " + "will use it to resolve relative paths in the markdown document." + ), + ) + BackendOptions = Annotated[ - Union[DeclarativeBackendOptions, HTMLBackendOptions], Field(discriminator="kind") + Union[DeclarativeBackendOptions, HTMLBackendOptions, MarkdownBackendOptions], + Field(discriminator="kind"), ]