From da7678a754b62df5cf0a9a1efe98c288bda20bd7 Mon Sep 17 00:00:00 2001 From: Nick Hoernle Date: Wed, 10 Dec 2025 20:35:24 +0100 Subject: [PATCH] feat: Add YAML output format to CLI (#2768) * Add a yaml command line argument Signed-off-by: Nick Hoernle * Use yaml and not yml Co-authored-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Signed-off-by: Nick Hoernle * Use the same naming convention yaml in the variable names as well Signed-off-by: Nick Hoernle --------- Signed-off-by: Nick Hoernle Signed-off-by: Nick Hoernle Co-authored-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> --- docling/cli/main.py | 11 +++++++++++ docling/datamodel/base_models.py | 1 + 2 files changed, 12 insertions(+) diff --git a/docling/cli/main.py b/docling/cli/main.py index 9dddadba..32f3aa9c 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -201,6 +201,7 @@ def export_documents( conv_results: Iterable[ConversionResult], output_dir: Path, export_json: bool, + export_yaml: bool, export_html: bool, export_html_split_page: bool, show_layout: bool, @@ -225,6 +226,14 @@ def export_documents( filename=fname, image_mode=image_export_mode ) + # Export YAML format: + if export_yaml: + fname = output_dir / f"{doc_filename}.yaml" + _log.info(f"writing YAML output to {fname}") + conv_res.document.save_as_yaml( + filename=fname, image_mode=image_export_mode + ) + # Export HTML format: if export_html: fname = output_dir / f"{doc_filename}.html" @@ -602,6 +611,7 @@ def convert( # noqa: C901 to_formats = [OutputFormat.MARKDOWN] export_json = OutputFormat.JSON in to_formats + export_yaml = OutputFormat.YAML in to_formats export_html = OutputFormat.HTML in to_formats export_html_split_page = OutputFormat.HTML_SPLIT_PAGE in to_formats export_md = OutputFormat.MARKDOWN in to_formats @@ -873,6 +883,7 @@ def convert( # noqa: C901 conv_results, output_dir=output, export_json=export_json, + export_yaml=export_yaml, export_html=export_html, export_html_split_page=export_html_split_page, show_layout=show_layout, diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 61156f61..8c946200 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -75,6 +75,7 @@ class InputFormat(str, Enum): class OutputFormat(str, Enum): MARKDOWN = "md" JSON = "json" + YAML = "yaml" HTML = "html" HTML_SPLIT_PAGE = "html_split_page" TEXT = "text"