feat: Add YAML output format to CLI (#2768)

* Add a YAML command-line argument

Signed-off-by: Nick Hoernle <nicholas.hoernle@gmail.com>

* Use yaml and not yml

Co-authored-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Signed-off-by: Nick Hoernle <nicholas.hoernle@gmail.com>

* Use the same `yaml` naming convention in the variable names as well

Signed-off-by: Nick Hoernle <nicholas.hoernle@quantco.com>

---------

Signed-off-by: Nick Hoernle <nicholas.hoernle@gmail.com>
Signed-off-by: Nick Hoernle <nicholas.hoernle@quantco.com>
Co-authored-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
This commit is contained in:
Nick Hoernle
2025-12-10 20:35:24 +01:00
committed by GitHub
parent 1d78418cef
commit da7678a754
2 changed files with 12 additions and 0 deletions

View File

@@ -201,6 +201,7 @@ def export_documents(
conv_results: Iterable[ConversionResult], conv_results: Iterable[ConversionResult],
output_dir: Path, output_dir: Path,
export_json: bool, export_json: bool,
export_yaml: bool,
export_html: bool, export_html: bool,
export_html_split_page: bool, export_html_split_page: bool,
show_layout: bool, show_layout: bool,
@@ -225,6 +226,14 @@ def export_documents(
filename=fname, image_mode=image_export_mode filename=fname, image_mode=image_export_mode
) )
# Export YAML format:
if export_yaml:
fname = output_dir / f"{doc_filename}.yaml"
_log.info(f"writing YAML output to {fname}")
conv_res.document.save_as_yaml(
filename=fname, image_mode=image_export_mode
)
# Export HTML format: # Export HTML format:
if export_html: if export_html:
fname = output_dir / f"{doc_filename}.html" fname = output_dir / f"{doc_filename}.html"
@@ -602,6 +611,7 @@ def convert( # noqa: C901
to_formats = [OutputFormat.MARKDOWN] to_formats = [OutputFormat.MARKDOWN]
export_json = OutputFormat.JSON in to_formats export_json = OutputFormat.JSON in to_formats
export_yaml = OutputFormat.YAML in to_formats
export_html = OutputFormat.HTML in to_formats export_html = OutputFormat.HTML in to_formats
export_html_split_page = OutputFormat.HTML_SPLIT_PAGE in to_formats export_html_split_page = OutputFormat.HTML_SPLIT_PAGE in to_formats
export_md = OutputFormat.MARKDOWN in to_formats export_md = OutputFormat.MARKDOWN in to_formats
@@ -873,6 +883,7 @@ def convert( # noqa: C901
conv_results, conv_results,
output_dir=output, output_dir=output,
export_json=export_json, export_json=export_json,
export_yaml=export_yaml,
export_html=export_html, export_html=export_html,
export_html_split_page=export_html_split_page, export_html_split_page=export_html_split_page,
show_layout=show_layout, show_layout=show_layout,

View File

@@ -75,6 +75,7 @@ class InputFormat(str, Enum):
class OutputFormat(str, Enum): class OutputFormat(str, Enum):
MARKDOWN = "md" MARKDOWN = "md"
JSON = "json" JSON = "json"
YAML = "yaml"
HTML = "html" HTML = "html"
HTML_SPLIT_PAGE = "html_split_page" HTML_SPLIT_PAGE = "html_split_page"
TEXT = "text" TEXT = "text"