From f773d8a62161130d9092fd77d98effd0f5188b09 Mon Sep 17 00:00:00 2001 From: Maxim Lysak Date: Mon, 7 Oct 2024 17:25:17 +0200 Subject: [PATCH] Improved demo code that saves Markdown output to files Signed-off-by: Maxim Lysak --- examples/run_with_formats.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/examples/run_with_formats.py b/examples/run_with_formats.py index 91d0baa2..3080c0ab 100644 --- a/examples/run_with_formats.py +++ b/examples/run_with_formats.py @@ -1,17 +1,27 @@ +import json +import logging from pathlib import Path +from typing import Iterable + +import yaml from docling.backend.msword_backend import MsWordDocumentBackend from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend from docling.datamodel.base_models import ( + ConversionStatus, InputFormat, PdfPipelineOptions, PipelineOptions, ) -from docling.datamodel.document import DocumentConversionInput +from docling.datamodel.document import ConversionResult, DocumentConversionInput from docling.document_converter import DocumentConverter, FormatOption from docling.pipeline.simple_model_pipeline import SimpleModelPipeline from docling.pipeline.standard_pdf_model_pipeline import StandardPdfModelPipeline +_log = logging.getLogger(__name__) + +USE_EXPERIMENTAL = False + input_paths = [ # Path("tests/data/wiki_duck.html"), Path("tests/data/word_sample.docx"), @@ -36,7 +46,11 @@ doc_converter = DocumentConverter() conv_results = doc_converter.convert(input) for res in conv_results: + print("") print( f"Document {res.input.file.name} converted with status {res.status}. Content:" ) print(res.experimental.export_to_markdown()) + # Export Docling document format to markdown (experimental): + with (Path("./scratch") / f"{res.input.file.name}.experimental.md").open("w") as fp: + fp.write(res.experimental.export_to_markdown())