More renaming, design enrichment interface

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2024-10-11 10:21:31 +02:00
parent 051beae203
commit 304d16029a
17 changed files with 85 additions and 52 deletions

View File

@@ -36,25 +36,25 @@ def export_documents(
with (output_dir / f"{doc_filename}.legacy.json").open(
"w", encoding="utf-8"
) as fp:
fp.write(json.dumps(conv_res.render_as_dict_v1()))
fp.write(json.dumps(conv_res.render_as_dict()))
# Export Text format:
with (output_dir / f"{doc_filename}.legacy.txt").open(
"w", encoding="utf-8"
) as fp:
fp.write(conv_res.render_as_text_v1())
fp.write(conv_res.render_as_text())
# Export Markdown format:
with (output_dir / f"{doc_filename}.legacy.md").open(
"w", encoding="utf-8"
) as fp:
fp.write(conv_res.render_as_markdown_v1())
fp.write(conv_res.render_as_markdown())
# Export Document Tags format:
with (output_dir / f"{doc_filename}.legacy.doctags.txt").open(
"w", encoding="utf-8"
) as fp:
fp.write(conv_res.render_as_doctags_v1())
fp.write(conv_res.render_as_doctags())
if USE_V2:
# Export Docling document format to JSON (experimental):
@@ -129,7 +129,7 @@ def main():
start_time = time.time()
conv_results = doc_converter.convert(input)
conv_results = doc_converter.convert_batch(input)
success_count, partial_success_count, failure_count = export_documents(
conv_results, output_dir=Path("./scratch")
)

View File

@@ -39,17 +39,17 @@ def export_documents(
# Export Text format:
with (output_dir / f"{doc_filename}.txt").open("w", encoding="utf-8") as fp:
fp.write(conv_res.render_as_text_v1())
fp.write(conv_res.render_as_text())
# Export Markdown format:
with (output_dir / f"{doc_filename}.md").open("w", encoding="utf-8") as fp:
fp.write(conv_res.render_as_markdown_v1())
fp.write(conv_res.render_as_markdown())
# Export Document Tags format:
with (output_dir / f"{doc_filename}.doctags").open(
"w", encoding="utf-8"
) as fp:
fp.write(conv_res.render_as_doctags_v1())
fp.write(conv_res.render_as_doctags())
else:
_log.info(f"Document {conv_res.input.file} failed to convert.")
@@ -157,7 +157,7 @@ def main():
start_time = time.time()
conv_results = doc_converter.convert(input)
conv_results = doc_converter.convert_batch(input)
success_count, failure_count = export_documents(
conv_results, output_dir=Path("./scratch")
)

View File

@@ -42,7 +42,7 @@ def main():
start_time = time.time()
conv_results = doc_converter.convert(input_files)
conv_results = doc_converter.convert_batch(input_files)
success_count = 0
failure_count = 0

View File

@@ -41,7 +41,7 @@ def main():
start_time = time.time()
converted_docs = doc_converter.convert(input_files)
converted_docs = doc_converter.convert_batch(input_files)
success_count = 0
failure_count = 0

View File

@@ -25,7 +25,7 @@ def main():
start_time = time.time()
conv_results = doc_converter.convert(input_files)
conv_results = doc_converter.convert_batch(input_files)
success_count = 0
failure_count = 0

View File

@@ -50,7 +50,7 @@ doc_converter = DocumentConverter( # all of the below is optional, has internal
},
)
conv_results = doc_converter.convert(input)
conv_results = doc_converter.convert_batch(input)
for res in conv_results:
out_path = Path("./scratch") / f"{res.input.file.name}.experimental.md"