feat: adding txt and doctags output (#68)

* feat: adding txt and doctags output

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* cleaned up the export

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* Fix datamodel usage for Figure

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* updated all the examples to deal with the new rendering

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

---------

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Peter W. J. Staar
2024-09-10 17:30:52 +02:00
committed by GitHub
parent cd5b6293cc
commit bdfdfbf092
7 changed files with 784 additions and 1173 deletions

View File

@@ -30,9 +30,18 @@ def export_documents(
with (output_dir / f"{doc_filename}.json").open("w") as fp:
fp.write(json.dumps(conv_res.render_as_dict()))
# Export Text format:
with (output_dir / f"{doc_filename}.txt").open("w") as fp:
fp.write(conv_res.render_as_text())
# Export Markdown format:
with (output_dir / f"{doc_filename}.md").open("w") as fp:
fp.write(conv_res.render_as_markdown())
# Export Document Tags format:
with (output_dir / f"{doc_filename}.doctags").open("w") as fp:
fp.write(conv_res.render_as_doctags())
elif conv_res.status == ConversionStatus.PARTIAL_SUCCESS:
_log.info(
f"Document {conv_res.input.file} was partially converted with the following errors:"

View File

@@ -31,9 +31,18 @@ def export_documents(
with (output_dir / f"{doc_filename}.json").open("w") as fp:
fp.write(json.dumps(conv_res.render_as_dict()))
# Export Text format:
with (output_dir / f"{doc_filename}.txt").open("w") as fp:
fp.write(conv_res.render_as_text())
# Export Markdown format:
with (output_dir / f"{doc_filename}.md").open("w") as fp:
fp.write(conv_res.render_as_markdown())
# Export Document Tags format:
with (output_dir / f"{doc_filename}.doctags").open("w") as fp:
fp.write(conv_res.render_as_doctags())
else:
_log.info(f"Document {conv_res.input.file} failed to convert.")
failure_count += 1

View File

@@ -51,6 +51,7 @@ def main():
for (
content_text,
content_md,
content_dt,
page_cells,
page_segments,
page,
@@ -71,6 +72,7 @@ def main():
"cells": page_cells,
"contents": content_text,
"contents_md": content_md,
"contents_dt": content_dt,
"segments": page_segments,
"extra": {
"page_num": page.page_no + 1,