Add code to expose text direction of cell

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-02-05 12:48:12 +01:00
parent d7c9874a88
commit 7bdd6868ed
6 changed files with 51 additions and 10 deletions

View File

@@ -49,6 +49,8 @@ def export_documents(
with (output_dir / f"{doc_filename}.md").open("w") as fp:
fp.write(conv_res.document.export_to_markdown())
conv_res.document.save_as_html(output_dir / f"{doc_filename}.html")
# Export the Docling document as plain text (Markdown export with strict_text=True):
with (output_dir / f"{doc_filename}.txt").open("w") as fp:
fp.write(conv_res.document.export_to_markdown(strict_text=True))
@@ -103,10 +105,13 @@ def main():
logging.basicConfig(level=logging.INFO)
input_doc_paths = [
Path("./tests/data/2206.01062.pdf"),
Path("./tests/data/2203.01017v2.pdf"),
Path("./tests/data/2305.03393v1.pdf"),
Path("./tests/data/redp5110_sampled.pdf"),
Path("./tests/data/pdf/right_to_left_01.pdf"),
Path("./tests/data/pdf/right_to_left_02.pdf"),
Path("./tests/data/pdf/right_to_left_03.pdf"),
Path("./tests/data/pdf/2206.01062.pdf"),
Path("./tests/data/pdf/2203.01017v2.pdf"),
Path("./tests/data/pdf/2305.03393v1.pdf"),
Path("./tests/data/pdf/redp5110_sampled.pdf"),
]
# buf = BytesIO(Path("./tests/data/pdf/2206.01062.pdf").open("rb").read())