mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
updated all the examples to deal with new rendering
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
parent
fd23432f6e
commit
4f89470ab1
@ -163,8 +163,12 @@ def generate_multimodal_pages(
|
|||||||
content_md = doc.export_to_markdown(
|
content_md = doc.export_to_markdown(
|
||||||
main_text_start=start_ix, main_text_stop=end_ix
|
main_text_start=start_ix, main_text_stop=end_ix
|
||||||
)
|
)
|
||||||
|
# No page-tagging since we only do 1 page at the time
|
||||||
|
content_dt = doc.export_to_document_tokens(
|
||||||
|
main_text_start=start_ix, main_text_stop=end_ix, page_tagging=False
|
||||||
|
)
|
||||||
|
|
||||||
return content_text, content_md, page_cells, page_segments, page
|
return content_text, content_md, content_dt, page_cells, page_segments, page
|
||||||
|
|
||||||
for ix, orig_item in enumerate(doc.main_text):
|
for ix, orig_item in enumerate(doc.main_text):
|
||||||
|
|
||||||
|
@ -31,9 +31,18 @@ def export_documents(
|
|||||||
with (output_dir / f"{doc_filename}.json").open("w") as fp:
|
with (output_dir / f"{doc_filename}.json").open("w") as fp:
|
||||||
fp.write(json.dumps(conv_res.render_as_dict()))
|
fp.write(json.dumps(conv_res.render_as_dict()))
|
||||||
|
|
||||||
|
# Export Text format:
|
||||||
|
with (output_dir / f"{doc_filename}.txt").open("w") as fp:
|
||||||
|
fp.write(conv_res.render_as_text())
|
||||||
|
|
||||||
# Export Markdown format:
|
# Export Markdown format:
|
||||||
with (output_dir / f"{doc_filename}.md").open("w") as fp:
|
with (output_dir / f"{doc_filename}.md").open("w") as fp:
|
||||||
fp.write(conv_res.render_as_markdown())
|
fp.write(conv_res.render_as_markdown())
|
||||||
|
|
||||||
|
# Export Document Tags format:
|
||||||
|
with (output_dir / f"{doc_filename}.doctags").open("w") as fp:
|
||||||
|
fp.write(conv_res.render_as_doctags())
|
||||||
|
|
||||||
else:
|
else:
|
||||||
_log.info(f"Document {conv_res.input.file} failed to convert.")
|
_log.info(f"Document {conv_res.input.file} failed to convert.")
|
||||||
failure_count += 1
|
failure_count += 1
|
||||||
|
@ -51,6 +51,7 @@ def main():
|
|||||||
for (
|
for (
|
||||||
content_text,
|
content_text,
|
||||||
content_md,
|
content_md,
|
||||||
|
content_dt,
|
||||||
page_cells,
|
page_cells,
|
||||||
page_segments,
|
page_segments,
|
||||||
page,
|
page,
|
||||||
@ -71,6 +72,7 @@ def main():
|
|||||||
"cells": page_cells,
|
"cells": page_cells,
|
||||||
"contents": content_text,
|
"contents": content_text,
|
||||||
"contents_md": content_md,
|
"contents_md": content_md,
|
||||||
|
"contents_dt": content_dt,
|
||||||
"segments": page_segments,
|
"segments": page_segments,
|
||||||
"extra": {
|
"extra": {
|
||||||
"page_num": page.page_no + 1,
|
"page_num": page.page_no + 1,
|
||||||
|
Loading…
Reference in New Issue
Block a user