mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-11 06:08:09 +00:00
feat: adding txt and doctags output (#68)
* feat: adding txt and doctags output
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* cleaned up the export
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* Fix datamodel usage for Figure
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* updated all the examples to deal with new rendering
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---------
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
cd5b6293cc
commit
bdfdfbf092
@@ -163,8 +163,12 @@ def generate_multimodal_pages(
|
||||
content_md = doc.export_to_markdown(
|
||||
main_text_start=start_ix, main_text_stop=end_ix
|
||||
)
|
||||
# No page-tagging since we only process one page at a time
|
||||
content_dt = doc.export_to_document_tokens(
|
||||
main_text_start=start_ix, main_text_stop=end_ix, page_tagging=False
|
||||
)
|
||||
|
||||
return content_text, content_md, page_cells, page_segments, page
|
||||
return content_text, content_md, content_dt, page_cells, page_segments, page
|
||||
|
||||
for ix, orig_item in enumerate(doc.main_text):
|
||||
|
||||
|
||||
Reference in New Issue
Block a user