mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-10 13:48:13 +00:00
feat(cli): add option for html with split-page mode (#1355)
* updated the cli to output html in split-page mode

  Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* add pin for new docling-core with html split argument

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* relock with fixed html export in docling-core

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update test results

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update more tests

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update example

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update lock with docling-core fixes

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update test results

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add again chunking extras

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
0de70e7991
commit
c0ba88edf1
@@ -154,6 +154,7 @@ def export_documents(
|
||||
output_dir: Path,
|
||||
export_json: bool,
|
||||
export_html: bool,
|
||||
export_html_split_page: bool,
|
||||
export_md: bool,
|
||||
export_txt: bool,
|
||||
export_doctags: bool,
|
||||
@@ -181,7 +182,15 @@ def export_documents(
|
||||
fname = output_dir / f"{doc_filename}.html"
|
||||
_log.info(f"writing HTML output to {fname}")
|
||||
conv_res.document.save_as_html(
|
||||
filename=fname, image_mode=image_export_mode
|
||||
filename=fname, image_mode=image_export_mode, split_page_view=False
|
||||
)
|
||||
|
||||
# Export HTML format:
|
||||
if export_html_split_page:
|
||||
fname = output_dir / f"{doc_filename}.html"
|
||||
_log.info(f"writing HTML output to {fname}")
|
||||
conv_res.document.save_as_html(
|
||||
filename=fname, image_mode=image_export_mode, split_page_view=True
|
||||
)
|
||||
|
||||
# Export Text format:
|
||||
@@ -472,6 +481,7 @@ def convert(
|
||||
|
||||
export_json = OutputFormat.JSON in to_formats
|
||||
export_html = OutputFormat.HTML in to_formats
|
||||
export_html_split_page = OutputFormat.HTML_SPLIT_PAGE in to_formats
|
||||
export_md = OutputFormat.MARKDOWN in to_formats
|
||||
export_txt = OutputFormat.TEXT in to_formats
|
||||
export_doctags = OutputFormat.DOCTAGS in to_formats
|
||||
@@ -585,6 +595,7 @@ def convert(
|
||||
output_dir=output,
|
||||
export_json=export_json,
|
||||
export_html=export_html,
|
||||
export_html_split_page=export_html_split_page,
|
||||
export_md=export_md,
|
||||
export_txt=export_txt,
|
||||
export_doctags=export_doctags,
|
||||
|
||||
@@ -50,6 +50,7 @@ class OutputFormat(str, Enum):
|
||||
MARKDOWN = "md"
|
||||
JSON = "json"
|
||||
HTML = "html"
|
||||
HTML_SPLIT_PAGE = "html_split_page"
|
||||
TEXT = "text"
|
||||
DOCTAGS = "doctags"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user