mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-31 14:34:40 +00:00
Updated example
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
40b3f597f3
commit
e13fa5ade8
@ -68,18 +68,13 @@ for source in sources:
|
|||||||
|
|
||||||
res = converter.convert(source)
|
res = converter.convert(source)
|
||||||
|
|
||||||
print("------------------------------------------------")
|
|
||||||
print("MD:")
|
|
||||||
print("------------------------------------------------")
|
|
||||||
print("")
|
print("")
|
||||||
print(res.document.export_to_markdown())
|
print(res.document.export_to_markdown())
|
||||||
|
|
||||||
doctags = ""
|
|
||||||
for page in res.pages:
|
for page in res.pages:
|
||||||
print("")
|
print("")
|
||||||
print("Predicted page in DOCTAGS:")
|
print("Predicted page in DOCTAGS:")
|
||||||
print(page.predictions.vlm_response.text)
|
print(page.predictions.vlm_response.text)
|
||||||
doctags += page.predictions.vlm_response.text
|
|
||||||
|
|
||||||
res.document.save_as_html(
|
res.document.save_as_html(
|
||||||
filename=Path("{}/{}.html".format(out_path, res.input.file.stem)),
|
filename=Path("{}/{}.html".format(out_path, res.input.file.stem)),
|
||||||
@ -90,14 +85,17 @@ for source in sources:
|
|||||||
with (out_path / f"{res.input.file.stem}.json").open("w") as fp:
|
with (out_path / f"{res.input.file.stem}.json").open("w") as fp:
|
||||||
fp.write(json.dumps(res.document.export_to_dict()))
|
fp.write(json.dumps(res.document.export_to_dict()))
|
||||||
|
|
||||||
with (out_path / f"{res.input.file.stem}.md").open("w") as fp:
|
res.document.save_as_json(
|
||||||
fp.write(res.document.export_to_markdown())
|
out_path / f"{res.input.file.stem}.md",
|
||||||
|
image_mode=ImageRefMode.PLACEHOLDER,
|
||||||
|
)
|
||||||
|
|
||||||
with (out_path / f"{res.input.file.stem}.doctag").open("w") as fp:
|
res.document.save_as_markdown(
|
||||||
fp.write(doctags)
|
out_path / f"{res.input.file.stem}.md",
|
||||||
|
image_mode=ImageRefMode.PLACEHOLDER,
|
||||||
|
)
|
||||||
|
|
||||||
pg_num = res.document.num_pages()
|
pg_num = res.document.num_pages()
|
||||||
|
|
||||||
print("")
|
print("")
|
||||||
inference_time = time.time() - start_time
|
inference_time = time.time() - start_time
|
||||||
print(
|
print(
|
||||||
|
Loading…
Reference in New Issue
Block a user