mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-11 14:18:30 +00:00
More renaming, design enrichment interface
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -39,11 +39,11 @@ def save_output(pdf_path: Path, doc_result: ConversionResult, engine: str):
|
||||
|
||||
doctags_fn = os.path.join(parent, f"{pdf_path.stem}{eng}.doctags.txt")
|
||||
with open(doctags_fn, "w") as fd:
|
||||
fd.write(doc_result.render_as_doctags_v1())
|
||||
fd.write(doc_result.render_as_doctags())
|
||||
|
||||
md_fn = os.path.join(parent, f"{pdf_path.stem}{eng}.md")
|
||||
with open(md_fn, "w") as fd:
|
||||
fd.write(doc_result.render_as_markdown_v1())
|
||||
fd.write(doc_result.render_as_markdown())
|
||||
|
||||
|
||||
def get_pdf_paths():
|
||||
|
||||
@@ -54,7 +54,7 @@ def test_batch_path(converter: DocumentConverter):
|
||||
|
||||
conv_input = DocumentConversionInput.from_paths([pdf_path])
|
||||
|
||||
results = converter.convert(conv_input)
|
||||
results = converter.convert_batch(conv_input)
|
||||
for doc_result in results:
|
||||
verify_conversion_result_v1(input_path=pdf_path, doc_result=doc_result)
|
||||
verify_conversion_result_v2(input_path=pdf_path, doc_result=doc_result)
|
||||
@@ -69,7 +69,7 @@ def test_batch_bytes(converter: DocumentConverter):
|
||||
docs = [DocumentStream(name=pdf_path.name, stream=buf)]
|
||||
conv_input = DocumentConversionInput.from_streams(docs)
|
||||
|
||||
results = converter.convert(conv_input)
|
||||
results = converter.convert_batch(conv_input)
|
||||
for doc_result in results:
|
||||
verify_conversion_result_v1(input_path=pdf_path, doc_result=doc_result)
|
||||
verify_conversion_result_v2(input_path=pdf_path, doc_result=doc_result)
|
||||
|
||||
@@ -198,8 +198,8 @@ def verify_conversion_result_v1(
|
||||
|
||||
doc_pred_pages: List[Page] = doc_result.pages
|
||||
doc_pred: DsDocument = doc_result.legacy_output
|
||||
doc_pred_md = doc_result.render_as_markdown_v1()
|
||||
doc_pred_dt = doc_result.render_as_doctags_v1()
|
||||
doc_pred_md = doc_result.render_as_markdown()
|
||||
doc_pred_dt = doc_result.render_as_doctags()
|
||||
|
||||
engine_suffix = "" if ocr_engine is None else f".{ocr_engine}"
|
||||
gt_subpath = input_path.parent / "groundtruth" / "docling_v1" / input_path.name
|
||||
|
||||
Reference in New Issue
Block a user