mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
feat: add save and load for conversion result (#2648)
* feat: added save_as_json and load_from_json to ConversionResult
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* added a test
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* fixed the save and load for ConversionResult
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* reformatted the code
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* fixed the signature
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* refactored load/save into ConversionAssets
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* added the DoclingVersion class
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* renamed time_stamp to timestamp
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---------
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
6fb9a5f98a
commit
b559813b9b
44
tests/test_conversion_result_json.py
Normal file
44
tests/test_conversion_result_json.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from docling.backend.pypdfium2_backend import (
|
||||
PyPdfiumDocumentBackend,
|
||||
PyPdfiumPageBackend,
|
||||
)
|
||||
from docling.datamodel.base_models import ConversionStatus, InputFormat
|
||||
from docling.datamodel.document import ConversionAssets
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||
|
||||
|
||||
def test_conversion_result_json_roundtrip_string():
    """Round-trip a conversion result through ``save`` and ``ConversionAssets.load``.

    Converts a sample PDF with the pypdfium2 backend, saves the result to a
    file, loads it back via ``ConversionAssets.load`` and checks that the
    conversion status and the document name survive the round trip.

    The output file is written into a temporary directory so the test does
    not leave ``test-conversion.zip`` behind in the current working directory
    and cannot collide with other test runs.
    """
    # Function-local import: keeps this block self-contained without touching
    # the module's top-level import section.
    import tempfile

    pdf_doc = Path("./tests/data/pdf/redp5110_sampled.pdf")

    # OCR, page-image generation and table-structure recognition are switched
    # off; parsed pages are requested explicitly.
    pipeline_options = PdfPipelineOptions()
    pipeline_options.do_ocr = False
    pipeline_options.images_scale = 1.0
    pipeline_options.generate_page_images = False
    pipeline_options.do_table_structure = False
    pipeline_options.table_structure_options.do_cell_matching = True
    pipeline_options.generate_parsed_pages = True

    doc_converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
                pipeline_options=pipeline_options, backend=PyPdfiumDocumentBackend
            )
        }
    )
    conv_res = doc_converter.convert(pdf_doc)

    with tempfile.TemporaryDirectory() as tmp_dir:
        fpath: Path = Path(tmp_dir) / "test-conversion.zip"

        conv_res.save(filename=fpath)
        loaded = ConversionAssets.load(filename=fpath)

        # Assertions stay inside the ``with`` block so the saved file still
        # exists while ``loaded`` is inspected.
        # NOTE(review): presumably ``load`` reads everything eagerly — confirm.
        assert loaded.status == conv_res.status
        assert loaded.document.name == conv_res.document.name
|
||||
Reference in New Issue
Block a user