Files
docling/tests/test_conversion_result_json.py
Peter W. J. Staar b559813b9b feat: add save and load for conversion result (#2648)
* feat: added save_as_json and load_from_json to ConversionResult

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* added a test

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* fixed the save and load for ConversionResult

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* reformatted the code

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* fixed the signature

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* refactored load/save into ConversionAssets

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* added the DoclingVersion class

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* renamed time_stamp to timestamp

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

---------

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
2025-11-20 12:45:26 +01:00

45 lines
1.5 KiB
Python

from io import BytesIO
from pathlib import Path
import pytest
from docling.backend.pypdfium2_backend import (
PyPdfiumDocumentBackend,
PyPdfiumPageBackend,
)
from docling.datamodel.base_models import ConversionStatus, InputFormat
from docling.datamodel.document import ConversionAssets
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
def test_conversion_result_json_roundtrip_string():
    """Round-trip a conversion result through ``save`` and ``ConversionAssets.load``.

    Converts the sample PDF with the pypdfium2 backend (OCR, table structure
    and page-image generation switched off), saves the result to a ``.zip``
    path, loads it back with ``ConversionAssets.load`` and checks that the
    conversion status and the document name are unchanged.
    """
    # Imported locally so the test does not depend on the module's import block.
    import tempfile

    pdf_doc = Path("./tests/data/pdf/redp5110_sampled.pdf")

    pipeline_options = PdfPipelineOptions()
    pipeline_options.do_ocr = False
    pipeline_options.images_scale = 1.0
    pipeline_options.generate_page_images = False
    pipeline_options.do_table_structure = False
    pipeline_options.table_structure_options.do_cell_matching = True
    pipeline_options.generate_parsed_pages = True

    doc_converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
                pipeline_options=pipeline_options, backend=PyPdfiumDocumentBackend
            )
        }
    )
    conv_res = doc_converter.convert(pdf_doc)

    # Write the archive into a throwaway directory instead of the current
    # working directory, so the test leaves no file behind and concurrent
    # runs cannot clash on the same path.
    with tempfile.TemporaryDirectory() as tmp_dir:
        fpath: Path = Path(tmp_dir) / "test-conversion.zip"
        conv_res.save(filename=fpath)
        loaded = ConversionAssets.load(filename=fpath)

        assert loaded.status == conv_res.status
        assert loaded.document.name == conv_res.document.name