From 51c477cfc391cb9b43f48be5ce231824b45a1289 Mon Sep 17 00:00:00 2001
From: Christoph Auer
Date: Wed, 23 Oct 2024 14:46:05 +0200
Subject: [PATCH] Add test case for InputDocument

Signed-off-by: Christoph Auer
---
 tests/test_input_doc.py | 58 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 tests/test_input_doc.py

diff --git a/tests/test_input_doc.py b/tests/test_input_doc.py
new file mode 100644
index 00000000..3f7dd0c1
--- /dev/null
+++ b/tests/test_input_doc.py
@@ -0,0 +1,58 @@
+from io import BytesIO
+from pathlib import Path
+
+from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
+from docling.datamodel.base_models import DocumentStream, InputFormat
+from docling.datamodel.document import InputDocument
+
+
+def test_in_doc_from_valid_path():
+    """An InputDocument built from the sample PDF path reports valid."""
+    test_doc_path = Path("./tests/data/2206.01062.pdf")
+    doc = _make_input_doc(test_doc_path)
+    assert doc.valid
+
+
+def test_in_doc_from_invalid_path():
+    """An InputDocument built from a path expected not to exist is invalid."""
+    test_doc_path = Path("./tests/does/not/exist.pdf")
+
+    doc = _make_input_doc(test_doc_path)
+    assert not doc.valid
+
+
+def test_in_doc_from_valid_buf():
+    """An InputDocument built from the sample PDF's bytes reports valid."""
+    buf = BytesIO(Path("./tests/data/2206.01062.pdf").read_bytes())
+    stream = DocumentStream(name="my_doc.pdf", stream=buf)
+
+    doc = _make_input_doc_from_stream(stream)
+    assert doc.valid
+
+
+def test_in_doc_from_invalid_buf():
+    """An InputDocument built from an empty byte stream is invalid."""
+    buf = BytesIO(b"")
+    stream = DocumentStream(name="my_doc.pdf", stream=buf)
+
+    doc = _make_input_doc_from_stream(stream)
+    assert not doc.valid
+
+
+def _make_input_doc(path):
+    """Build a PDF InputDocument for *path* using the pypdfium2 backend."""
+    return InputDocument(
+        path_or_stream=path,
+        format=InputFormat.PDF,
+        backend=PyPdfiumDocumentBackend,
+    )
+
+
+def _make_input_doc_from_stream(doc_stream):
+    """Build a PDF InputDocument from a DocumentStream's buffer and name."""
+    return InputDocument(
+        path_or_stream=doc_stream.stream,
+        format=InputFormat.PDF,
+        filename=doc_stream.name,
+        backend=PyPdfiumDocumentBackend,
+    )