diff --git a/tests/test_backend_docling_parse_v4.py b/tests/test_backend_docling_parse_v4.py index 957261d3..35c4eab7 100644 --- a/tests/test_backend_docling_parse_v4.py +++ b/tests/test_backend_docling_parse_v4.py @@ -12,7 +12,7 @@ from docling.datamodel.document import InputDocument @pytest.fixture def test_doc_path(): - return Path("../../tests/data/pdf/2206.01062.pdf") + return Path("./tests/data/pdf/2206.01062.pdf") def _get_backend(pdf_doc): @@ -27,7 +27,7 @@ def _get_backend(pdf_doc): def test_text_cell_counts(): - pdf_doc = Path("../../tests/data/pdf/redp5110_sampled.pdf") + pdf_doc = Path("./tests/data/pdf/redp5110_sampled.pdf") doc_backend = _get_backend(pdf_doc) diff --git a/tests/test_backend_html.py b/tests/test_backend_html.py index a10a3400..18254a78 100644 --- a/tests/test_backend_html.py +++ b/tests/test_backend_html.py @@ -106,7 +106,7 @@ def test_ordered_lists(): def get_html_paths(): # Define the directory you want to search - directory = Path("../../tests/data/html/") + directory = Path("./tests/data/html/") # List all HTML files in the directory and its subdirectories html_files = sorted(directory.rglob("*.html")) diff --git a/tests/test_backend_msexcel.py b/tests/test_backend_msexcel.py index 807904c7..65f636e0 100644 --- a/tests/test_backend_msexcel.py +++ b/tests/test_backend_msexcel.py @@ -18,7 +18,7 @@ GENERATE = GEN_TEST_DATA def get_xlsx_paths(): # Define the directory you want to search - directory = Path("../../tests/data/xlsx/") + directory = Path("./tests/data/xlsx/") # List all PDF files in the directory and its subdirectories pdf_files = sorted(directory.rglob("*.xlsx")) diff --git a/tests/test_backend_msword.py b/tests/test_backend_msword.py index e9e5379b..c50e0718 100644 --- a/tests/test_backend_msword.py +++ b/tests/test_backend_msword.py @@ -43,7 +43,7 @@ def test_heading_levels(): def get_docx_paths(): # Define the directory you want to search - directory = Path("../../tests/data/docx/") + directory = Path("./tests/data/docx/") # List all PDF files in the directory and its subdirectories pdf_files = sorted(directory.rglob("*.docx")) diff --git a/tests/test_backend_patent_uspto.py b/tests/test_backend_patent_uspto.py index 3bc0a846..ace6d3a2 100644 --- a/tests/test_backend_patent_uspto.py +++ b/tests/test_backend_patent_uspto.py @@ -17,8 +17,8 @@ from .test_data_gen_flag import GEN_TEST_DATA from .verify_utils import verify_document GENERATE: bool = GEN_TEST_DATA -DATA_PATH: Path = Path("../../tests/data/uspto/") -GT_PATH: Path = Path("../../tests/data/groundtruth/docling_v2/") +DATA_PATH: Path = Path("./tests/data/uspto/") +GT_PATH: Path = Path("./tests/data/groundtruth/docling_v2/") def _generate_groundtruth(doc: DoclingDocument, file_stem: str) -> None: diff --git a/tests/test_backend_pdfium.py b/tests/test_backend_pdfium.py index 7505f171..317cdeed 100644 --- a/tests/test_backend_pdfium.py +++ b/tests/test_backend_pdfium.py @@ -13,7 +13,7 @@ from docling.datamodel.document import InputDocument @pytest.fixture def test_doc_path(): - return Path("../../tests/data/pdf/2206.01062.pdf") + return Path("./tests/data/pdf/2206.01062.pdf") def _get_backend(pdf_doc): @@ -28,7 +28,7 @@ def _get_backend(pdf_doc): def test_text_cell_counts(): - pdf_doc = Path("../../tests/data/pdf/redp5110_sampled.pdf") + pdf_doc = Path("./tests/data/pdf/redp5110_sampled.pdf") doc_backend = _get_backend(pdf_doc) diff --git a/tests/test_backend_pptx.py b/tests/test_backend_pptx.py index c6f7d74a..4f73c870 100644 --- a/tests/test_backend_pptx.py +++ b/tests/test_backend_pptx.py @@ -12,7 +12,7 @@ GENERATE = GEN_TEST_DATA def get_pptx_paths(): # Define the directory you want to search - directory = Path("../../tests/data/pptx/") + directory = Path("./tests/data/pptx/") # List all PPTX files in the directory and its subdirectories pptx_files = sorted(directory.rglob("*.pptx")) diff --git a/tests/test_cli.py b/tests/test_cli.py index daf5c4e5..4364df8b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -18,7 +18,7 @@ def test_cli_version(): def test_cli_convert(tmp_path): - source = "../../tests/data/pdf/2305.03393v1-pg9.pdf" + source = "./tests/data/pdf/2305.03393v1-pg9.pdf" output = tmp_path / "out" output.mkdir() result = runner.invoke(app, [source, "--output", str(output)]) diff --git a/tests/test_e2e_conversion.py b/tests/test_e2e_conversion.py index 70c6542d..5dc2e89a 100644 --- a/tests/test_e2e_conversion.py +++ b/tests/test_e2e_conversion.py @@ -15,7 +15,7 @@ GENERATE_V2 = GEN_TEST_DATA def get_pdf_paths(): # Define the directory you want to search - directory = Path("../../tests/data/pdf/") + directory = Path("./tests/data/pdf/") # List all PDF files in the directory and its subdirectories pdf_files = sorted(directory.rglob("*.pdf")) diff --git a/tests/test_input_doc.py b/tests/test_input_doc.py index 6a575a48..94a68873 100644 --- a/tests/test_input_doc.py +++ b/tests/test_input_doc.py @@ -12,7 +12,7 @@ from docling.document_converter import PdfFormatOption def test_in_doc_from_valid_path(): - test_doc_path = Path("../../tests/data/pdf/2206.01062.pdf") + test_doc_path = Path("./tests/data/pdf/2206.01062.pdf") doc = _make_input_doc(test_doc_path) assert doc.valid is True @@ -26,7 +26,7 @@ def test_in_doc_from_invalid_path(): def test_in_doc_from_valid_buf(): - buf = BytesIO(Path("../../tests/data/pdf/2206.01062.pdf").open("rb").read()) + buf = BytesIO(Path("./tests/data/pdf/2206.01062.pdf").open("rb").read()) stream = DocumentStream(name="my_doc.pdf", stream=buf) doc = _make_input_doc_from_stream(stream) @@ -72,7 +72,7 @@ def test_image_in_pdf_backend(): def test_in_doc_with_page_range(): - test_doc_path = Path("../../tests/data/pdf/2206.01062.pdf") + test_doc_path = Path("./tests/data/pdf/2206.01062.pdf") limits = DocumentLimits() limits.page_range = (1, 10) @@ -112,72 +112,72 @@ def test_guess_format(tmp_path): temp_dir.mkdir() # Valid PDF - buf = BytesIO(Path("../../tests/data/pdf/2206.01062.pdf").open("rb").read()) + buf = BytesIO(Path("./tests/data/pdf/2206.01062.pdf").open("rb").read()) stream = DocumentStream(name="my_doc.pdf", stream=buf) assert dci._guess_format(stream) == InputFormat.PDF - doc_path = Path("../../tests/data/pdf/2206.01062.pdf") + doc_path = Path("./tests/data/pdf/2206.01062.pdf") assert dci._guess_format(doc_path) == InputFormat.PDF # Valid MS Office - buf = BytesIO(Path("../../tests/data/docx/lorem_ipsum.docx").open("rb").read()) + buf = BytesIO(Path("./tests/data/docx/lorem_ipsum.docx").open("rb").read()) stream = DocumentStream(name="lorem_ipsum.docx", stream=buf) assert dci._guess_format(stream) == InputFormat.DOCX - doc_path = Path("../../tests/data/docx/lorem_ipsum.docx") + doc_path = Path("./tests/data/docx/lorem_ipsum.docx") assert dci._guess_format(doc_path) == InputFormat.DOCX # Valid HTML - buf = BytesIO(Path("../../tests/data/html/wiki_duck.html").open("rb").read()) + buf = BytesIO(Path("./tests/data/html/wiki_duck.html").open("rb").read()) stream = DocumentStream(name="wiki_duck.html", stream=buf) assert dci._guess_format(stream) == InputFormat.HTML - doc_path = Path("../../tests/data/html/wiki_duck.html") + doc_path = Path("./tests/data/html/wiki_duck.html") assert dci._guess_format(doc_path) == InputFormat.HTML # Valid MD - buf = BytesIO(Path("../../tests/data/md/wiki.md").open("rb").read()) + buf = BytesIO(Path("./tests/data/md/wiki.md").open("rb").read()) stream = DocumentStream(name="wiki.md", stream=buf) assert dci._guess_format(stream) == InputFormat.MD - doc_path = Path("../../tests/data/md/wiki.md") + doc_path = Path("./tests/data/md/wiki.md") assert dci._guess_format(doc_path) == InputFormat.MD # Valid CSV - buf = BytesIO(Path("../../tests/data/csv/csv-comma.csv").open("rb").read()) + buf = BytesIO(Path("./tests/data/csv/csv-comma.csv").open("rb").read()) stream = DocumentStream(name="csv-comma.csv", stream=buf) assert dci._guess_format(stream) == InputFormat.CSV stream = DocumentStream(name="test-comma", stream=buf) assert dci._guess_format(stream) == InputFormat.CSV - doc_path = Path("../../tests/data/csv/csv-comma.csv") + doc_path = Path("./tests/data/csv/csv-comma.csv") assert dci._guess_format(doc_path) == InputFormat.CSV # Valid XML USPTO patent - buf = BytesIO(Path("../../tests/data/uspto/ipa20110039701.xml").open("rb").read()) + buf = BytesIO(Path("./tests/data/uspto/ipa20110039701.xml").open("rb").read()) stream = DocumentStream(name="ipa20110039701.xml", stream=buf) assert dci._guess_format(stream) == InputFormat.XML_USPTO - doc_path = Path("../../tests/data/uspto/ipa20110039701.xml") + doc_path = Path("./tests/data/uspto/ipa20110039701.xml") assert dci._guess_format(doc_path) == InputFormat.XML_USPTO - buf = BytesIO(Path("../../tests/data/uspto/pftaps057006474.txt").open("rb").read()) + buf = BytesIO(Path("./tests/data/uspto/pftaps057006474.txt").open("rb").read()) stream = DocumentStream(name="pftaps057006474.txt", stream=buf) assert dci._guess_format(stream) == InputFormat.XML_USPTO - doc_path = Path("../../tests/data/uspto/pftaps057006474.txt") + doc_path = Path("./tests/data/uspto/pftaps057006474.txt") assert dci._guess_format(doc_path) == InputFormat.XML_USPTO # Valid XML JATS - buf = BytesIO(Path("../../tests/data/jats/elife-56337.xml").open("rb").read()) + buf = BytesIO(Path("./tests/data/jats/elife-56337.xml").open("rb").read()) stream = DocumentStream(name="elife-56337.xml", stream=buf) assert dci._guess_format(stream) == InputFormat.XML_JATS - doc_path = Path("../../tests/data/jats/elife-56337.xml") + doc_path = Path("./tests/data/jats/elife-56337.xml") assert dci._guess_format(doc_path) == InputFormat.XML_JATS - buf = BytesIO(Path("../../tests/data/jats/elife-56337.nxml").open("rb").read()) + buf = BytesIO(Path("./tests/data/jats/elife-56337.nxml").open("rb").read()) stream = DocumentStream(name="elife-56337.nxml", stream=buf) assert dci._guess_format(stream) == InputFormat.XML_JATS - doc_path = Path("../../tests/data/jats/elife-56337.nxml") + doc_path = Path("./tests/data/jats/elife-56337.nxml") assert dci._guess_format(doc_path) == InputFormat.XML_JATS - buf = BytesIO(Path("../../tests/data/jats/elife-56337.txt").open("rb").read()) + buf = BytesIO(Path("./tests/data/jats/elife-56337.txt").open("rb").read()) stream = DocumentStream(name="elife-56337.txt", stream=buf) assert dci._guess_format(stream) == InputFormat.XML_JATS - doc_path = Path("../../tests/data/jats/elife-56337.txt") + doc_path = Path("./tests/data/jats/elife-56337.txt") assert dci._guess_format(doc_path) == InputFormat.XML_JATS # Valid XML, non-supported flavor diff --git a/tests/test_interfaces.py b/tests/test_interfaces.py index 4dc89691..8d68f299 100644 --- a/tests/test_interfaces.py +++ b/tests/test_interfaces.py @@ -15,7 +15,7 @@ GENERATE = GEN_TEST_DATA def get_pdf_path(): - pdf_path = Path("../../tests/data/pdf/2305.03393v1-pg9.pdf") + pdf_path = Path("./tests/data/pdf/2305.03393v1-pg9.pdf") return pdf_path diff --git a/tests/test_invalid_input.py b/tests/test_invalid_input.py index 6d8cebf3..3cc7a630 100644 --- a/tests/test_invalid_input.py +++ b/tests/test_invalid_input.py @@ -8,7 +8,7 @@ from docling.document_converter import ConversionError, DocumentConverter def get_pdf_path(): - pdf_path = Path("../../tests/data/pdf/2305.03393v1-pg9.pdf") + pdf_path = Path("./tests/data/pdf/2305.03393v1-pg9.pdf") return pdf_path diff --git a/tests/test_options.py b/tests/test_options.py index 7441e401..7b0b26d3 100644 --- a/tests/test_options.py +++ b/tests/test_options.py @@ -20,7 +20,7 @@ from docling.document_converter import DocumentConverter, PdfFormatOption @pytest.fixture def test_doc_path(): - return Path("../../tests/data/pdf/2206.01062.pdf") + return Path("./tests/data/pdf/2206.01062.pdf") def get_converters_with_table_options(): @@ -159,7 +159,7 @@ def test_parser_backends(test_doc_path): } ) - test_doc_path = Path("../../tests/data/pdf/code_and_formula.pdf") + test_doc_path = Path("./tests/data/pdf/code_and_formula.pdf") doc_result: ConversionResult = converter.convert(test_doc_path) assert doc_result.status == ConversionStatus.SUCCESS