diff --git a/docs/examples/batch_convert.py b/docs/examples/batch_convert.py
index f6ad92bd..cd5e4b3b 100644
--- a/docs/examples/batch_convert.py
+++ b/docs/examples/batch_convert.py
@@ -103,10 +103,10 @@ def main():
     logging.basicConfig(level=logging.INFO)
 
     input_doc_paths = [
-        Path("./tests/data/2206.01062.pdf"),
-        Path("./tests/data/2203.01017v2.pdf"),
-        Path("./tests/data/2305.03393v1.pdf"),
-        Path("./tests/data/redp5110_sampled.pdf"),
+        Path("./tests/data/pdf/2206.01062.pdf"),
+        Path("./tests/data/pdf/2203.01017v2.pdf"),
+        Path("./tests/data/pdf/2305.03393v1.pdf"),
+        Path("./tests/data/pdf/redp5110_sampled.pdf"),
     ]
 
     # buf = BytesIO(Path("./test/data/2206.01062.pdf").open("rb").read())
diff --git a/docs/examples/translate.py b/docs/examples/translate.py
index a36c6f71..fa39b6d3 100644
--- a/docs/examples/translate.py
+++ b/docs/examples/translate.py
@@ -32,7 +32,7 @@ def translate(text: str, src: str = "en", dest: str = "de"):
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("./tests/data/2206.01062.pdf")
+    input_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
     output_dir = Path("scratch")
 
     # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
diff --git a/docs/v2.md b/docs/v2.md
index bf1de1c8..df1c8233 100644
--- a/docs/v2.md
+++ b/docs/v2.md
@@ -117,12 +117,12 @@ conv_result: ConversionResult = doc_converter.convert("https://arxiv.org/pdf/240
 ## Convert several files at once:
 
 input_files = [
-    "tests/data/wiki_duck.html",
-    "tests/data/word_sample.docx",
-    "tests/data/lorem_ipsum.docx",
-    "tests/data/powerpoint_sample.pptx",
+    "tests/data/html/wiki_duck.html",
+    "tests/data/docx/word_sample.docx",
+    "tests/data/docx/lorem_ipsum.docx",
+    "tests/data/pptx/powerpoint_sample.pptx",
     "tests/data/2305.03393v1-pg9-img.png",
-    "tests/data/2206.01062.pdf",
+    "tests/data/pdf/2206.01062.pdf",
 ]
 
 # Directly pass list of files or streams to `convert_all`