From 23e82a5f49350186e2de85fe9cf678a3c7b9f03f Mon Sep 17 00:00:00 2001 From: Michele Dolfi Date: Thu, 6 Feb 2025 16:12:52 +0100 Subject: [PATCH] fix example filepaths Signed-off-by: Michele Dolfi --- docs/examples/custom_convert.py | 2 +- docs/examples/develop_formula_understanding.py | 2 +- docs/examples/develop_picture_enrichment.py | 2 +- docs/examples/export_figures.py | 2 +- docs/examples/export_multimodal.py | 2 +- docs/examples/export_tables.py | 2 +- docs/examples/full_page_ocr.py | 2 +- docs/examples/inspect_picture_content.py | 2 +- docs/examples/run_with_accelerator.py | 2 +- docs/examples/run_with_formats.py | 2 +- docs/examples/tesseract_lang_detection.py | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/examples/custom_convert.py b/docs/examples/custom_convert.py index 49ab739a..ddc19217 100644 --- a/docs/examples/custom_convert.py +++ b/docs/examples/custom_convert.py @@ -21,7 +21,7 @@ _log = logging.getLogger(__name__) def main(): logging.basicConfig(level=logging.INFO) - input_doc_path = Path("./tests/data/2206.01062.pdf") + input_doc_path = Path("./tests/data/pdf/2206.01062.pdf") ########################################################################### diff --git a/docs/examples/develop_formula_understanding.py b/docs/examples/develop_formula_understanding.py index e2e508c3..2c631a8d 100644 --- a/docs/examples/develop_formula_understanding.py +++ b/docs/examples/develop_formula_understanding.py @@ -68,7 +68,7 @@ class ExampleFormulaUnderstandingPipeline(StandardPdfPipeline): def main(): logging.basicConfig(level=logging.INFO) - input_doc_path = Path("./tests/data/2203.01017v2.pdf") + input_doc_path = Path("./tests/data/pdf/2203.01017v2.pdf") pipeline_options = ExampleFormulaUnderstandingPipelineOptions() pipeline_options.do_formula_understanding = True diff --git a/docs/examples/develop_picture_enrichment.py b/docs/examples/develop_picture_enrichment.py index 81009fe8..86898889 100644 --- a/docs/examples/develop_picture_enrichment.py +++ b/docs/examples/develop_picture_enrichment.py @@ -71,7 +71,7 @@ class ExamplePictureClassifierPipeline(StandardPdfPipeline): def main(): logging.basicConfig(level=logging.INFO) - input_doc_path = Path("./tests/data/2206.01062.pdf") + input_doc_path = Path("./tests/data/pdf/2206.01062.pdf") pipeline_options = ExamplePictureClassifierPipelineOptions() pipeline_options.images_scale = 2.0 diff --git a/docs/examples/export_figures.py b/docs/examples/export_figures.py index b2ecc43f..c2186661 100644 --- a/docs/examples/export_figures.py +++ b/docs/examples/export_figures.py @@ -16,7 +16,7 @@ IMAGE_RESOLUTION_SCALE = 2.0 def main(): logging.basicConfig(level=logging.INFO) - input_doc_path = Path("./tests/data/2206.01062.pdf") + input_doc_path = Path("./tests/data/pdf/2206.01062.pdf") output_dir = Path("scratch") # Important: For operating with page images, we must keep them, otherwise the DocumentConverter diff --git a/docs/examples/export_multimodal.py b/docs/examples/export_multimodal.py index 09885bd3..e7ea3df2 100644 --- a/docs/examples/export_multimodal.py +++ b/docs/examples/export_multimodal.py @@ -19,7 +19,7 @@ IMAGE_RESOLUTION_SCALE = 2.0 def main(): logging.basicConfig(level=logging.INFO) - input_doc_path = Path("./tests/data/2206.01062.pdf") + input_doc_path = Path("./tests/data/pdf/2206.01062.pdf") output_dir = Path("scratch") # Important: For operating with page images, we must keep them, otherwise the DocumentConverter diff --git a/docs/examples/export_tables.py b/docs/examples/export_tables.py index 68b9ce47..2985127b 100644 --- a/docs/examples/export_tables.py +++ b/docs/examples/export_tables.py @@ -12,7 +12,7 @@ _log = logging.getLogger(__name__) def main(): logging.basicConfig(level=logging.INFO) - input_doc_path = Path("./tests/data/2206.01062.pdf") + input_doc_path = Path("./tests/data/pdf/2206.01062.pdf") output_dir = Path("scratch") doc_converter = DocumentConverter() diff --git a/docs/examples/full_page_ocr.py b/docs/examples/full_page_ocr.py index 967910dc..8390d5fc 100644 --- a/docs/examples/full_page_ocr.py +++ b/docs/examples/full_page_ocr.py @@ -14,7 +14,7 @@ from docling.document_converter import DocumentConverter, PdfFormatOption def main(): - input_doc = Path("./tests/data/2206.01062.pdf") + input_doc = Path("./tests/data/pdf/2206.01062.pdf") pipeline_options = PdfPipelineOptions() pipeline_options.do_ocr = True diff --git a/docs/examples/inspect_picture_content.py b/docs/examples/inspect_picture_content.py index 7930acf5..3a3f6657 100644 --- a/docs/examples/inspect_picture_content.py +++ b/docs/examples/inspect_picture_content.py @@ -4,7 +4,7 @@ from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.document_converter import DocumentConverter, PdfFormatOption -source = "tests/data/amt_handbook_sample.pdf" +source = "tests/data/pdf/amt_handbook_sample.pdf" pipeline_options = PdfPipelineOptions() pipeline_options.images_scale = 2 diff --git a/docs/examples/run_with_accelerator.py b/docs/examples/run_with_accelerator.py index 5985401d..e53ab2a2 100644 --- a/docs/examples/run_with_accelerator.py +++ b/docs/examples/run_with_accelerator.py @@ -14,7 +14,7 @@ from docling.document_converter import DocumentConverter, PdfFormatOption def main(): - input_doc = Path("./tests/data/2206.01062.pdf") + input_doc = Path("./tests/data/pdf/2206.01062.pdf") # Explicitly set the accelerator # accelerator_options = AcceleratorOptions( diff --git a/docs/examples/run_with_formats.py b/docs/examples/run_with_formats.py index 7bd27de5..a6bd6315 100644 --- a/docs/examples/run_with_formats.py +++ b/docs/examples/run_with_formats.py @@ -25,7 +25,7 @@ def main(): Path("tests/data/docx/lorem_ipsum.docx"), Path("tests/data/pptx/powerpoint_sample.pptx"), Path("tests/data/2305.03393v1-pg9-img.png"), - Path("tests/data/2206.01062.pdf"), + Path("tests/data/pdf/2206.01062.pdf"), Path("tests/data/test_01.asciidoc"), Path("tests/data/test_01.asciidoc"), ] diff --git a/docs/examples/tesseract_lang_detection.py b/docs/examples/tesseract_lang_detection.py index b75e4707..0de0dd6b 100644 --- a/docs/examples/tesseract_lang_detection.py +++ b/docs/examples/tesseract_lang_detection.py @@ -10,7 +10,7 @@ from docling.document_converter import DocumentConverter, PdfFormatOption def main(): - input_doc = Path("./tests/data/2206.01062.pdf") + input_doc = Path("./tests/data/pdf/2206.01062.pdf") # Set lang=["auto"] with a tesseract OCR engine: TesseractOcrOptions, TesseractCliOcrOptions # ocr_options = TesseractOcrOptions(lang=["auto"])