From d5d42ed4edc271d33df9e93debfcd58f6c756562 Mon Sep 17 00:00:00 2001
From: Michele Dolfi
Date: Wed, 14 May 2025 09:51:00 +0200
Subject: [PATCH] make the example files location more generic

Signed-off-by: Michele Dolfi
---
Review note: the example inputs are anchored at Path(__file__).parent, i.e.
the docs/examples directory that holds each script. Path(__file__) names the
script file itself, so joining "../../tests/..." directly onto it steps out of
the file instead of out of its directory and ends up one level too shallow
(or does not resolve at all, since the script is not a directory).
The blob ids on the "index" lines below were not regenerated for this change.

 docs/examples/batch_convert.py                 | 8 ++++----
 docs/examples/custom_convert.py                | 2 +-
 docs/examples/develop_formula_understanding.py | 2 +-
 docs/examples/develop_picture_enrichment.py    | 2 +-
 docs/examples/export_figures.py                | 2 +-
 docs/examples/export_multimodal.py             | 2 +-
 docs/examples/export_tables.py                 | 2 +-
 docs/examples/full_page_ocr.py                 | 2 +-
 docs/examples/pictures_description_api.py      | 2 +-
 docs/examples/run_with_accelerator.py          | 2 +-
 docs/examples/tesseract_lang_detection.py      | 2 +-
 docs/examples/translate.py                     | 2 +-
 docs/examples/vlm_pipeline_api_model.py        | 4 ++--
 13 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/docs/examples/batch_convert.py b/docs/examples/batch_convert.py
index e62a7bff..cecf6dc6 100644
--- a/docs/examples/batch_convert.py
+++ b/docs/examples/batch_convert.py
@@ -122,10 +122,10 @@ def main():
     logging.basicConfig(level=logging.INFO)
 
     input_doc_paths = [
-        Path("../../tests/data/pdf/2206.01062.pdf"),
-        Path("../../tests/data/pdf/2203.01017v2.pdf"),
-        Path("../../tests/data/pdf/2305.03393v1.pdf"),
-        Path("../../tests/data/pdf/redp5110_sampled.pdf"),
+        Path(__file__).parent / "../../tests/data/pdf/2206.01062.pdf",
+        Path(__file__).parent / "../../tests/data/pdf/2203.01017v2.pdf",
+        Path(__file__).parent / "../../tests/data/pdf/2305.03393v1.pdf",
+        Path(__file__).parent / "../../tests/data/pdf/redp5110_sampled.pdf",
     ]
 
     # buf = BytesIO(Path("./test/data/2206.01062.pdf").open("rb").read())
diff --git a/docs/examples/custom_convert.py b/docs/examples/custom_convert.py
index 46dc60c9..175516be 100644
--- a/docs/examples/custom_convert.py
+++ b/docs/examples/custom_convert.py
@@ -17,7 +17,7 @@ _log = logging.getLogger(__name__)
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("../../tests/data/pdf/2206.01062.pdf")
+    input_doc_path = Path(__file__).parent / "../../tests/data/pdf/2206.01062.pdf"
 
     ###########################################################################
 
diff --git a/docs/examples/develop_formula_understanding.py b/docs/examples/develop_formula_understanding.py
index 58919443..9b0ff835 100644
--- a/docs/examples/develop_formula_understanding.py
+++ b/docs/examples/develop_formula_understanding.py
@@ -71,7 +71,7 @@ class ExampleFormulaUnderstandingPipeline(StandardPdfPipeline):
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("../../tests/data/pdf/2203.01017v2.pdf")
+    input_doc_path = Path(__file__).parent / "../../tests/data/pdf/2203.01017v2.pdf"
 
     pipeline_options = ExampleFormulaUnderstandingPipelineOptions()
     pipeline_options.do_formula_understanding = True
diff --git a/docs/examples/develop_picture_enrichment.py b/docs/examples/develop_picture_enrichment.py
index c7bd390d..6025e8da 100644
--- a/docs/examples/develop_picture_enrichment.py
+++ b/docs/examples/develop_picture_enrichment.py
@@ -76,7 +76,7 @@ class ExamplePictureClassifierPipeline(StandardPdfPipeline):
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("../../tests/data/pdf/2206.01062.pdf")
+    input_doc_path = Path(__file__).parent / "../../tests/data/pdf/2206.01062.pdf"
 
     pipeline_options = ExamplePictureClassifierPipelineOptions()
     pipeline_options.images_scale = 2.0
diff --git a/docs/examples/export_figures.py b/docs/examples/export_figures.py
index 03aa57e5..96c4bf54 100644
--- a/docs/examples/export_figures.py
+++ b/docs/examples/export_figures.py
@@ -16,7 +16,7 @@ IMAGE_RESOLUTION_SCALE = 2.0
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("../../tests/data/pdf/2206.01062.pdf")
+    input_doc_path = Path(__file__).parent / "../../tests/data/pdf/2206.01062.pdf"
     output_dir = Path("scratch")
 
     # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
diff --git a/docs/examples/export_multimodal.py b/docs/examples/export_multimodal.py
index fcccc584..35a0f66d 100644
--- a/docs/examples/export_multimodal.py
+++ b/docs/examples/export_multimodal.py
@@ -19,7 +19,7 @@ IMAGE_RESOLUTION_SCALE = 2.0
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("../../tests/data/pdf/2206.01062.pdf")
+    input_doc_path = Path(__file__).parent / "../../tests/data/pdf/2206.01062.pdf"
     output_dir = Path("scratch")
 
     # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
diff --git a/docs/examples/export_tables.py b/docs/examples/export_tables.py
index 43e19a0a..8219f784 100644
--- a/docs/examples/export_tables.py
+++ b/docs/examples/export_tables.py
@@ -12,7 +12,7 @@ _log = logging.getLogger(__name__)
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("../../tests/data/pdf/2206.01062.pdf")
+    input_doc_path = Path(__file__).parent / "../../tests/data/pdf/2206.01062.pdf"
     output_dir = Path("scratch")
 
     doc_converter = DocumentConverter()
diff --git a/docs/examples/full_page_ocr.py b/docs/examples/full_page_ocr.py
index 98cf9452..b1337125 100644
--- a/docs/examples/full_page_ocr.py
+++ b/docs/examples/full_page_ocr.py
@@ -9,7 +9,7 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
 
 
 def main():
-    input_doc = Path("../../tests/data/pdf/2206.01062.pdf")
+    input_doc = Path(__file__).parent / "../../tests/data/pdf/2206.01062.pdf"
 
     pipeline_options = PdfPipelineOptions()
     pipeline_options.do_ocr = True
diff --git a/docs/examples/pictures_description_api.py b/docs/examples/pictures_description_api.py
index f1226ad0..cc4746c2 100644
--- a/docs/examples/pictures_description_api.py
+++ b/docs/examples/pictures_description_api.py
@@ -67,7 +67,7 @@ def watsonx_vlm_options():
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("../../tests/data/pdf/2206.01062.pdf")
+    input_doc_path = Path(__file__).parent / "../../tests/data/pdf/2206.01062.pdf"
 
     pipeline_options = PdfPipelineOptions(
         enable_remote_services=True  # <-- this is required!
diff --git a/docs/examples/run_with_accelerator.py b/docs/examples/run_with_accelerator.py
index 73c37056..c4989344 100644
--- a/docs/examples/run_with_accelerator.py
+++ b/docs/examples/run_with_accelerator.py
@@ -11,7 +11,7 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
 
 
 def main():
-    input_doc = Path("../../tests/data/pdf/2206.01062.pdf")
+    input_doc = Path(__file__).parent / "../../tests/data/pdf/2206.01062.pdf"
 
     # Explicitly set the accelerator
     # accelerator_options = AcceleratorOptions(
diff --git a/docs/examples/tesseract_lang_detection.py b/docs/examples/tesseract_lang_detection.py
index 18a40f7a..bba645c6 100644
--- a/docs/examples/tesseract_lang_detection.py
+++ b/docs/examples/tesseract_lang_detection.py
@@ -9,7 +9,7 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
 
 
 def main():
-    input_doc = Path("../../tests/data/pdf/2206.01062.pdf")
+    input_doc = Path(__file__).parent / "../../tests/data/pdf/2206.01062.pdf"
 
     # Set lang=["auto"] with a tesseract OCR engine: TesseractOcrOptions, TesseractCliOcrOptions
     # ocr_options = TesseractOcrOptions(lang=["auto"])
diff --git a/docs/examples/translate.py b/docs/examples/translate.py
index 62473699..ea938340 100644
--- a/docs/examples/translate.py
+++ b/docs/examples/translate.py
@@ -31,7 +31,7 @@ def translate(text: str, src: str = "en", dest: str = "de"):
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("../../tests/data/pdf/2206.01062.pdf")
+    input_doc_path = Path(__file__).parent / "../../tests/data/pdf/2206.01062.pdf"
     output_dir = Path("scratch")
 
     # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
diff --git a/docs/examples/vlm_pipeline_api_model.py b/docs/examples/vlm_pipeline_api_model.py
index 263627b1..2fbd2b22 100644
--- a/docs/examples/vlm_pipeline_api_model.py
+++ b/docs/examples/vlm_pipeline_api_model.py
@@ -69,8 +69,8 @@ def watsonx_vlm_options(model: str, prompt: str):
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    # input_doc_path = Path("../../tests/data/pdf/2206.01062.pdf")
-    input_doc_path = Path("../../tests/data/pdf/2305.03393v1-pg9.pdf")
+    # input_doc_path = Path(__file__).parent / "../../tests/data/pdf/2206.01062.pdf"
+    input_doc_path = Path(__file__).parent / "../../tests/data/pdf/2305.03393v1-pg9.pdf"
 
     pipeline_options = VlmPipelineOptions(
         enable_remote_services=True  # <-- this is required!