diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index fff22546..d0d90edc 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -16,6 +16,12 @@ jobs: run: poetry run pre-commit run --all-files - name: Install with poetry run: poetry install --all-extras + - name: Run examples + run: | + for file in examples/*.py; do + echo "Running example $file" + poetry run python "$file" || exit 1 + done - name: Testing run: | poetry run pytest -v tests diff --git a/examples/batch_convert.py b/examples/batch_convert.py index aaf5c49f..063d4aa0 100644 --- a/examples/batch_convert.py +++ b/examples/batch_convert.py @@ -49,17 +49,18 @@ def export_documents( f"of which {failure_count} failed " f"and {partial_success_count} were partially converted." ) + return success_count, partial_success_count, failure_count def main(): logging.basicConfig(level=logging.INFO) input_doc_paths = [ - Path("./test/data/2206.01062.pdf"), - Path("./test/data/2203.01017v2.pdf"), - Path("./test/data/2305.03393v1.pdf"), - Path("./test/data/redp5110.pdf"), - Path("./test/data/redp5695.pdf"), + Path("./tests/data/2206.01062.pdf"), + Path("./tests/data/2203.01017v2.pdf"), + Path("./tests/data/2305.03393v1.pdf"), + Path("./tests/data/redp5110.pdf"), + Path("./tests/data/redp5695.pdf"), ] # buf = BytesIO(Path("./test/data/2206.01062.pdf").open("rb").read()) @@ -73,12 +74,19 @@ def main(): start_time = time.time() conv_results = doc_converter.convert(input) - export_documents(conv_results, output_dir=Path("./scratch")) + success_count, partial_success_count, failure_count = export_documents( + conv_results, output_dir=Path("./scratch") + ) end_time = time.time() - start_time _log.info(f"All documents were converted in {end_time:.2f} seconds.") + if failure_count > 0: + raise RuntimeError( + f"The example failed converting {failure_count} on {len(input_doc_paths)}." + ) + if __name__ == "__main__": main() diff --git a/examples/custom_convert.py b/examples/custom_convert.py index 2aaab377..9d046d94 100644 --- a/examples/custom_convert.py +++ b/examples/custom_convert.py @@ -42,14 +42,16 @@ def export_documents( f"Processed {success_count + failure_count} docs, of which {failure_count} failed" ) + return success_count, failure_count + def main(): logging.basicConfig(level=logging.INFO) input_doc_paths = [ - Path("./test/data/2206.01062.pdf"), - Path("./test/data/2203.01017v2.pdf"), - Path("./test/data/2305.03393v1.pdf"), + Path("./tests/data/2206.01062.pdf"), + Path("./tests/data/2203.01017v2.pdf"), + Path("./tests/data/2305.03393v1.pdf"), ] ########################################################################### @@ -114,12 +116,19 @@ def main(): start_time = time.time() conv_results = doc_converter.convert(input) - export_documents(conv_results, output_dir=Path("./scratch")) + success_count, failure_count = export_documents( + conv_results, output_dir=Path("./scratch") + ) end_time = time.time() - start_time _log.info(f"All documents were converted in {end_time:.2f} seconds.") + if failure_count > 0: + raise RuntimeError( + f"The example failed converting {failure_count} on {len(input_doc_paths)}." + ) + if __name__ == "__main__": main() diff --git a/examples/export_figures.py b/examples/export_figures.py index 277e1c26..bdffbec1 100644 --- a/examples/export_figures.py +++ b/examples/export_figures.py @@ -22,7 +22,7 @@ def main(): logging.basicConfig(level=logging.INFO) input_doc_paths = [ - Path("./test/data/2206.01062.pdf"), + Path("./tests/data/2206.01062.pdf"), ] output_dir = Path("./scratch") @@ -41,10 +41,13 @@ def main(): conv_results = doc_converter.convert(input_files) + success_count = 0 + failure_count = 0 output_dir.mkdir(parents=True, exist_ok=True) for conv_res in conv_results: if conv_res.status != ConversionStatus.SUCCESS: _log.info(f"Document {conv_res.input.file} failed to convert.") + failure_count += 1 continue doc_filename = conv_res.input.file.stem @@ -66,10 +69,17 @@ def main(): with element_image_filename.open("wb") as fp: image.save(fp, "PNG") + success_count += 1 + end_time = time.time() - start_time _log.info(f"All documents were converted in {end_time:.2f} seconds.") + if failure_count > 0: + raise RuntimeError( + f"The example failed converting {failure_count} on {len(input_doc_paths)}." + ) + if __name__ == "__main__": main() diff --git a/poetry.lock b/poetry.lock index 16b16290..8ec14f50 100644 --- a/poetry.lock +++ b/poetry.lock @@ -797,13 +797,13 @@ tabulate = ">=0.9.0,<0.10.0" [[package]] name = "docling-ibm-models" -version = "1.1.2" +version = "1.1.3" description = "This package contains the AI models used by the Docling PDF conversion package" optional = false python-versions = "<4.0,>=3.10" files = [ - {file = "docling_ibm_models-1.1.2-py3-none-any.whl", hash = "sha256:5b5cc7bfdd690597a43ca64b4528ec4060d3557cf07d1a6ee01a7b952539a607"}, - {file = "docling_ibm_models-1.1.2.tar.gz", hash = "sha256:9926769a7053fd3696238ed9e05798b548701df01e2e2f37c48cafc7bfb62b6f"}, + {file = "docling_ibm_models-1.1.3-py3-none-any.whl", hash = "sha256:d7dfbacf5f6c63a150cb2270d67d0d6e892763af99d2a7fd434b0b240da67df3"}, + {file = "docling_ibm_models-1.1.3.tar.gz", hash = "sha256:dd89476d152c74c1b0a9c445fe31ebca4390d9b23c568bbd175b92a0ee112e77"}, ] [package.dependencies] @@ -2696,8 +2696,8 @@ files = [ numpy = [ {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, ] [[package]] @@ -2752,8 +2752,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -5143,4 +5143,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "e0f8f29e02dcc980287efc0b946df1df4d149bfe498cc16abda897842b45b019" +content-hash = "35a2c8652173107818d14f38266af07b689fa80a5f0e8f3e06f5708797511cf6" diff --git a/pyproject.toml b/pyproject.toml index 8fcff0f0..efa6a078 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ packages = [{include = "docling"}] python = "^3.10" pydantic = "^2.0.0" docling-core = "^1.1.2" -docling-ibm-models = "^1.1.2" +docling-ibm-models = "^1.1.3" deepsearch-glm = ">=0.19.0,<1" filetype = "^1.2.0" pypdfium2 = "^4.30.0"