From aa187552e508a2d09def1ab80e9a26dc210db3d3 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Mon, 4 Nov 2024 09:08:16 +0100 Subject: [PATCH] docs: add explicit artifacts path example [skip ci] Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- docs/usage.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index 5493c2ee..df24d33d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -108,6 +108,30 @@ doc_converter = DocumentConverter( ) ``` +##### Provide specific artifacts path + +By default, artifacts such as models are downloaded automatically upon first usage. If you would prefer to use a local path where the artifacts have been explicitly prefetched, you can do that as follows: + +```python +from docling.datamodel.base_models import InputFormat +from docling.datamodel.pipeline_options import PdfPipelineOptions +from docling.document_converter import DocumentConverter, PdfFormatOption +from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline + +# to explicitly prefetch: +# artifacts_path = StandardPdfPipeline.download_models_hf() + +artifacts_path = "/local/path/to/artifacts" + +pipeline_options = PdfPipelineOptions(artifacts_path=artifacts_path) +doc_converter = DocumentConverter( + format_options={ + InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options) + } +) +``` + + #### Impose limits on the document size You can limit the file size and number of pages which should be allowed to process per document: