feat(CLI): Option to download arbitrary HuggingFace model (#2123)

* Added option to docling-tools to download arbitrary HuggingFace model Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com> * Added note in documentation Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com> * Removed note on custom artifact path usage from HF download option Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com> * Fixed typo Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com> --------- Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>
2025-12-08 12:48:28 +00:00 · 2025-08-22 15:23:29 +02:00
parent 449bde0a6c
commit cdf079dd06
2 changed files with 63 additions and 0 deletions
--- a/docling/cli/models.py
+++ b/docling/cli/models.py
@@ -9,6 +9,7 @@ from rich.console import Console
 from rich.logging import RichHandler

 from docling.datamodel.settings import settings
+from docling.models.utils.hf_model_download import download_hf_model
 from docling.utils.model_downloader import download_models

 warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
@@ -128,6 +129,61 @@ def download(
        )


+@app.command("download-hf-repo")
+def download_hf_repo(
+    models: Annotated[
+        list[str],
+        typer.Argument(
+            help="Specific models to download from HuggingFace identified by their repo id. For example: ds4sd/docling-models .",
+        ),
+    ],
+    output_dir: Annotated[
+        Path,
+        typer.Option(
+            ...,
+            "-o",
+            "--output-dir",
+            help="The directory where to download the models.",
+        ),
+    ] = (settings.cache_dir / "models"),
+    force: Annotated[
+        bool, typer.Option(..., help="If true, the download will be forced.")
+    ] = False,
+    quiet: Annotated[
+        bool,
+        typer.Option(
+            ...,
+            "-q",
+            "--quiet",
+            help="No extra output is generated, the CLI prints only the directory with the cached models.",
+        ),
+    ] = False,
+):
+    """Download arbitrary HuggingFace repositories into the local models directory."""
+    # NOTE: Typer uses the docstring above as this command's help text, so it
+    # is intentionally kept to a single line; details live in comments instead.
+    #
+    # Each repo id in ``models`` is downloaded into its own sub-folder of
+    # ``output_dir``, named after the repo id with "/" replaced by "--".
+    # ``force`` and ``quiet`` are forwarded to ``download_hf_model`` (the
+    # latter inverted, as its ``progress`` flag).
+    if not quiet:
+        # Route log records through rich so they render as blue markup.
+        logging.basicConfig(
+            level=logging.INFO,
+            format="[blue]%(message)s[/blue]",
+            datefmt="[%X]",
+            handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
+        )
+
+    for item in models:
+        # The per-model banner counts as "extra output": suppress it in quiet
+        # mode so that only the output directory is printed, as --quiet promises.
+        if not quiet:
+            typer.secho(f"\nDownloading {item} model from HuggingFace...")
+        download_hf_model(
+            repo_id=item,
+            # would be better to reuse "repo_cache_folder" property: https://github.com/docling-project/docling/blob/main/docling/datamodel/pipeline_options_vlm_model.py#L76
+            # but creating options objects seems like an overkill
+            local_dir=output_dir / item.replace("/", "--"),
+            force=force,
+            progress=(not quiet),
+        )
+
+    if quiet:
+        # Machine-friendly output: just the directory.
+        typer.echo(output_dir)
+    else:
+        typer.secho(f"\nModels downloaded into: {output_dir}.", fg="green")
+
+
 click_app = typer.main.get_command(app)

 if __name__ == "__main__":
--- a/docs/usage/advanced_options.md
+++ b/docs/usage/advanced_options.md
@@ -20,6 +20,13 @@ Models downloaded into $HOME/.cache/docling/models.

 Alternatively, models can be programmatically downloaded using `docling.utils.model_downloader.download_models()`.

+Also, you can use the `download-hf-repo` command to download arbitrary models from HuggingFace by specifying their repo id:
+
+```sh
+$ docling-tools models download-hf-repo ds4sd/SmolDocling-256M-preview
+Downloading ds4sd/SmolDocling-256M-preview model from HuggingFace...
+```
+
 **Step 2: Use the prefetched models**

 ```python