feat(CLI): Option to download arbitrary HuggingFace model (#2123)

* Added option to docling-tools to download arbitrary HuggingFace model

Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>

* Added note in documentation

Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>

* Removed note on custom artifact path usage from HF download option

Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>

* Fixed typo

Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>

---------

Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>
This commit is contained in:
VIktor Kuropiantnyk
2025-08-22 15:23:29 +02:00
committed by GitHub
parent 449bde0a6c
commit cdf079dd06
2 changed files with 63 additions and 0 deletions

View File

@@ -9,6 +9,7 @@ from rich.console import Console
from rich.logging import RichHandler
from docling.datamodel.settings import settings
from docling.models.utils.hf_model_download import download_hf_model
from docling.utils.model_downloader import download_models
# Silence UserWarning messages raised from modules whose name matches "pydantic" or "torch".
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
@@ -128,6 +129,61 @@ def download(
)
@app.command("download-hf-repo")
def download_hf_repo(
    models: Annotated[
        list[str],
        typer.Argument(
            help="Specific models to download from HuggingFace identified by their repo id. For example: ds4sd/docling-models .",
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            ...,
            "-o",
            "--output-dir",
            help="The directory where to download the models.",
        ),
    ] = (settings.cache_dir / "models"),
    force: Annotated[
        bool, typer.Option(..., help="If true, the download will be forced.")
    ] = False,
    quiet: Annotated[
        bool,
        typer.Option(
            ...,
            "-q",
            "--quiet",
            help="No extra output is generated, the CLI prints only the directory with the cached models.",
        ),
    ] = False,
):
    """Download arbitrary HuggingFace model repositories identified by their repo id."""
    # NOTE: Typer surfaces the docstring above as the command help text, so it
    # is intentionally kept to a single line; per-parameter documentation lives
    # in the `help=` strings of the signature.

    # Rich-formatted INFO logging is only enabled when the user wants output.
    if not quiet:
        logging.basicConfig(
            level=logging.INFO,
            format="[blue]%(message)s[/blue]",
            datefmt="[%X]",
            handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
        )

    for item in models:
        # In quiet mode the only thing written to stdout must be the output
        # directory (see the `--quiet` help), so the per-model banner is
        # suppressed there.
        if not quiet:
            typer.secho(f"\nDownloading {item} model from HuggingFace...")
        download_hf_model(
            repo_id=item,
            # would be better to reuse "repo_cache_folder" property: https://github.com/docling-project/docling/blob/main/docling/datamodel/pipeline_options_vlm_model.py#L76
            # but creating options objects seems like an overkill
            # Each repo gets its own sub-directory; "/" in the repo id is
            # replaced by "--" so the id maps to a single path component.
            local_dir=output_dir / item.replace("/", "--"),
            force=force,
            progress=(not quiet),
        )

    if quiet:
        # Machine-readable output: just the directory, nothing else.
        typer.echo(output_dir)
    else:
        typer.secho(f"\nModels downloaded into: {output_dir}.", fg="green")
# Module-level Click command object obtained from the Typer `app` via typer.main.get_command.
click_app = typer.main.get_command(app)
if __name__ == "__main__":

View File

@@ -20,6 +20,13 @@ Models downloaded into $HOME/.cache/docling/models.
Alternatively, models can be programmatically downloaded using `docling.utils.model_downloader.download_models()`.
You can also use the `download-hf-repo` command to download arbitrary models from HuggingFace by specifying their repo id:
```sh
$ docling-tools models download-hf-repo ds4sd/SmolDocling-256M-preview
Downloading ds4sd/SmolDocling-256M-preview model from HuggingFace...
```
**Step 2: Use the prefetched models**
```python