mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
feat(CLI): Option to download arbitrary HuggingFace model (#2123)
* Added option to docling-tools to download arbitrary HuggingFace model
  Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>
* Added note in documentation
  Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>
* Removed note on custom artifact path usage from HF download option
  Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>
* Fixed typo
  Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>
---------
Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
449bde0a6c
commit
cdf079dd06
@@ -9,6 +9,7 @@ from rich.console import Console
|
||||
from rich.logging import RichHandler
|
||||
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.utils.hf_model_download import download_hf_model
|
||||
from docling.utils.model_downloader import download_models
|
||||
|
||||
# Silence UserWarning messages raised from modules whose name matches the
# regex "pydantic|torch", so they do not clutter the CLI output.
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
|
||||
@@ -128,6 +129,61 @@ def download(
|
||||
)
|
||||
|
||||
|
||||
@app.command("download-hf-repo")
def download_hf_repo(
    models: Annotated[
        list[str],
        typer.Argument(
            help="Specific models to download from HuggingFace identified by their repo id. For example: ds4sd/docling-models .",
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            ...,
            "-o",
            "--output-dir",
            help="The directory where to download the models.",
        ),
    ] = (settings.cache_dir / "models"),
    force: Annotated[
        bool, typer.Option(..., help="If true, the download will be forced.")
    ] = False,
    quiet: Annotated[
        bool,
        typer.Option(
            ...,
            "-q",
            "--quiet",
            help="No extra output is generated, the CLI prints only the directory with the cached models.",
        ),
    ] = False,
):
    """Download arbitrary HuggingFace model repositories by repo id."""
    # NOTE: Typer shows the docstring above as the command's help text, so it
    # is kept to a single line; the details live in these comments instead.
    #
    # Each repo id in `models` is downloaded into its own sub-directory of
    # `output_dir`, named after the repo id with "/" replaced by "--".
    # `force` and the progress flag are passed straight to `download_hf_model`.
    if not quiet:
        logging.basicConfig(
            level=logging.INFO,
            format="[blue]%(message)s[/blue]",
            datefmt="[%X]",
            handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
        )

    for item in models:
        # Honour --quiet: its help text promises that only the models
        # directory is printed, so the per-model banner must be suppressed.
        if not quiet:
            typer.secho(f"\nDownloading {item} model from HuggingFace...")
        download_hf_model(
            repo_id=item,
            # would be better to reuse "repo_cache_folder" property: https://github.com/docling-project/docling/blob/main/docling/datamodel/pipeline_options_vlm_model.py#L76
            # but creating options objects seems like an overkill
            local_dir=output_dir / item.replace("/", "--"),
            force=force,
            progress=(not quiet),
        )

    if quiet:
        # Machine-readable output: just the directory, nothing else.
        typer.echo(output_dir)
    else:
        typer.secho(f"\nModels downloaded into: {output_dir}.", fg="green")
|
||||
|
||||
|
||||
# Click command object derived from the Typer `app`.
# NOTE(review): presumably exposed for Click-based tooling (e.g. docs generation) — confirm usage.
click_app = typer.main.get_command(app)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
7
docs/usage/advanced_options.md
vendored
7
docs/usage/advanced_options.md
vendored
@@ -20,6 +20,13 @@ Models downloaded into $HOME/.cache/docling/models.
|
||||
|
||||
Alternatively, models can be programmatically downloaded using `docling.utils.model_downloader.download_models()`.
|
||||
|
||||
You can also use the `download-hf-repo` command to download arbitrary models from HuggingFace by specifying their repo id:
|
||||
|
||||
```sh
|
||||
$ docling-tools models download-hf-repo ds4sd/SmolDocling-256M-preview
|
||||
Downloading ds4sd/SmolDocling-256M-preview model from HuggingFace...
|
||||
```
|
||||
|
||||
**Step 2: Use the prefetched models**
|
||||
|
||||
```python
|
||||
|
||||
Reference in New Issue
Block a user