mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
feat(CLI): Option to download arbitrary HuggingFace model (#2123)
* Added option to docling-tools to download arbitrary HuggingFace model Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com> * Added note in documentation Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com> * Removed note on custom artifact path usage from HF download option Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com> * Fixed typo Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com> --------- Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
449bde0a6c
commit
cdf079dd06
@@ -9,6 +9,7 @@ from rich.console import Console
|
|||||||
from rich.logging import RichHandler
|
from rich.logging import RichHandler
|
||||||
|
|
||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
|
from docling.models.utils.hf_model_download import download_hf_model
|
||||||
from docling.utils.model_downloader import download_models
|
from docling.utils.model_downloader import download_models
|
||||||
|
|
||||||
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
|
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
|
||||||
@@ -128,6 +129,61 @@ def download(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.command("download-hf-repo")
def download_hf_repo(
    models: Annotated[
        list[str],
        typer.Argument(
            help="Specific models to download from HuggingFace identified by their repo id. For example: ds4sd/docling-models .",
        ),
    ],
    output_dir: Annotated[
        Path,
        typer.Option(
            ...,
            "-o",
            "--output-dir",
            help="The directory where to download the models.",
        ),
    ] = (settings.cache_dir / "models"),
    force: Annotated[
        bool, typer.Option(..., help="If true, the download will be forced.")
    ] = False,
    quiet: Annotated[
        bool,
        typer.Option(
            ...,
            "-q",
            "--quiet",
            help="No extra output is generated, the CLI prints only the directory with the cached models.",
        ),
    ] = False,
) -> None:
    # CLI command: download each HuggingFace repo listed in ``models`` into its
    # own sub-directory of ``output_dir`` by calling ``download_hf_model``.
    #
    # The description is kept in comments rather than a docstring on purpose:
    # Typer would render a docstring as the command's ``--help`` text.
    #
    # models:     HuggingFace repo ids, e.g. "ds4sd/docling-models".
    # output_dir: parent directory that receives one sub-directory per repo.
    # force:      forwarded unchanged to ``download_hf_model``.
    # quiet:      when set, the only thing printed by this command is
    #             ``output_dir``, so the output can be consumed by scripts.
    if not quiet:
        logging.basicConfig(
            level=logging.INFO,
            format="[blue]%(message)s[/blue]",
            datefmt="[%X]",
            handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
        )

    for item in models:
        # Honour --quiet: the progress banner is "extra output" and must not
        # pollute stdout when the caller asked for the directory only.
        if not quiet:
            typer.secho(f"\nDownloading {item} model from HuggingFace...")
        download_hf_model(
            repo_id=item,
            # would be better to reuse "repo_cache_folder" property: https://github.com/docling-project/docling/blob/main/docling/datamodel/pipeline_options_vlm_model.py#L76
            # but creating options objects seems like overkill
            local_dir=output_dir / item.replace("/", "--"),
            force=force,
            progress=(not quiet),
        )

    if quiet:
        typer.echo(output_dir)
    else:
        typer.secho(f"\nModels downloaded into: {output_dir}.", fg="green")
|
||||||
|
|
||||||
|
|
||||||
click_app = typer.main.get_command(app)
|
click_app = typer.main.get_command(app)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
7
docs/usage/advanced_options.md
vendored
7
docs/usage/advanced_options.md
vendored
@@ -20,6 +20,13 @@ Models downloaded into $HOME/.cache/docling/models.
|
|||||||
|
|
||||||
Alternatively, models can be programmatically downloaded using `docling.utils.model_downloader.download_models()`.
|
Alternatively, models can be programmatically downloaded using `docling.utils.model_downloader.download_models()`.
|
||||||
|
|
||||||
|
You can also use the `download-hf-repo` command to download arbitrary models from HuggingFace by specifying their repo id:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ docling-tools models download-hf-repo ds4sd/SmolDocling-256M-preview
|
||||||
|
Downloading ds4sd/SmolDocling-256M-preview model from HuggingFace...
|
||||||
|
```
|
||||||
|
|
||||||
**Step 2: Use the prefetched models**
|
**Step 2: Use the prefetched models**
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
|||||||
Reference in New Issue
Block a user