Rename download methods and add a deprecation warning

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2025-02-05 17:04:54 +01:00
parent 0f830a6ac9
commit 0ba08adb26
6 changed files with 24 additions and 16 deletions

View File

@ -93,7 +93,7 @@ def download(
if layout: if layout:
if not quite: if not quite:
typer.secho(f"Downloading layout model...", fg="blue") typer.secho(f"Downloading layout model...", fg="blue")
LayoutModel.download_models_hf( LayoutModel.download_models(
local_dir=output_dir / LayoutModel._model_repo_folder, local_dir=output_dir / LayoutModel._model_repo_folder,
force=force, force=force,
progress=show_progress, progress=show_progress,
@ -102,7 +102,7 @@ def download(
if tableformer: if tableformer:
if not quite: if not quite:
typer.secho(f"Downloading tableformer model...", fg="blue") typer.secho(f"Downloading tableformer model...", fg="blue")
TableStructureModel.download_models_hf( TableStructureModel.download_models(
local_dir=output_dir / TableStructureModel._model_repo_folder, local_dir=output_dir / TableStructureModel._model_repo_folder,
force=force, force=force,
progress=show_progress, progress=show_progress,
@ -111,7 +111,7 @@ def download(
if picture_classifier: if picture_classifier:
if not quite: if not quite:
typer.secho(f"Downloading picture classifier model...", fg="blue") typer.secho(f"Downloading picture classifier model...", fg="blue")
DocumentPictureClassifier.download_models_hf( DocumentPictureClassifier.download_models(
local_dir=output_dir / DocumentPictureClassifier._model_repo_folder, local_dir=output_dir / DocumentPictureClassifier._model_repo_folder,
force=force, force=force,
progress=show_progress, progress=show_progress,
@ -120,7 +120,7 @@ def download(
if code_formula: if code_formula:
if not quite: if not quite:
typer.secho(f"Downloading code formula model...", fg="blue") typer.secho(f"Downloading code formula model...", fg="blue")
CodeFormulaModel.download_models_hf( CodeFormulaModel.download_models(
local_dir=output_dir / CodeFormulaModel._model_repo_folder, local_dir=output_dir / CodeFormulaModel._model_repo_folder,
force=force, force=force,
progress=show_progress, progress=show_progress,

View File

@ -97,7 +97,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
) )
if artifacts_path is None: if artifacts_path is None:
artifacts_path = self.download_models_hf() artifacts_path = self.download_models()
else: else:
artifacts_path = artifacts_path / self._model_repo_folder artifacts_path = artifacts_path / self._model_repo_folder
@ -108,7 +108,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
) )
@staticmethod @staticmethod
def download_models_hf( def download_models(
local_dir: Optional[Path] = None, local_dir: Optional[Path] = None,
force: bool = False, force: bool = False,
progress: bool = False, progress: bool = False,

View File

@ -89,7 +89,7 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
) )
if artifacts_path is None: if artifacts_path is None:
artifacts_path = self.download_models_hf() artifacts_path = self.download_models()
else: else:
artifacts_path = artifacts_path / self._model_repo_folder artifacts_path = artifacts_path / self._model_repo_folder
@ -100,7 +100,7 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
) )
@staticmethod @staticmethod
def download_models_hf( def download_models(
local_dir: Optional[Path] = None, force: bool = False, progress: bool = False local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
) -> Path: ) -> Path:
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download

View File

@ -51,7 +51,7 @@ class LayoutModel(BasePageModel):
device = decide_device(accelerator_options.device) device = decide_device(accelerator_options.device)
if artifacts_path is None: if artifacts_path is None:
artifacts_path = self.download_models_hf() / self._model_path artifacts_path = self.download_models() / self._model_path
else: else:
# will become the default in the future # will become the default in the future
if (artifacts_path / self._model_repo_folder).exists(): if (artifacts_path / self._model_repo_folder).exists():
@ -76,7 +76,7 @@ class LayoutModel(BasePageModel):
) )
@staticmethod @staticmethod
def download_models_hf( def download_models(
local_dir: Optional[Path] = None, local_dir: Optional[Path] = None,
force: bool = False, force: bool = False,
progress: bool = False, progress: bool = False,

View File

@ -41,7 +41,7 @@ class TableStructureModel(BasePageModel):
if self.enabled: if self.enabled:
if artifacts_path is None: if artifacts_path is None:
artifacts_path = self.download_models_hf() / self._model_path artifacts_path = self.download_models() / self._model_path
else: else:
# will become the default in the future # will become the default in the future
if (artifacts_path / self._model_repo_folder).exists(): if (artifacts_path / self._model_repo_folder).exists():
@ -83,7 +83,7 @@ class TableStructureModel(BasePageModel):
self.scale = 2.0 # Scale up table input images to 144 dpi self.scale = 2.0 # Scale up table input images to 144 dpi
@staticmethod @staticmethod
def download_models_hf( def download_models(
local_dir: Optional[Path] = None, force: bool = False, progress: bool = False local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
) -> Path: ) -> Path:
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download

View File

@ -1,5 +1,6 @@
import logging import logging
import sys import sys
import warnings
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
@ -125,6 +126,13 @@ class StandardPdfPipeline(PaginatedPipeline):
def download_models_hf( def download_models_hf(
local_dir: Optional[Path] = None, force: bool = False local_dir: Optional[Path] = None, force: bool = False
) -> Path: ) -> Path:
warnings.warn(
"The usage of StandardPdfPipeline.download_models_hf() is deprecated "
"use instead the utility `docling-tools models download`, or "
"the upstream method in docling.utils.",
DeprecationWarning,
stacklevel=3,
)
if local_dir is None: if local_dir is None:
local_dir = settings.cache_dir / "models" local_dir = settings.cache_dir / "models"
@ -133,17 +141,17 @@ class StandardPdfPipeline(PaginatedPipeline):
local_dir.mkdir(exist_ok=True, parents=True) local_dir.mkdir(exist_ok=True, parents=True)
# Download model weights # Download model weights
LayoutModel.download_models_hf( LayoutModel.download_models(
local_dir=local_dir / LayoutModel._model_repo_folder, force=force local_dir=local_dir / LayoutModel._model_repo_folder, force=force
) )
TableStructureModel.download_models_hf( TableStructureModel.download_models(
local_dir=local_dir / TableStructureModel._model_repo_folder, force=force local_dir=local_dir / TableStructureModel._model_repo_folder, force=force
) )
DocumentPictureClassifier.download_models_hf( DocumentPictureClassifier.download_models(
local_dir=local_dir / DocumentPictureClassifier._model_repo_folder, local_dir=local_dir / DocumentPictureClassifier._model_repo_folder,
force=force, force=force,
) )
CodeFormulaModel.download_models_hf( CodeFormulaModel.download_models(
local_dir=local_dir / CodeFormulaModel._model_repo_folder, force=force local_dir=local_dir / CodeFormulaModel._model_repo_folder, force=force
) )