chore: replace ds4sd with docling-project (#2596)

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
2025-12-08 12:48:28 +00:00 · 2025-11-07 11:25:56 +01:00
parent c21327cd74
commit 06ae8ae29a
6 changed files with 16 additions and 16 deletions
--- a/docling/cli/models.py
+++ b/docling/cli/models.py
@@ -140,7 +140,7 @@ def download_hf_repo(
    models: Annotated[
        list[str],
        typer.Argument(
-            help="Specific models to download from HuggingFace identified by their repo id. For example: ds4sd/docling-models .",
+            help="Specific models to download from HuggingFace identified by their repo id. For example: docling-project/docling-models .",
        ),
    ],
    output_dir: Annotated[
--- a/docling/datamodel/layout_model_specs.py
+++ b/docling/datamodel/layout_model_specs.py
@@ -31,42 +31,42 @@ class LayoutModelConfig(BaseModel):
 # Default Docling Layout Model
 DOCLING_LAYOUT_V2 = LayoutModelConfig(
    name="docling_layout_v2",
-    repo_id="ds4sd/docling-layout-old",
+    repo_id="docling-project/docling-layout-old",
    revision="main",
    model_path="",
 )

 DOCLING_LAYOUT_HERON = LayoutModelConfig(
    name="docling_layout_heron",
-    repo_id="ds4sd/docling-layout-heron",
+    repo_id="docling-project/docling-layout-heron",
    revision="main",
    model_path="",
 )

 DOCLING_LAYOUT_HERON_101 = LayoutModelConfig(
    name="docling_layout_heron_101",
-    repo_id="ds4sd/docling-layout-heron-101",
+    repo_id="docling-project/docling-layout-heron-101",
    revision="main",
    model_path="",
 )

 DOCLING_LAYOUT_EGRET_MEDIUM = LayoutModelConfig(
    name="docling_layout_egret_medium",
-    repo_id="ds4sd/docling-layout-egret-medium",
+    repo_id="docling-project/docling-layout-egret-medium",
    revision="main",
    model_path="",
 )

 DOCLING_LAYOUT_EGRET_LARGE = LayoutModelConfig(
    name="docling_layout_egret_large",
-    repo_id="ds4sd/docling-layout-egret-large",
+    repo_id="docling-project/docling-layout-egret-large",
    revision="main",
    model_path="",
 )

 DOCLING_LAYOUT_EGRET_XLARGE = LayoutModelConfig(
    name="docling_layout_egret_xlarge",
-    repo_id="ds4sd/docling-layout-egret-xlarge",
+    repo_id="docling-project/docling-layout-egret-xlarge",
    revision="main",
    model_path="",
 )
--- a/docling/datamodel/vlm_model_specs.py
+++ b/docling/datamodel/vlm_model_specs.py
@@ -57,7 +57,7 @@ GRANITEDOCLING_MLX = InlineVlmOptions(

 # SmolDocling
 SMOLDOCLING_MLX = InlineVlmOptions(
-    repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16",
+    repo_id="docling-project/SmolDocling-256M-preview-mlx-bf16",
    prompt="Convert this page to docling.",
    response_format=ResponseFormat.DOCTAGS,
    inference_framework=InferenceFramework.MLX,
@@ -68,7 +68,7 @@ SMOLDOCLING_MLX = InlineVlmOptions(
 )

 SMOLDOCLING_TRANSFORMERS = InlineVlmOptions(
-    repo_id="ds4sd/SmolDocling-256M-preview",
+    repo_id="docling-project/SmolDocling-256M-preview",
    prompt="Convert this page to docling.",
    response_format=ResponseFormat.DOCTAGS,
    inference_framework=InferenceFramework.TRANSFORMERS,
@@ -84,7 +84,7 @@ SMOLDOCLING_TRANSFORMERS = InlineVlmOptions(
 )

 SMOLDOCLING_VLLM = InlineVlmOptions(
-    repo_id="ds4sd/SmolDocling-256M-preview",
+    repo_id="docling-project/SmolDocling-256M-preview",
    prompt="Convert this page to docling.",
    response_format=ResponseFormat.DOCTAGS,
    inference_framework=InferenceFramework.VLLM,
--- a/docling/models/code_formula_model.py
+++ b/docling/models/code_formula_model.py
@@ -65,7 +65,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
        Processes the given batch of elements and enriches them with predictions.
    """

-    _model_repo_folder = "ds4sd--CodeFormulaV2"
+    _model_repo_folder = "docling-project--CodeFormulaV2"
    elements_batch_size = 5
    images_scale = 1.67  # = 120 dpi, aligned with training data resolution
    expansion_factor = 0.18
@@ -121,7 +121,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
        progress: bool = False,
    ) -> Path:
        return download_hf_model(
-            repo_id="ds4sd/CodeFormulaV2",
+            repo_id="docling-project/CodeFormulaV2",
            revision="main",
            local_dir=local_dir,
            force=force,
--- a/docling/models/document_picture_classifier.py
+++ b/docling/models/document_picture_classifier.py
@@ -59,7 +59,7 @@ class DocumentPictureClassifier(BaseItemAndImageEnrichmentModel):
        Processes a batch of elements and adds classification annotations.
    """

-    _model_repo_folder = "ds4sd--DocumentFigureClassifier"
+    _model_repo_folder = "docling-project--DocumentFigureClassifier"
    images_scale = 2

    def __init__(
@@ -108,7 +108,7 @@ class DocumentPictureClassifier(BaseItemAndImageEnrichmentModel):
        local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
    ) -> Path:
        return download_hf_model(
-            repo_id="ds4sd/DocumentFigureClassifier",
+            repo_id="docling-project/DocumentFigureClassifier",
            revision="v1.0.1",
            local_dir=local_dir,
            force=force,
--- a/docling/models/table_structure_model.py
+++ b/docling/models/table_structure_model.py
@@ -27,7 +27,7 @@ from docling.utils.profiling import TimeRecorder


 class TableStructureModel(BasePageModel):
-    _model_repo_folder = "ds4sd--docling-models"
+    _model_repo_folder = "docling-project--docling-models"
    _model_path = "model_artifacts/tableformer"

    def __init__(
@@ -93,7 +93,7 @@ class TableStructureModel(BasePageModel):
        local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
    ) -> Path:
        return download_hf_model(
-            repo_id="ds4sd/docling-models",
+            repo_id="docling-project/docling-models",
            revision="v2.3.0",
            local_dir=local_dir,
            force=force,