chore: replace ds4sd with docling-project (#2596)

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter W. J. Staar
2025-11-07 11:25:56 +01:00
committed by GitHub
parent c21327cd74
commit 06ae8ae29a
6 changed files with 16 additions and 16 deletions

View File

@@ -140,7 +140,7 @@ def download_hf_repo(
models: Annotated[ models: Annotated[
list[str], list[str],
typer.Argument( typer.Argument(
help="Specific models to download from HuggingFace identified by their repo id. For example: ds4sd/docling-models .", help="Specific models to download from HuggingFace identified by their repo id. For example: docling-project/docling-models .",
), ),
], ],
output_dir: Annotated[ output_dir: Annotated[

View File

@@ -31,42 +31,42 @@ class LayoutModelConfig(BaseModel):
# Default Docling Layout Model # Default Docling Layout Model
DOCLING_LAYOUT_V2 = LayoutModelConfig( DOCLING_LAYOUT_V2 = LayoutModelConfig(
name="docling_layout_v2", name="docling_layout_v2",
repo_id="ds4sd/docling-layout-old", repo_id="docling-project/docling-layout-old",
revision="main", revision="main",
model_path="", model_path="",
) )
DOCLING_LAYOUT_HERON = LayoutModelConfig( DOCLING_LAYOUT_HERON = LayoutModelConfig(
name="docling_layout_heron", name="docling_layout_heron",
repo_id="ds4sd/docling-layout-heron", repo_id="docling-project/docling-layout-heron",
revision="main", revision="main",
model_path="", model_path="",
) )
DOCLING_LAYOUT_HERON_101 = LayoutModelConfig( DOCLING_LAYOUT_HERON_101 = LayoutModelConfig(
name="docling_layout_heron_101", name="docling_layout_heron_101",
repo_id="ds4sd/docling-layout-heron-101", repo_id="docling-project/docling-layout-heron-101",
revision="main", revision="main",
model_path="", model_path="",
) )
DOCLING_LAYOUT_EGRET_MEDIUM = LayoutModelConfig( DOCLING_LAYOUT_EGRET_MEDIUM = LayoutModelConfig(
name="docling_layout_egret_medium", name="docling_layout_egret_medium",
repo_id="ds4sd/docling-layout-egret-medium", repo_id="docling-project/docling-layout-egret-medium",
revision="main", revision="main",
model_path="", model_path="",
) )
DOCLING_LAYOUT_EGRET_LARGE = LayoutModelConfig( DOCLING_LAYOUT_EGRET_LARGE = LayoutModelConfig(
name="docling_layout_egret_large", name="docling_layout_egret_large",
repo_id="ds4sd/docling-layout-egret-large", repo_id="docling-project/docling-layout-egret-large",
revision="main", revision="main",
model_path="", model_path="",
) )
DOCLING_LAYOUT_EGRET_XLARGE = LayoutModelConfig( DOCLING_LAYOUT_EGRET_XLARGE = LayoutModelConfig(
name="docling_layout_egret_xlarge", name="docling_layout_egret_xlarge",
repo_id="ds4sd/docling-layout-egret-xlarge", repo_id="docling-project/docling-layout-egret-xlarge",
revision="main", revision="main",
model_path="", model_path="",
) )

View File

@@ -57,7 +57,7 @@ GRANITEDOCLING_MLX = InlineVlmOptions(
# SmolDocling # SmolDocling
SMOLDOCLING_MLX = InlineVlmOptions( SMOLDOCLING_MLX = InlineVlmOptions(
repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16", repo_id="docling-project/SmolDocling-256M-preview-mlx-bf16",
prompt="Convert this page to docling.", prompt="Convert this page to docling.",
response_format=ResponseFormat.DOCTAGS, response_format=ResponseFormat.DOCTAGS,
inference_framework=InferenceFramework.MLX, inference_framework=InferenceFramework.MLX,
@@ -68,7 +68,7 @@ SMOLDOCLING_MLX = InlineVlmOptions(
) )
SMOLDOCLING_TRANSFORMERS = InlineVlmOptions( SMOLDOCLING_TRANSFORMERS = InlineVlmOptions(
repo_id="ds4sd/SmolDocling-256M-preview", repo_id="docling-project/SmolDocling-256M-preview",
prompt="Convert this page to docling.", prompt="Convert this page to docling.",
response_format=ResponseFormat.DOCTAGS, response_format=ResponseFormat.DOCTAGS,
inference_framework=InferenceFramework.TRANSFORMERS, inference_framework=InferenceFramework.TRANSFORMERS,
@@ -84,7 +84,7 @@ SMOLDOCLING_TRANSFORMERS = InlineVlmOptions(
) )
SMOLDOCLING_VLLM = InlineVlmOptions( SMOLDOCLING_VLLM = InlineVlmOptions(
repo_id="ds4sd/SmolDocling-256M-preview", repo_id="docling-project/SmolDocling-256M-preview",
prompt="Convert this page to docling.", prompt="Convert this page to docling.",
response_format=ResponseFormat.DOCTAGS, response_format=ResponseFormat.DOCTAGS,
inference_framework=InferenceFramework.VLLM, inference_framework=InferenceFramework.VLLM,

View File

@@ -65,7 +65,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
Processes the given batch of elements and enriches them with predictions. Processes the given batch of elements and enriches them with predictions.
""" """
_model_repo_folder = "ds4sd--CodeFormulaV2" _model_repo_folder = "docling-project--CodeFormulaV2"
elements_batch_size = 5 elements_batch_size = 5
images_scale = 1.67 # = 120 dpi, aligned with training data resolution images_scale = 1.67 # = 120 dpi, aligned with training data resolution
expansion_factor = 0.18 expansion_factor = 0.18
@@ -121,7 +121,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
progress: bool = False, progress: bool = False,
) -> Path: ) -> Path:
return download_hf_model( return download_hf_model(
repo_id="ds4sd/CodeFormulaV2", repo_id="docling-project/CodeFormulaV2",
revision="main", revision="main",
local_dir=local_dir, local_dir=local_dir,
force=force, force=force,

View File

@@ -59,7 +59,7 @@ class DocumentPictureClassifier(BaseItemAndImageEnrichmentModel):
Processes a batch of elements and adds classification annotations. Processes a batch of elements and adds classification annotations.
""" """
_model_repo_folder = "ds4sd--DocumentFigureClassifier" _model_repo_folder = "docling-project--DocumentFigureClassifier"
images_scale = 2 images_scale = 2
def __init__( def __init__(
@@ -108,7 +108,7 @@ class DocumentPictureClassifier(BaseItemAndImageEnrichmentModel):
local_dir: Optional[Path] = None, force: bool = False, progress: bool = False local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
) -> Path: ) -> Path:
return download_hf_model( return download_hf_model(
repo_id="ds4sd/DocumentFigureClassifier", repo_id="docling-project/DocumentFigureClassifier",
revision="v1.0.1", revision="v1.0.1",
local_dir=local_dir, local_dir=local_dir,
force=force, force=force,

View File

@@ -27,7 +27,7 @@ from docling.utils.profiling import TimeRecorder
class TableStructureModel(BasePageModel): class TableStructureModel(BasePageModel):
_model_repo_folder = "ds4sd--docling-models" _model_repo_folder = "docling-project--docling-models"
_model_path = "model_artifacts/tableformer" _model_path = "model_artifacts/tableformer"
def __init__( def __init__(
@@ -93,7 +93,7 @@ class TableStructureModel(BasePageModel):
local_dir: Optional[Path] = None, force: bool = False, progress: bool = False local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
) -> Path: ) -> Path:
return download_hf_model( return download_hf_model(
repo_id="ds4sd/docling-models", repo_id="docling-project/docling-models",
revision="v2.3.0", revision="v2.3.0",
local_dir=local_dir, local_dir=local_dir,
force=force, force=force,