mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
chore: replace ds4sd with docling-project (#2596)
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
c21327cd74
commit
06ae8ae29a
@@ -140,7 +140,7 @@ def download_hf_repo(
|
||||
models: Annotated[
|
||||
list[str],
|
||||
typer.Argument(
|
||||
help="Specific models to download from HuggingFace identified by their repo id. For example: ds4sd/docling-models .",
|
||||
help="Specific models to download from HuggingFace identified by their repo id. For example: docling-project/docling-models .",
|
||||
),
|
||||
],
|
||||
output_dir: Annotated[
|
||||
|
||||
@@ -31,42 +31,42 @@ class LayoutModelConfig(BaseModel):
|
||||
# Default Docling Layout Model
|
||||
DOCLING_LAYOUT_V2 = LayoutModelConfig(
|
||||
name="docling_layout_v2",
|
||||
repo_id="ds4sd/docling-layout-old",
|
||||
repo_id="docling-project/docling-layout-old",
|
||||
revision="main",
|
||||
model_path="",
|
||||
)
|
||||
|
||||
DOCLING_LAYOUT_HERON = LayoutModelConfig(
|
||||
name="docling_layout_heron",
|
||||
repo_id="ds4sd/docling-layout-heron",
|
||||
repo_id="docling-project/docling-layout-heron",
|
||||
revision="main",
|
||||
model_path="",
|
||||
)
|
||||
|
||||
DOCLING_LAYOUT_HERON_101 = LayoutModelConfig(
|
||||
name="docling_layout_heron_101",
|
||||
repo_id="ds4sd/docling-layout-heron-101",
|
||||
repo_id="docling-project/docling-layout-heron-101",
|
||||
revision="main",
|
||||
model_path="",
|
||||
)
|
||||
|
||||
DOCLING_LAYOUT_EGRET_MEDIUM = LayoutModelConfig(
|
||||
name="docling_layout_egret_medium",
|
||||
repo_id="ds4sd/docling-layout-egret-medium",
|
||||
repo_id="docling-project/docling-layout-egret-medium",
|
||||
revision="main",
|
||||
model_path="",
|
||||
)
|
||||
|
||||
DOCLING_LAYOUT_EGRET_LARGE = LayoutModelConfig(
|
||||
name="docling_layout_egret_large",
|
||||
repo_id="ds4sd/docling-layout-egret-large",
|
||||
repo_id="docling-project/docling-layout-egret-large",
|
||||
revision="main",
|
||||
model_path="",
|
||||
)
|
||||
|
||||
DOCLING_LAYOUT_EGRET_XLARGE = LayoutModelConfig(
|
||||
name="docling_layout_egret_xlarge",
|
||||
repo_id="ds4sd/docling-layout-egret-xlarge",
|
||||
repo_id="docling-project/docling-layout-egret-xlarge",
|
||||
revision="main",
|
||||
model_path="",
|
||||
)
|
||||
|
||||
@@ -57,7 +57,7 @@ GRANITEDOCLING_MLX = InlineVlmOptions(
|
||||
|
||||
# SmolDocling
|
||||
SMOLDOCLING_MLX = InlineVlmOptions(
|
||||
repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16",
|
||||
repo_id="docling-project/SmolDocling-256M-preview-mlx-bf16",
|
||||
prompt="Convert this page to docling.",
|
||||
response_format=ResponseFormat.DOCTAGS,
|
||||
inference_framework=InferenceFramework.MLX,
|
||||
@@ -68,7 +68,7 @@ SMOLDOCLING_MLX = InlineVlmOptions(
|
||||
)
|
||||
|
||||
SMOLDOCLING_TRANSFORMERS = InlineVlmOptions(
|
||||
repo_id="ds4sd/SmolDocling-256M-preview",
|
||||
repo_id="docling-project/SmolDocling-256M-preview",
|
||||
prompt="Convert this page to docling.",
|
||||
response_format=ResponseFormat.DOCTAGS,
|
||||
inference_framework=InferenceFramework.TRANSFORMERS,
|
||||
@@ -84,7 +84,7 @@ SMOLDOCLING_TRANSFORMERS = InlineVlmOptions(
|
||||
)
|
||||
|
||||
SMOLDOCLING_VLLM = InlineVlmOptions(
|
||||
repo_id="ds4sd/SmolDocling-256M-preview",
|
||||
repo_id="docling-project/SmolDocling-256M-preview",
|
||||
prompt="Convert this page to docling.",
|
||||
response_format=ResponseFormat.DOCTAGS,
|
||||
inference_framework=InferenceFramework.VLLM,
|
||||
|
||||
@@ -65,7 +65,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
|
||||
Processes the given batch of elements and enriches them with predictions.
|
||||
"""
|
||||
|
||||
_model_repo_folder = "ds4sd--CodeFormulaV2"
|
||||
_model_repo_folder = "docling-project--CodeFormulaV2"
|
||||
elements_batch_size = 5
|
||||
images_scale = 1.67 # = 120 dpi, aligned with training data resolution
|
||||
expansion_factor = 0.18
|
||||
@@ -121,7 +121,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
|
||||
progress: bool = False,
|
||||
) -> Path:
|
||||
return download_hf_model(
|
||||
repo_id="ds4sd/CodeFormulaV2",
|
||||
repo_id="docling-project/CodeFormulaV2",
|
||||
revision="main",
|
||||
local_dir=local_dir,
|
||||
force=force,
|
||||
|
||||
@@ -59,7 +59,7 @@ class DocumentPictureClassifier(BaseItemAndImageEnrichmentModel):
|
||||
Processes a batch of elements and adds classification annotations.
|
||||
"""
|
||||
|
||||
_model_repo_folder = "ds4sd--DocumentFigureClassifier"
|
||||
_model_repo_folder = "docling-project--DocumentFigureClassifier"
|
||||
images_scale = 2
|
||||
|
||||
def __init__(
|
||||
@@ -108,7 +108,7 @@ class DocumentPictureClassifier(BaseItemAndImageEnrichmentModel):
|
||||
local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
|
||||
) -> Path:
|
||||
return download_hf_model(
|
||||
repo_id="ds4sd/DocumentFigureClassifier",
|
||||
repo_id="docling-project/DocumentFigureClassifier",
|
||||
revision="v1.0.1",
|
||||
local_dir=local_dir,
|
||||
force=force,
|
||||
|
||||
@@ -27,7 +27,7 @@ from docling.utils.profiling import TimeRecorder
|
||||
|
||||
|
||||
class TableStructureModel(BasePageModel):
|
||||
_model_repo_folder = "ds4sd--docling-models"
|
||||
_model_repo_folder = "docling-project--docling-models"
|
||||
_model_path = "model_artifacts/tableformer"
|
||||
|
||||
def __init__(
|
||||
@@ -93,7 +93,7 @@ class TableStructureModel(BasePageModel):
|
||||
local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
|
||||
) -> Path:
|
||||
return download_hf_model(
|
||||
repo_id="ds4sd/docling-models",
|
||||
repo_id="docling-project/docling-models",
|
||||
revision="v2.3.0",
|
||||
local_dir=local_dir,
|
||||
force=force,
|
||||
|
||||
Reference in New Issue
Block a user