Update minimal smoldocling example

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-07-27 04:24:45 +00:00 · 2025-02-12 17:07:00 +01:00 · 2025-02-12 17:07:00 +01:00 · 6f9f4f4aee
commit 6f9f4f4aee
parent b1df461ca8
3 changed files with 9 additions and 30 deletions
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -256,7 +256,8 @@ granite_picture_description = PictureDescriptionVlmOptions(

 class SmolDoclingOptions(BaseModel):
    artifacts_path: str = ""
-    question: str = "Perform Layout Analysis."
+    question: str = "Convert this page to docling."  # "Perform Layout Analysis."
+
    load_in_8bit: bool = True
    llm_int8_threshold: float = 6.0
    quantized: bool = False
--- a/docling/pipeline/vlm_pipeline.py
+++ b/docling/pipeline/vlm_pipeline.py
@@ -113,24 +113,6 @@ class VlmPipeline(PaginatedPipeline):
            # Other models working on `NodeItem` elements in the DoclingDocument
        ]

-    @staticmethod
-    def download_models_hf(
-        local_dir: Optional[Path] = None, force: bool = False
-    ) -> Path:
-        from huggingface_hub import snapshot_download
-        from huggingface_hub.utils import disable_progress_bars
-
-        disable_progress_bars()
-
-        # TODO: download the correct model (private repo)
-        download_path = snapshot_download(
-            repo_id="ds4sd/xxx",
-            force_download=force,
-            local_dir=local_dir,
-        )
-
-        return Path(download_path)
-
    def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
        with TimeRecorder(conv_res, "page_init"):
            page._backend = conv_res.input._backend.load_page(page.page_no)  # type: ignore
--- a/docs/examples/minimal_smol_docling.py
+++ b/docs/examples/minimal_smol_docling.py
@@ -1,14 +1,11 @@
 import json
-import os
 import time
 from pathlib import Path
-from urllib.parse import urlparse

 import yaml

-from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.datamodel.base_models import InputFormat
-from docling.datamodel.pipeline_options import PdfPipelineOptions, SmolDoclingOptions
+from docling.datamodel.pipeline_options import SmolDoclingOptions, VlmPipelineOptions
 from docling.document_converter import DocumentConverter, PdfFormatOption
 from docling.pipeline.vlm_pipeline import VlmPipeline

@@ -18,18 +15,17 @@ sources = [
    # "tests/data/2305.03393v1-pg9.pdf",
 ]

-pipeline_options = PdfPipelineOptions()
+pipeline_options = VlmPipelineOptions()  # artifacts_path="~/local_model_artifacts/")
 pipeline_options.generate_page_images = True
 # If force_backend_text = True, text from backend will be used instead of generated text
 pipeline_options.force_backend_text = False
-# pipeline_options.artifacts_path = "model_artifacts/SmolDocling_250M_0.9"
+

 vlm_options = SmolDoclingOptions(
-    artifacts_path="model_artifacts/SmolDocling_250M_0.9",
-    question="Convert this page to docling.",
-    load_in_8bit=True,
-    llm_int8_threshold=6.0,
-    quantized=False,
+    # question="Convert this page to docling.",
+    # load_in_8bit=True,
+    # llm_int8_threshold=6.0,
+    # quantized=False,
 )

 pipeline_options.vlm_options = vlm_options