Merge branch 'main' into dev/add-other-vlm-models

2025-12-16 08:38:14 +00:00 · 2025-05-13 06:08:26 +02:00
parent 7fbe021359 0d0fa6cbe3
commit ee01e3cff0
4 changed files with 48 additions and 1 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,22 @@
 ## [v2.31.1](https://github.com/docling-project/docling/releases/tag/v2.31.1) - 2025-05-12
 ### Fix
 * Add smoldocling in download utils ([#1577](https://github.com/docling-project/docling/issues/1577)) ([`127e386`](https://github.com/docling-project/docling/commit/127e38646fd7f23fcda0e392e756fe27f123bd78))
 * **HTML:** Handle row spans in header rows ([#1536](https://github.com/docling-project/docling/issues/1536)) ([`776e7ec`](https://github.com/docling-project/docling/commit/776e7ecf9ac93d62c66b03f33e5c8560e81b6fb3))
 * Mime error in document streams ([#1523](https://github.com/docling-project/docling/issues/1523)) ([`f1658ed`](https://github.com/docling-project/docling/commit/f1658edbad5c7205bb457322d2c89f7f4d8a4659))
 * Usage of hashlib for FIPS ([#1512](https://github.com/docling-project/docling/issues/1512)) ([`7c70573`](https://github.com/docling-project/docling/commit/7c705739f9db1cfc6c0a502fd5ba8b2093376d7f))
 * Guard against attribute errors in TesseractOcrModel `__del__` ([#1494](https://github.com/docling-project/docling/issues/1494)) ([`4ab7e9d`](https://github.com/docling-project/docling/commit/4ab7e9ddfb9d8fd0abc483efb70e701447a602c5))
 * Enable cuda_use_flash_attention2 for PictureDescriptionVlmModel ([#1496](https://github.com/docling-project/docling/issues/1496)) ([`cc45396`](https://github.com/docling-project/docling/commit/cc453961a9196c79f6428305b9007402e448f300))
 * Updated the time-recorder label for reading order ([#1490](https://github.com/docling-project/docling/issues/1490)) ([`976e92e`](https://github.com/docling-project/docling/commit/976e92e289a414b6b70c3e3ca37a60c85fa12535))
 * Incorrect scaling of TableModel bboxes when do_cell_matching is False ([#1459](https://github.com/docling-project/docling/issues/1459)) ([`94d66a0`](https://github.com/docling-project/docling/commit/94d66a076559c4e48017bd619508cfeef104079b))
 ### Documentation
 * Update links in data_prep_kit ([#1559](https://github.com/docling-project/docling/issues/1559)) ([`844babb`](https://github.com/docling-project/docling/commit/844babb39034b39d9c4edcc3f145684991cda174))
 * Add serialization docs, update chunking docs ([#1556](https://github.com/docling-project/docling/issues/1556)) ([`3220a59`](https://github.com/docling-project/docling/commit/3220a592e720174940a3b958555f90352d7320d8))
 * Update supported formats guide ([#1463](https://github.com/docling-project/docling/issues/1463)) ([`3afbe6c`](https://github.com/docling-project/docling/commit/3afbe6c9695d52cf6ed8b48b2f403df7d53342e5))
 ## [v2.31.0](https://github.com/docling-project/docling/releases/tag/v2.31.0) - 2025-04-25
 ### Feature
--- a/docling/cli/models.py
+++ b/docling/cli/models.py
@@ -32,6 +32,8 @@ class _AvailableModels(str, Enum):
    CODE_FORMULA = "code_formula"
    PICTURE_CLASSIFIER = "picture_classifier"
    SMOLVLM = "smolvlm"
    SMOLDOCLING = "smoldocling"
    SMOLDOCLING_MLX = "smoldocling_mlx"
    GRANITE_VISION = "granite_vision"
    EASYOCR = "easyocr"
@@ -105,6 +107,8 @@ def download(
        with_code_formula=_AvailableModels.CODE_FORMULA in to_download,
        with_picture_classifier=_AvailableModels.PICTURE_CLASSIFIER in to_download,
        with_smolvlm=_AvailableModels.SMOLVLM in to_download,
        with_smoldocling=_AvailableModels.SMOLDOCLING in to_download,
        with_smoldocling_mlx=_AvailableModels.SMOLDOCLING_MLX in to_download,
        with_granite_vision=_AvailableModels.GRANITE_VISION in to_download,
        with_easyocr=_AvailableModels.EASYOCR in to_download,
    )
--- a/docling/utils/model_downloader.py
+++ b/docling/utils/model_downloader.py
@@ -4,12 +4,15 @@ from typing import Optional
 from docling.datamodel.pipeline_options import (
    granite_picture_description,
    smoldocling_vlm_conversion_options,
    smoldocling_vlm_mlx_conversion_options,
    smolvlm_picture_description,
 )
 from docling.datamodel.settings import settings
 from docling.models.code_formula_model import CodeFormulaModel
 from docling.models.document_picture_classifier import DocumentPictureClassifier
 from docling.models.easyocr_model import EasyOcrModel
 from docling.models.hf_vlm_model import HuggingFaceVlmModel
 from docling.models.layout_model import LayoutModel
 from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
 from docling.models.table_structure_model import TableStructureModel
@@ -27,6 +30,8 @@ def download_models(
    with_code_formula: bool = True,
    with_picture_classifier: bool = True,
    with_smolvlm: bool = False,
    with_smoldocling: bool = False,
    with_smoldocling_mlx: bool = False,
    with_granite_vision: bool = False,
    with_easyocr: bool = True,
 ):
@@ -77,6 +82,25 @@ def download_models(
            progress=progress,
        )
    if with_smoldocling:
        _log.info("Downloading SmolDocling model...")
        HuggingFaceVlmModel.download_models(
            repo_id=smoldocling_vlm_conversion_options.repo_id,
            local_dir=output_dir / smoldocling_vlm_conversion_options.repo_cache_folder,
            force=force,
            progress=progress,
        )
    if with_smoldocling_mlx:
        _log.info("Downloading SmolDocling MLX model...")
        HuggingFaceVlmModel.download_models(
            repo_id=smoldocling_vlm_mlx_conversion_options.repo_id,
            local_dir=output_dir
            / smoldocling_vlm_mlx_conversion_options.repo_cache_folder,
            force=force,
            progress=progress,
        )
    if with_granite_vision:
        _log.info("Downloading Granite Vision model...")
        PictureDescriptionVlmModel.download_models(
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling"
-version = "2.31.0"  # DO NOT EDIT, updated automatically
+version = "2.31.1"  # DO NOT EDIT, updated automatically
 description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
 authors = [
  "Christoph Auer <cau@zurich.ibm.com>",