Merge branch 'main' into dev/add-other-vlm-models

This commit is contained in:
Peter Staar 2025-05-13 06:08:26 +02:00
commit ee01e3cff0
4 changed files with 48 additions and 1 deletions

View File

@ -1,3 +1,22 @@
## [v2.31.1](https://github.com/docling-project/docling/releases/tag/v2.31.1) - 2025-05-12
### Fix
* Add smoldocling in download utils ([#1577](https://github.com/docling-project/docling/issues/1577)) ([`127e386`](https://github.com/docling-project/docling/commit/127e38646fd7f23fcda0e392e756fe27f123bd78))
* **HTML:** Handle row spans in header rows ([#1536](https://github.com/docling-project/docling/issues/1536)) ([`776e7ec`](https://github.com/docling-project/docling/commit/776e7ecf9ac93d62c66b03f33e5c8560e81b6fb3))
* Mime error in document streams ([#1523](https://github.com/docling-project/docling/issues/1523)) ([`f1658ed`](https://github.com/docling-project/docling/commit/f1658edbad5c7205bb457322d2c89f7f4d8a4659))
* Usage of hashlib for FIPS ([#1512](https://github.com/docling-project/docling/issues/1512)) ([`7c70573`](https://github.com/docling-project/docling/commit/7c705739f9db1cfc6c0a502fd5ba8b2093376d7f))
* Guard against attribute errors in TesseractOcrModel `__del__` ([#1494](https://github.com/docling-project/docling/issues/1494)) ([`4ab7e9d`](https://github.com/docling-project/docling/commit/4ab7e9ddfb9d8fd0abc483efb70e701447a602c5))
* Enable cuda_use_flash_attention2 for PictureDescriptionVlmModel ([#1496](https://github.com/docling-project/docling/issues/1496)) ([`cc45396`](https://github.com/docling-project/docling/commit/cc453961a9196c79f6428305b9007402e448f300))
* Updated the time-recorder label for reading order ([#1490](https://github.com/docling-project/docling/issues/1490)) ([`976e92e`](https://github.com/docling-project/docling/commit/976e92e289a414b6b70c3e3ca37a60c85fa12535))
* Incorrect scaling of TableModel bboxes when do_cell_matching is False ([#1459](https://github.com/docling-project/docling/issues/1459)) ([`94d66a0`](https://github.com/docling-project/docling/commit/94d66a076559c4e48017bd619508cfeef104079b))
### Documentation
* Update links in data_prep_kit ([#1559](https://github.com/docling-project/docling/issues/1559)) ([`844babb`](https://github.com/docling-project/docling/commit/844babb39034b39d9c4edcc3f145684991cda174))
* Add serialization docs, update chunking docs ([#1556](https://github.com/docling-project/docling/issues/1556)) ([`3220a59`](https://github.com/docling-project/docling/commit/3220a592e720174940a3b958555f90352d7320d8))
* Update supported formats guide ([#1463](https://github.com/docling-project/docling/issues/1463)) ([`3afbe6c`](https://github.com/docling-project/docling/commit/3afbe6c9695d52cf6ed8b48b2f403df7d53342e5))
## [v2.31.0](https://github.com/docling-project/docling/releases/tag/v2.31.0) - 2025-04-25
### Feature

View File

@ -32,6 +32,8 @@ class _AvailableModels(str, Enum):
CODE_FORMULA = "code_formula" CODE_FORMULA = "code_formula"
PICTURE_CLASSIFIER = "picture_classifier" PICTURE_CLASSIFIER = "picture_classifier"
SMOLVLM = "smolvlm" SMOLVLM = "smolvlm"
SMOLDOCLING = "smoldocling"
SMOLDOCLING_MLX = "smoldocling_mlx"
GRANITE_VISION = "granite_vision" GRANITE_VISION = "granite_vision"
EASYOCR = "easyocr" EASYOCR = "easyocr"
@ -105,6 +107,8 @@ def download(
with_code_formula=_AvailableModels.CODE_FORMULA in to_download, with_code_formula=_AvailableModels.CODE_FORMULA in to_download,
with_picture_classifier=_AvailableModels.PICTURE_CLASSIFIER in to_download, with_picture_classifier=_AvailableModels.PICTURE_CLASSIFIER in to_download,
with_smolvlm=_AvailableModels.SMOLVLM in to_download, with_smolvlm=_AvailableModels.SMOLVLM in to_download,
with_smoldocling=_AvailableModels.SMOLDOCLING in to_download,
with_smoldocling_mlx=_AvailableModels.SMOLDOCLING_MLX in to_download,
with_granite_vision=_AvailableModels.GRANITE_VISION in to_download, with_granite_vision=_AvailableModels.GRANITE_VISION in to_download,
with_easyocr=_AvailableModels.EASYOCR in to_download, with_easyocr=_AvailableModels.EASYOCR in to_download,
) )

View File

@ -4,12 +4,15 @@ from typing import Optional
from docling.datamodel.pipeline_options import ( from docling.datamodel.pipeline_options import (
granite_picture_description, granite_picture_description,
smoldocling_vlm_conversion_options,
smoldocling_vlm_mlx_conversion_options,
smolvlm_picture_description, smolvlm_picture_description,
) )
from docling.datamodel.settings import settings from docling.datamodel.settings import settings
from docling.models.code_formula_model import CodeFormulaModel from docling.models.code_formula_model import CodeFormulaModel
from docling.models.document_picture_classifier import DocumentPictureClassifier from docling.models.document_picture_classifier import DocumentPictureClassifier
from docling.models.easyocr_model import EasyOcrModel from docling.models.easyocr_model import EasyOcrModel
from docling.models.hf_vlm_model import HuggingFaceVlmModel
from docling.models.layout_model import LayoutModel from docling.models.layout_model import LayoutModel
from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
from docling.models.table_structure_model import TableStructureModel from docling.models.table_structure_model import TableStructureModel
@ -27,6 +30,8 @@ def download_models(
with_code_formula: bool = True, with_code_formula: bool = True,
with_picture_classifier: bool = True, with_picture_classifier: bool = True,
with_smolvlm: bool = False, with_smolvlm: bool = False,
with_smoldocling: bool = False,
with_smoldocling_mlx: bool = False,
with_granite_vision: bool = False, with_granite_vision: bool = False,
with_easyocr: bool = True, with_easyocr: bool = True,
): ):
@ -77,6 +82,25 @@ def download_models(
progress=progress, progress=progress,
) )
if with_smoldocling:
_log.info("Downloading SmolDocling model...")
HuggingFaceVlmModel.download_models(
repo_id=smoldocling_vlm_conversion_options.repo_id,
local_dir=output_dir / smoldocling_vlm_conversion_options.repo_cache_folder,
force=force,
progress=progress,
)
if with_smoldocling_mlx:
_log.info("Downloading SmolDocling MLX model...")
HuggingFaceVlmModel.download_models(
repo_id=smoldocling_vlm_mlx_conversion_options.repo_id,
local_dir=output_dir
/ smoldocling_vlm_mlx_conversion_options.repo_cache_folder,
force=force,
progress=progress,
)
if with_granite_vision: if with_granite_vision:
_log.info("Downloading Granite Vision model...") _log.info("Downloading Granite Vision model...")
PictureDescriptionVlmModel.download_models( PictureDescriptionVlmModel.download_models(

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "docling" name = "docling"
version = "2.31.0" # DO NOT EDIT, updated automatically version = "2.31.1" # DO NOT EDIT, updated automatically
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications." description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
authors = [ authors = [
"Christoph Auer <cau@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>",