feat: Support for Python 3.14 (#2530)

* fix dependencies for py314

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add metadata and CI tests

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add back gliner

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update error message about python 3.14 availability

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* skip tests which cannot run on py 3.14

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix lint

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* remove vllm from py 3.14 deps

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* safe import for vllm

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update lock

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* remove torch.compile()

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update checkbox results after docling-core changes

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* cannot run mlx example in CI

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add test for rapidocr backends and skip onnxruntime on py3.14

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix other occurrences of torch.compile()

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* allow torch.compile for Python <3.14. proper support will be introduced with new torch releases

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-10-28 14:32:15 +01:00
committed by GitHub
parent 9a6fdf936b
commit cdffb47b9a
13 changed files with 2278 additions and 1361 deletions

View File

@@ -20,7 +20,7 @@ env:
tests/test_asr_pipeline.py tests/test_asr_pipeline.py
tests/test_threaded_pipeline.py tests/test_threaded_pipeline.py
PYTEST_TO_SKIP: |- PYTEST_TO_SKIP: |-
EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping)\.py$' EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping|mlx_whisper_example)\.py$'
jobs: jobs:
lint: lint:
@@ -62,7 +62,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
@@ -129,7 +129,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
@@ -201,7 +201,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5

View File

@@ -738,10 +738,15 @@ def convert( # noqa: C901
pipeline_options.vlm_options = SMOLDOCLING_MLX pipeline_options.vlm_options = SMOLDOCLING_MLX
except ImportError: except ImportError:
_log.warning( if sys.version_info < (3, 14):
"To run SmolDocling faster, please install mlx-vlm:\n" _log.warning(
"pip install mlx-vlm" "To run SmolDocling faster, please install mlx-vlm:\n"
) "pip install mlx-vlm"
)
else:
_log.warning(
"You can run SmolDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
)
elif vlm_model == VlmModelType.GRANITEDOCLING: elif vlm_model == VlmModelType.GRANITEDOCLING:
pipeline_options.vlm_options = GRANITEDOCLING_TRANSFORMERS pipeline_options.vlm_options = GRANITEDOCLING_TRANSFORMERS
@@ -751,10 +756,16 @@ def convert( # noqa: C901
pipeline_options.vlm_options = GRANITEDOCLING_MLX pipeline_options.vlm_options = GRANITEDOCLING_MLX
except ImportError: except ImportError:
_log.warning( if sys.version_info < (3, 14):
"To run GraniteDocling faster, please install mlx-vlm:\n" _log.warning(
"pip install mlx-vlm" "To run GraniteDocling faster, please install mlx-vlm:\n"
) "pip install mlx-vlm"
)
else:
_log.warning(
"You can run GraniteDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
)
elif vlm_model == VlmModelType.SMOLDOCLING_VLLM: elif vlm_model == VlmModelType.SMOLDOCLING_VLLM:
pipeline_options.vlm_options = SMOLDOCLING_VLLM pipeline_options.vlm_options = SMOLDOCLING_VLLM

View File

@@ -1,3 +1,4 @@
import sys
import threading import threading
from collections.abc import Iterable from collections.abc import Iterable
from pathlib import Path from pathlib import Path
@@ -75,7 +76,10 @@ class PictureDescriptionVlmModel(
else "sdpa" else "sdpa"
), ),
) )
self.model = torch.compile(self.model) # type: ignore if sys.version_info < (3, 14):
self.model = torch.compile(self.model) # type: ignore
else:
self.model.eval()
self.provenance = f"{self.options.repo_id}" self.provenance = f"{self.options.repo_id}"

View File

@@ -1,5 +1,6 @@
import importlib.metadata import importlib.metadata
import logging import logging
import sys
import time import time
from collections.abc import Iterable from collections.abc import Iterable
from pathlib import Path from pathlib import Path
@@ -129,7 +130,10 @@ class HuggingFaceTransformersVlmModel(BaseVlmPageModel, HuggingFaceModelDownload
trust_remote_code=vlm_options.trust_remote_code, trust_remote_code=vlm_options.trust_remote_code,
revision=vlm_options.revision, revision=vlm_options.revision,
) )
self.vlm_model = torch.compile(self.vlm_model) # type: ignore if sys.version_info < (3, 14):
self.vlm_model = torch.compile(self.vlm_model) # type: ignore
else:
self.vlm_model.eval()
# Load generation config # Load generation config
self.generation_config = GenerationConfig.from_pretrained( self.generation_config = GenerationConfig.from_pretrained(

View File

@@ -50,9 +50,14 @@ class HuggingFaceMlxModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
from mlx_vlm.prompt_utils import apply_chat_template # type: ignore from mlx_vlm.prompt_utils import apply_chat_template # type: ignore
from mlx_vlm.utils import load_config # type: ignore from mlx_vlm.utils import load_config # type: ignore
except ImportError: except ImportError:
raise ImportError( if sys.version_info < (3, 14):
"mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models." raise ImportError(
) "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
)
else:
raise ImportError(
"mlx-vlm is not installed. It is not yet available on Python 3.14."
)
repo_cache_folder = vlm_options.repo_id.replace("/", "--") repo_cache_folder = vlm_options.repo_id.replace("/", "--")

View File

@@ -1,4 +1,5 @@
import logging import logging
import sys
import time import time
from collections.abc import Iterable from collections.abc import Iterable
from pathlib import Path from pathlib import Path
@@ -153,7 +154,10 @@ class NuExtractTransformersModel(BaseVlmModel, HuggingFaceModelDownloadMixin):
), ),
trust_remote_code=vlm_options.trust_remote_code, trust_remote_code=vlm_options.trust_remote_code,
) )
self.vlm_model = torch.compile(self.vlm_model) # type: ignore if sys.version_info < (3, 14):
self.vlm_model = torch.compile(self.vlm_model) # type: ignore
else:
self.vlm_model.eval()
# Load generation config # Load generation config
self.generation_config = GenerationConfig.from_pretrained(artifacts_path) self.generation_config = GenerationConfig.from_pretrained(artifacts_path)

View File

@@ -1,4 +1,5 @@
import logging import logging
import sys
import time import time
from collections.abc import Iterable from collections.abc import Iterable
from pathlib import Path from pathlib import Path
@@ -100,7 +101,18 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
return return
from transformers import AutoProcessor from transformers import AutoProcessor
from vllm import LLM, SamplingParams
try:
from vllm import LLM, SamplingParams
except ImportError:
if sys.version_info < (3, 14):
raise ImportError(
"vllm is not installed. Please install it via `pip install vllm`."
)
else:
raise ImportError(
"vllm is not installed. It is not yet available on Python 3.14."
)
# Device selection # Device selection
self.device = decide_device( self.device = decide_device(

View File

@@ -1,6 +1,7 @@
import logging import logging
import os import os
import re import re
import sys
import tempfile import tempfile
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
@@ -117,9 +118,15 @@ class _NativeWhisperModel:
try: try:
import whisper # type: ignore import whisper # type: ignore
except ImportError: except ImportError:
raise ImportError( if sys.version_info < (3, 14):
"whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`." raise ImportError(
) "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
)
else:
raise ImportError(
"whisper is not installed. Unfortunately its dependencies are not yet available for Python 3.14."
)
self.asr_options = asr_options self.asr_options = asr_options
self.max_tokens = asr_options.max_new_tokens self.max_tokens = asr_options.max_new_tokens
self.temperature = asr_options.temperature self.temperature = asr_options.temperature

View File

@@ -30,6 +30,7 @@ classifiers = [
"Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
] ]
readme = "README.md" readme = "README.md"
authors = [ authors = [
@@ -63,7 +64,7 @@ dependencies = [
'pandas (>=2.1.4,<3.0.0)', 'pandas (>=2.1.4,<3.0.0)',
'marko (>=2.1.2,<3.0.0)', 'marko (>=2.1.2,<3.0.0)',
'openpyxl (>=3.1.5,<4.0.0)', 'openpyxl (>=3.1.5,<4.0.0)',
'lxml (>=4.0.0,<6.0.0)', 'lxml (>=4.0.0,<7.0.0)',
'pillow (>=10.0.0,<12.0.0)', 'pillow (>=10.0.0,<12.0.0)',
'tqdm (>=4.65.0,<5.0.0)', 'tqdm (>=4.65.0,<5.0.0)',
'pluggy (>=1.0.0,<2.0.0)', 'pluggy (>=1.0.0,<2.0.0)',
@@ -95,19 +96,19 @@ ocrmac = ['ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"']
vlm = [ vlm = [
'transformers (>=4.46.0,<5.0.0)', 'transformers (>=4.46.0,<5.0.0)',
'accelerate (>=1.2.1,<2.0.0)', 'accelerate (>=1.2.1,<2.0.0)',
'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"', 'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64"',
'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"', 'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64"',
"qwen-vl-utils>=0.0.11", "qwen-vl-utils>=0.0.11",
] ]
rapidocr = [ rapidocr = [
'rapidocr (>=3.3,<4.0.0) ; python_version < "3.14"', 'rapidocr (>=3.3,<4.0.0)',
'onnxruntime (>=1.7.0,<2.0.0)', 'onnxruntime (>=1.7.0,<2.0.0) ; python_version < "3.14"',
# 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"', # 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
# 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"', # 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
] ]
asr = [ asr = [
'mlx-whisper>=0.4.3 ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"', 'mlx-whisper>=0.4.3 ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64"',
"openai-whisper>=20250625", 'openai-whisper>=20250625 ; python_version < "3.14"',
] ]
[dependency-groups] [dependency-groups]
@@ -146,10 +147,10 @@ examples = [
"langchain-milvus~=0.1", "langchain-milvus~=0.1",
"langchain-text-splitters~=0.2", "langchain-text-splitters~=0.2",
"modelscope>=1.29.0", "modelscope>=1.29.0",
"gliner>=0.2.21", 'gliner>=0.2.21 ; python_version < "3.14"', # gliner depends on onnxruntime which is not available on py3.14
] ]
constraints = [ constraints = [
'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"', 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10" and python_version < "3.14"',
'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"', 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
] ]

View File

@@ -16,9 +16,9 @@
استاندارد اجباری است؟ استاندارد اجباری است؟
بلی - [ ] بلی
خير - [x] خير
مرجع صادرکننده استاندارد مرجع صادرکننده استاندارد
@@ -26,7 +26,7 @@
آيا توليدکننده محصول، استاندارد مذکور را اخذ نموده است؟ آيا توليدکننده محصول، استاندارد مذکور را اخذ نموده است؟
بلی خير - [x] بلی خير
## -3 پذيرش در بورس ## -3 پذيرش در بورس

View File

@@ -1,3 +1,4 @@
import sys
from pathlib import Path from pathlib import Path
from unittest.mock import Mock, patch from unittest.mock import Mock, patch
@@ -10,6 +11,11 @@ from docling.datamodel.pipeline_options import AsrPipelineOptions
from docling.document_converter import AudioFormatOption, DocumentConverter from docling.document_converter import AudioFormatOption, DocumentConverter
from docling.pipeline.asr_pipeline import AsrPipeline from docling.pipeline.asr_pipeline import AsrPipeline
pytestmark = pytest.mark.skipif(
sys.version_info >= (3, 14),
reason="Python 3.14 is not yet supported by whisper dependencies.",
)
@pytest.fixture @pytest.fixture
def test_audio_path(): def test_audio_path():

View File

@@ -70,13 +70,19 @@ def test_e2e_conversions():
(EasyOcrOptions(force_full_page_ocr=True), False), (EasyOcrOptions(force_full_page_ocr=True), False),
] ]
# rapidocr is only available for Python >=3.6,<3.13 for rapidocr_backend in ["onnxruntime", "torch"]:
if sys.version_info < (3, 13): if sys.version_info >= (3, 14) and rapidocr_backend == "onnxruntime":
engines.append((RapidOcrOptions(), False)) # skip onnxruntime backend on Python 3.14
engines.append((RapidOcrOptions(force_full_page_ocr=True), False)) continue
engines.append((RapidOcrOptions(backend=rapidocr_backend), False))
engines.append(
(RapidOcrOptions(backend=rapidocr_backend, force_full_page_ocr=True), False)
)
engines.append( engines.append(
( (
RapidOcrOptions( RapidOcrOptions(
backend=rapidocr_backend,
force_full_page_ocr=True, force_full_page_ocr=True,
rec_font_path="test", rec_font_path="test",
rapidocr_params={"Rec.font_path": None}, # overwrites rec_font_path rapidocr_params={"Rec.font_path": None}, # overwrites rec_font_path

3503
uv.lock generated

File diff suppressed because it is too large Load Diff