feat: Support for Python 3.14 (#2530)

* fix dependencies for py314

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add metadata and CI tests

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add back gliner

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update error message about python 3.14 availability

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* skip tests which cannot run on py 3.14

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix lint

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* remove vllm from py 3.14 deps

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* safe import for vllm

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update lock

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* remove torch.compile()

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update checkbox results after docling-core changes

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* cannot run mlx example in CI

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add test for rapidocr backends and skip onnxruntime on py3.14

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix other occurrences of torch.compile()

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* allow torch.compile for Python <3.14; proper support will be introduced with new torch releases

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Michele Dolfi, 2025-10-28 14:32:15 +01:00 (committed by GitHub)
commit cdffb47b9a, parent 9a6fdf936b
13 changed files with 2278 additions and 1361 deletions


@@ -20,7 +20,7 @@ env:
     tests/test_asr_pipeline.py
     tests/test_threaded_pipeline.py
   PYTEST_TO_SKIP: |-
-  EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping)\.py$'
+  EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping|mlx_whisper_example)\.py$'
 jobs:
   lint:

@@ -62,7 +62,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
     steps:
       - uses: actions/checkout@v5

@@ -129,7 +129,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
     steps:
       - uses: actions/checkout@v5

@@ -201,7 +201,7 @@ jobs:
     strategy:
      fail-fast: false
       matrix:
-        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
     steps:
       - uses: actions/checkout@v5
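
A minimal sketch of how an anchored skip pattern like EXAMPLES_TO_SKIP is presumably applied, matching example basenames and skipping hits; the loop, file names, and the abbreviated pattern are illustrative, not the workflow's actual code:

import re

# Abbreviated stand-in for the EXAMPLES_TO_SKIP value above (illustrative).
EXAMPLES_TO_SKIP = r"^(minimal|mlx_whisper_example)\.py$"

for name in ["minimal.py", "mlx_whisper_example.py", "run_md.py"]:
    # re.match anchors at the start; the trailing $ pins the whole basename.
    action = "skip" if re.match(EXAMPLES_TO_SKIP, name) else "run"
    print(f"{name}: {action}")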


@@ -738,10 +738,15 @@ def convert(  # noqa: C901
                 pipeline_options.vlm_options = SMOLDOCLING_MLX
             except ImportError:
-                _log.warning(
-                    "To run SmolDocling faster, please install mlx-vlm:\n"
-                    "pip install mlx-vlm"
-                )
+                if sys.version_info < (3, 14):
+                    _log.warning(
+                        "To run SmolDocling faster, please install mlx-vlm:\n"
+                        "pip install mlx-vlm"
+                    )
+                else:
+                    _log.warning(
+                        "You can run SmolDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
+                    )
     elif vlm_model == VlmModelType.GRANITEDOCLING:
         pipeline_options.vlm_options = GRANITEDOCLING_TRANSFORMERS

@@ -751,10 +756,16 @@
                 pipeline_options.vlm_options = GRANITEDOCLING_MLX
             except ImportError:
-                _log.warning(
-                    "To run GraniteDocling faster, please install mlx-vlm:\n"
-                    "pip install mlx-vlm"
-                )
+                if sys.version_info < (3, 14):
+                    _log.warning(
+                        "To run GraniteDocling faster, please install mlx-vlm:\n"
+                        "pip install mlx-vlm"
+                    )
+                else:
+                    _log.warning(
+                        "You can run GraniteDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
+                    )
     elif vlm_model == VlmModelType.SMOLDOCLING_VLLM:
         pipeline_options.vlm_options = SMOLDOCLING_VLLM


@@ -1,3 +1,4 @@
+import sys
 import threading
 from collections.abc import Iterable
 from pathlib import Path

@@ -75,7 +76,10 @@ class PictureDescriptionVlmModel(
                 else "sdpa"
             ),
         )
-        self.model = torch.compile(self.model)  # type: ignore
+        if sys.version_info < (3, 14):
+            self.model = torch.compile(self.model)  # type: ignore
+        else:
+            self.model.eval()
         self.provenance = f"{self.options.repo_id}"
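
The same Python-version gate recurs in the transformers VLM and NuExtract models below. A self-contained sketch of the pattern; maybe_compile is a hypothetical helper, and the diffs inline this logic rather than sharing a function:

import sys

import torch


def maybe_compile(model: torch.nn.Module) -> torch.nn.Module:
    # torch.compile is not yet supported on Python 3.14 by current torch
    # releases, so fall back to eager mode there; eval() still disables
    # dropout and other training-only behavior for inference.
    if sys.version_info < (3, 14):
        return torch.compile(model)  # type: ignore[return-value]
    model.eval()
    return model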


@@ -1,5 +1,6 @@
 import importlib.metadata
 import logging
+import sys
 import time
 from collections.abc import Iterable
 from pathlib import Path

@@ -129,7 +130,10 @@ class HuggingFaceTransformersVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
             trust_remote_code=vlm_options.trust_remote_code,
             revision=vlm_options.revision,
         )
-        self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+        if sys.version_info < (3, 14):
+            self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+        else:
+            self.vlm_model.eval()
         # Load generation config
         self.generation_config = GenerationConfig.from_pretrained(


@@ -50,9 +50,14 @@ class HuggingFaceMlxModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
             from mlx_vlm.prompt_utils import apply_chat_template  # type: ignore
             from mlx_vlm.utils import load_config  # type: ignore
         except ImportError:
-            raise ImportError(
-                "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
-            )
+            if sys.version_info < (3, 14):
+                raise ImportError(
+                    "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
+                )
+            else:
+                raise ImportError(
+                    "mlx-vlm is not installed. It is not yet available on Python 3.14."
+                )
         repo_cache_folder = vlm_options.repo_id.replace("/", "--")
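
The guarded import above has the same shape as the vllm and whisper changes below. A generic sketch of the pattern; import_or_hint is a hypothetical helper, and the codebase repeats the logic inline instead:

import sys


def import_or_hint(module: str, pip_name: str):
    # Tailor the ImportError message to whether the optional dependency
    # can exist on this interpreter at all.
    try:
        return __import__(module)
    except ImportError:
        if sys.version_info < (3, 14):
            raise ImportError(
                f"{module} is not installed. Please install it via `pip install {pip_name}`."
            )
        raise ImportError(
            f"{module} is not installed. It is not yet available on Python 3.14."
        )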


@@ -1,4 +1,5 @@
 import logging
+import sys
 import time
 from collections.abc import Iterable
 from pathlib import Path

@@ -153,7 +154,10 @@ class NuExtractTransformersModel(BaseVlmModel, HuggingFaceModelDownloadMixin):
             ),
             trust_remote_code=vlm_options.trust_remote_code,
         )
-        self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+        if sys.version_info < (3, 14):
+            self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+        else:
+            self.vlm_model.eval()
         # Load generation config
         self.generation_config = GenerationConfig.from_pretrained(artifacts_path)


@@ -1,4 +1,5 @@
 import logging
+import sys
 import time
 from collections.abc import Iterable
 from pathlib import Path

@@ -100,7 +101,18 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
             return
         from transformers import AutoProcessor
-        from vllm import LLM, SamplingParams
+        try:
+            from vllm import LLM, SamplingParams
+        except ImportError:
+            if sys.version_info < (3, 14):
+                raise ImportError(
+                    "vllm is not installed. Please install it via `pip install vllm`."
+                )
+            else:
+                raise ImportError(
+                    "vllm is not installed. It is not yet available on Python 3.14."
+                )
         # Device selection
         self.device = decide_device(


@@ -1,6 +1,7 @@
 import logging
 import os
 import re
+import sys
 import tempfile
 from io import BytesIO
 from pathlib import Path

@@ -117,9 +118,15 @@ class _NativeWhisperModel:
         try:
             import whisper  # type: ignore
         except ImportError:
-            raise ImportError(
-                "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
-            )
+            if sys.version_info < (3, 14):
+                raise ImportError(
+                    "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
+                )
+            else:
+                raise ImportError(
+                    "whisper is not installed. Unfortunately its dependencies are not yet available for Python 3.14."
+                )
         self.asr_options = asr_options
         self.max_tokens = asr_options.max_new_tokens
         self.temperature = asr_options.temperature


@@ -30,6 +30,7 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
 ]
 readme = "README.md"
 authors = [

@@ -63,7 +64,7 @@ dependencies = [
     'pandas (>=2.1.4,<3.0.0)',
     'marko (>=2.1.2,<3.0.0)',
     'openpyxl (>=3.1.5,<4.0.0)',
-    'lxml (>=4.0.0,<6.0.0)',
+    'lxml (>=4.0.0,<7.0.0)',
     'pillow (>=10.0.0,<12.0.0)',
     'tqdm (>=4.65.0,<5.0.0)',
     'pluggy (>=1.0.0,<2.0.0)',

@@ -95,19 +96,19 @@ ocrmac = ['ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"']
 vlm = [
     'transformers (>=4.46.0,<5.0.0)',
     'accelerate (>=1.2.1,<2.0.0)',
-    'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
-    'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"',
+    'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64"',
+    'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64"',
     "qwen-vl-utils>=0.0.11",
 ]
 rapidocr = [
-    'rapidocr (>=3.3,<4.0.0) ; python_version < "3.14"',
-    'onnxruntime (>=1.7.0,<2.0.0)',
+    'rapidocr (>=3.3,<4.0.0)',
+    'onnxruntime (>=1.7.0,<2.0.0) ; python_version < "3.14"',
     # 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
     # 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
 ]
 asr = [
-    'mlx-whisper>=0.4.3 ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
-    "openai-whisper>=20250625",
+    'mlx-whisper>=0.4.3 ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64"',
+    'openai-whisper>=20250625 ; python_version < "3.14"',
 ]

 [dependency-groups]

@@ -146,10 +147,10 @@ examples = [
     "langchain-milvus~=0.1",
     "langchain-text-splitters~=0.2",
     "modelscope>=1.29.0",
-    "gliner>=0.2.21",
+    'gliner>=0.2.21 ; python_version < "3.14"',  # gliner depends on onnxruntime which is not available on py3.14
 ]
 constraints = [
-    'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
+    'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10" and python_version < "3.14"',
     'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
 ]
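
The PEP 508 markers above can be sanity-checked against a given interpreter with the packaging library; the marker string is copied from the diff, and the snippet itself is only an illustration:

from packaging.markers import Marker

# True on CPython 3.10 through 3.13, False on 3.14: resolvers then simply
# omit the dependency for that interpreter instead of failing resolution.
marker = Marker('python_version >= "3.10" and python_version < "3.14"')
print(marker.evaluate())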


@@ -16,9 +16,9 @@
 استاندارد اجباری است؟
-بلی
-خير
+- [ ] بلی
+- [x] خير
 مرجع صادرکننده استاندارد

@@ -26,7 +26,7 @@
 آيا توليدکننده محصول، استاندارد مذکور را اخذ نموده است؟
-بلی خير
+- [x] بلی خير
 ## -3 پذيرش در بورس


@@ -1,3 +1,4 @@
+import sys
 from pathlib import Path
 from unittest.mock import Mock, patch

@@ -10,6 +11,11 @@ from docling.datamodel.pipeline_options import AsrPipelineOptions
 from docling.document_converter import AudioFormatOption, DocumentConverter
 from docling.pipeline.asr_pipeline import AsrPipeline

+pytestmark = pytest.mark.skipif(
+    sys.version_info >= (3, 14),
+    reason="Python 3.14 is not yet supported by whisper dependencies.",
+)

 @pytest.fixture
 def test_audio_path():


@@ -70,13 +70,19 @@ def test_e2e_conversions():
         (EasyOcrOptions(force_full_page_ocr=True), False),
     ]

-    # rapidocr is only available for Python >=3.6,<3.13
-    if sys.version_info < (3, 13):
-        engines.append((RapidOcrOptions(), False))
-        engines.append((RapidOcrOptions(force_full_page_ocr=True), False))
+    for rapidocr_backend in ["onnxruntime", "torch"]:
+        if sys.version_info >= (3, 14) and rapidocr_backend == "onnxruntime":
+            # skip onnxruntime backend on Python 3.14
+            continue
+        engines.append((RapidOcrOptions(backend=rapidocr_backend), False))
+        engines.append(
+            (RapidOcrOptions(backend=rapidocr_backend, force_full_page_ocr=True), False)
+        )
+        engines.append(
+            (
+                RapidOcrOptions(
+                    backend=rapidocr_backend,
+                    force_full_page_ocr=True,
+                    rec_font_path="test",
+                    rapidocr_params={"Rec.font_path": None},  # overwrites rec_font_path
uv.lock (generated, 3503 changed lines)

File diff suppressed because it is too large.