Mirror of https://github.com/DS4SD/docling.git (synced 2025-12-08 12:48:28 +00:00)
feat: Support for Python 3.14 (#2530)
* fix dependencies for py314
* add metadata and CI tests
* add back gliner
* update error message about python 3.14 availability
* skip tests which cannot run on py 3.14
* fix lint
* remove vllm from py 3.14 deps
* safe import for vllm
* update lock
* remove torch.compile()
* update checkbox results after docling-core changes
* cannot run mlx example in CI
* add test for rapidocr backends and skip onnxruntime on py3.14
* fix other occurrences of torch.compile()
* allow torch.compile for Python <3.14; proper support will be introduced with new torch releases

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
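The changes below repeat one idiom: gate on `sys.version_info` so that users on Python < 3.14 are told to install the missing optional dependency, while users on 3.14 are told the dependency is not installable there yet. A minimal standalone sketch of that idiom (the `require_mlx_vlm` helper name is hypothetical, not part of this commit):

```python
import sys


def require_mlx_vlm() -> None:
    """Fail with a version-aware message when mlx_vlm is missing (illustrative)."""
    try:
        import mlx_vlm  # noqa: F401
    except ImportError:
        if sys.version_info < (3, 14):
            # The wheel exists for this interpreter; installing it fixes the problem.
            raise ImportError(
                "mlx-vlm is not installed. Please install it via `pip install mlx-vlm`."
            )
        # No wheels for 3.14 yet; a pip hint would only mislead the user.
        raise ImportError("mlx-vlm is not yet available on Python 3.14.")
```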
.github/workflows/checks.yml (vendored, 8 lines changed)
```diff
@@ -20,7 +20,7 @@ env:
     tests/test_asr_pipeline.py
     tests/test_threaded_pipeline.py
   PYTEST_TO_SKIP: |-
-  EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping)\.py$'
+  EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping|mlx_whisper_example)\.py$'

 jobs:
   lint:
@@ -62,7 +62,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
     steps:
       - uses: actions/checkout@v5
@@ -129,7 +129,7 @@ jobs:
     strategy:
       fail-fast: false
      matrix:
-        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
     steps:
       - uses: actions/checkout@v5
@@ -201,7 +201,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
     steps:
       - uses: actions/checkout@v5
```
In the CLI's `convert()` entrypoint, the mlx-vlm hint becomes version-aware for both SmolDocling and GraniteDocling:

```diff
@@ -738,10 +738,15 @@ def convert(  # noqa: C901
                     pipeline_options.vlm_options = SMOLDOCLING_MLX
                 except ImportError:
-                    _log.warning(
-                        "To run SmolDocling faster, please install mlx-vlm:\n"
-                        "pip install mlx-vlm"
-                    )
+                    if sys.version_info < (3, 14):
+                        _log.warning(
+                            "To run SmolDocling faster, please install mlx-vlm:\n"
+                            "pip install mlx-vlm"
+                        )
+                    else:
+                        _log.warning(
+                            "You can run SmolDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
+                        )

         elif vlm_model == VlmModelType.GRANITEDOCLING:
             pipeline_options.vlm_options = GRANITEDOCLING_TRANSFORMERS
@@ -751,10 +756,16 @@ def convert(  # noqa: C901
                     pipeline_options.vlm_options = GRANITEDOCLING_MLX
                 except ImportError:
-                    _log.warning(
-                        "To run GraniteDocling faster, please install mlx-vlm:\n"
-                        "pip install mlx-vlm"
-                    )
+                    if sys.version_info < (3, 14):
+                        _log.warning(
+                            "To run GraniteDocling faster, please install mlx-vlm:\n"
+                            "pip install mlx-vlm"
+                        )
+                    else:
+                        _log.warning(
+                            "You can run GraniteDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
+                        )

         elif vlm_model == VlmModelType.SMOLDOCLING_VLLM:
             pipeline_options.vlm_options = SMOLDOCLING_VLLM
```
In `PictureDescriptionVlmModel`, `torch.compile()` is applied only where supported, with plain eval mode as the fallback on 3.14:

```diff
@@ -1,3 +1,4 @@
+import sys
 import threading
 from collections.abc import Iterable
 from pathlib import Path
@@ -75,7 +76,10 @@ class PictureDescriptionVlmModel(
                 else "sdpa"
             ),
         )
-        self.model = torch.compile(self.model)  # type: ignore
+        if sys.version_info < (3, 14):
+            self.model = torch.compile(self.model)  # type: ignore
+        else:
+            self.model.eval()

         self.provenance = f"{self.options.repo_id}"
```
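The same three-line gate recurs in `HuggingFaceTransformersVlmModel` and `NuExtractTransformersModel` below. If one wanted to factor it out, a small helper would do; this sketch (with the hypothetical name `maybe_compile`) is not part of the commit:

```python
import sys

import torch


def maybe_compile(model: torch.nn.Module) -> torch.nn.Module:
    """Compile the model where torch.compile is supported, else return it in eval mode.

    torch.compile does not yet support Python 3.14, so on that interpreter we
    fall back to plain eager execution.
    """
    if sys.version_info < (3, 14):
        return torch.compile(model)  # type: ignore[return-value]
    model.eval()
    return model
```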
In `HuggingFaceTransformersVlmModel`:

```diff
@@ -1,5 +1,6 @@
 import importlib.metadata
 import logging
+import sys
 import time
 from collections.abc import Iterable
 from pathlib import Path
@@ -129,7 +130,10 @@ class HuggingFaceTransformersVlmModel(BaseVlmPageModel, HuggingFaceModelDownload
             trust_remote_code=vlm_options.trust_remote_code,
             revision=vlm_options.revision,
         )
-        self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+        if sys.version_info < (3, 14):
+            self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+        else:
+            self.vlm_model.eval()

         # Load generation config
         self.generation_config = GenerationConfig.from_pretrained(
```
In `HuggingFaceMlxModel`, the ImportError raised for a missing mlx-vlm gets the same version-aware treatment:

```diff
@@ -50,9 +50,14 @@ class HuggingFaceMlxModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
             from mlx_vlm.prompt_utils import apply_chat_template  # type: ignore
             from mlx_vlm.utils import load_config  # type: ignore
         except ImportError:
-            raise ImportError(
-                "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
-            )
+            if sys.version_info < (3, 14):
+                raise ImportError(
+                    "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
+                )
+            else:
+                raise ImportError(
+                    "mlx-vlm is not installed. It is not yet available on Python 3.14."
+                )

         repo_cache_folder = vlm_options.repo_id.replace("/", "--")
```
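A version-dependent error message like this can be exercised on any interpreter by monkeypatching `sys.version_info`; a hypothetical pytest sketch, not part of this commit:

```python
import sys

import pytest


def version_aware_import_error() -> None:
    # Stand-in for the guarded import above.
    if sys.version_info < (3, 14):
        raise ImportError("mlx-vlm is not installed. Please install it via `pip install mlx-vlm`.")
    raise ImportError("mlx-vlm is not installed. It is not yet available on Python 3.14.")


def test_py314_message(monkeypatch: pytest.MonkeyPatch) -> None:
    # Pretend we are on CPython 3.14 regardless of the real interpreter.
    monkeypatch.setattr(sys, "version_info", (3, 14, 0))
    with pytest.raises(ImportError, match="not yet available on Python 3.14"):
        version_aware_import_error()
```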
In `NuExtractTransformersModel`:

```diff
@@ -1,4 +1,5 @@
 import logging
+import sys
 import time
 from collections.abc import Iterable
 from pathlib import Path
@@ -153,7 +154,10 @@ class NuExtractTransformersModel(BaseVlmModel, HuggingFaceModelDownloadMixin):
             ),
             trust_remote_code=vlm_options.trust_remote_code,
         )
-        self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+        if sys.version_info < (3, 14):
+            self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+        else:
+            self.vlm_model.eval()

         # Load generation config
         self.generation_config = GenerationConfig.from_pretrained(artifacts_path)
```
In `VllmVlmModel`, the previously unconditional vllm import becomes a guarded one (the "safe import for vllm" bullet above):

```diff
@@ -1,4 +1,5 @@
 import logging
+import sys
 import time
 from collections.abc import Iterable
 from pathlib import Path
@@ -100,7 +101,18 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
             return

         from transformers import AutoProcessor
-        from vllm import LLM, SamplingParams
+
+        try:
+            from vllm import LLM, SamplingParams
+        except ImportError:
+            if sys.version_info < (3, 14):
+                raise ImportError(
+                    "vllm is not installed. Please install it via `pip install vllm`."
+                )
+            else:
+                raise ImportError(
+                    "vllm is not installed. It is not yet available on Python 3.14."
+                )

         # Device selection
         self.device = decide_device(
```
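Guarding this import matters because the vlm extra's environment marker (see the pyproject change below) silently omits vllm on Python 3.14: without the try/except, users there would hit a bare ModuleNotFoundError instead of an explanation.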
In the ASR pipeline's `_NativeWhisperModel`:

```diff
@@ -1,6 +1,7 @@
 import logging
 import os
 import re
+import sys
 import tempfile
 from io import BytesIO
 from pathlib import Path
@@ -117,9 +118,15 @@ class _NativeWhisperModel:
         try:
             import whisper  # type: ignore
         except ImportError:
-            raise ImportError(
-                "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
-            )
+            if sys.version_info < (3, 14):
+                raise ImportError(
+                    "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
+                )
+            else:
+                raise ImportError(
+                    "whisper is not installed. Unfortunately its dependencies are not yet available for Python 3.14."
+                )

         self.asr_options = asr_options
         self.max_tokens = asr_options.max_new_tokens
         self.temperature = asr_options.temperature
```
In pyproject.toml, the 3.14 classifier is added, the lxml bound is relaxed, and environment markers keep uninstallable extras off Python 3.14:

```diff
@@ -30,6 +30,7 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
 ]
 readme = "README.md"
 authors = [
@@ -63,7 +64,7 @@ dependencies = [
     'pandas (>=2.1.4,<3.0.0)',
     'marko (>=2.1.2,<3.0.0)',
     'openpyxl (>=3.1.5,<4.0.0)',
-    'lxml (>=4.0.0,<6.0.0)',
+    'lxml (>=4.0.0,<7.0.0)',
     'pillow (>=10.0.0,<12.0.0)',
     'tqdm (>=4.65.0,<5.0.0)',
     'pluggy (>=1.0.0,<2.0.0)',
@@ -95,19 +96,19 @@ ocrmac = ['ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"']
 vlm = [
     'transformers (>=4.46.0,<5.0.0)',
     'accelerate (>=1.2.1,<2.0.0)',
-    'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
-    'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"',
+    'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64"',
+    'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64"',
     "qwen-vl-utils>=0.0.11",
 ]
 rapidocr = [
-    'rapidocr (>=3.3,<4.0.0) ; python_version < "3.14"',
-    'onnxruntime (>=1.7.0,<2.0.0)',
+    'rapidocr (>=3.3,<4.0.0)',
+    'onnxruntime (>=1.7.0,<2.0.0) ; python_version < "3.14"',
     # 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
     # 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
 ]
 asr = [
-    'mlx-whisper>=0.4.3 ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
-    "openai-whisper>=20250625",
+    'mlx-whisper>=0.4.3 ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64"',
+    'openai-whisper>=20250625 ; python_version < "3.14"',
 ]

 [dependency-groups]
@@ -146,10 +147,10 @@ examples = [
     "langchain-milvus~=0.1",
     "langchain-text-splitters~=0.2",
     "modelscope>=1.29.0",
-    "gliner>=0.2.21",
+    'gliner>=0.2.21 ; python_version < "3.14"',  # gliner depends on onnxruntime which is not available on py3.14
 ]
 constraints = [
-    'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
+    'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10" and python_version < "3.14"',
     'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
 ]
```
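These are PEP 508 environment markers: the installer evaluates them against the target interpreter and simply skips a requirement whose marker is false. A quick way to sanity-check a marker with the `packaging` library (illustrative, not part of the commit):

```python
from packaging.markers import Marker

marker = Marker(
    'python_version >= "3.10" and python_version < "3.14" '
    'and sys_platform == "darwin" and platform_machine == "arm64"'
)

# Evaluate against explicit environments rather than the running interpreter.
apple_silicon = {"sys_platform": "darwin", "platform_machine": "arm64"}
print(marker.evaluate({**apple_silicon, "python_version": "3.13"}))  # True: mlx-vlm installed
print(marker.evaluate({**apple_silicon, "python_version": "3.14"}))  # False: mlx-vlm skipped
```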
In a Farsi (RTL) groundtruth Markdown file, checkbox markers are updated after the docling-core changes (the lines read "Is the standard mandatory?", "yes"/"no", "standard issuing authority", "Has the product manufacturer obtained said standard?", and the heading "3. Admission to the stock exchange"):

```diff
@@ -16,9 +16,9 @@
 استاندارد اجباری است؟

-بلی
+- [ ] بلی

-خير
+- [x] خير

 مرجع صادرکننده استاندارد
@@ -26,7 +26,7 @@
 آيا توليدکننده محصول، استاندارد مذکور را اخذ نموده است؟

-بلی خير
+- [x] بلی خير

 ## -3 پذيرش در بورس
```
The ASR pipeline test module (presumably tests/test_asr_pipeline.py, cf. the PYTEST_TO_SKIP list above) is skipped wholesale on 3.14:

```diff
@@ -1,3 +1,4 @@
+import sys
 from pathlib import Path
 from unittest.mock import Mock, patch
@@ -10,6 +11,11 @@ from docling.datamodel.pipeline_options import AsrPipelineOptions
 from docling.document_converter import AudioFormatOption, DocumentConverter
 from docling.pipeline.asr_pipeline import AsrPipeline

+pytestmark = pytest.mark.skipif(
+    sys.version_info >= (3, 14),
+    reason="Python 3.14 is not yet supported by whisper dependencies.",
+)
+

 @pytest.fixture
 def test_audio_path():
```
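`pytestmark` at module level skips every test in the file at once. Where only some cases are affected, the same marker can instead be attached per parametrized case; a hypothetical variant (test and file names invented):

```python
import sys

import pytest

requires_whisper = pytest.mark.skipif(
    sys.version_info >= (3, 14),
    reason="Python 3.14 is not yet supported by whisper dependencies.",
)


@pytest.mark.parametrize(
    "audio_file",
    [
        pytest.param("sample.wav", marks=requires_whisper),
        "metadata_only.json",  # would still run on 3.14
    ],
)
def test_pipeline(audio_file: str) -> None:
    ...
```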
And in the OCR end-to-end test, the blanket Python-version gate on RapidOCR is replaced by a loop over its backends, skipping only onnxruntime on 3.14:

```diff
@@ -70,13 +70,19 @@ def test_e2e_conversions():
         (EasyOcrOptions(force_full_page_ocr=True), False),
     ]

-    # rapidocr is only available for Python >=3.6,<3.13
-    if sys.version_info < (3, 13):
-        engines.append((RapidOcrOptions(), False))
-        engines.append((RapidOcrOptions(force_full_page_ocr=True), False))
+    for rapidocr_backend in ["onnxruntime", "torch"]:
+        if sys.version_info >= (3, 14) and rapidocr_backend == "onnxruntime":
+            # skip onnxruntime backend on Python 3.14
+            continue
+
+        engines.append((RapidOcrOptions(backend=rapidocr_backend), False))
+        engines.append(
+            (RapidOcrOptions(backend=rapidocr_backend, force_full_page_ocr=True), False)
+        )
+        engines.append(
+            (
+                RapidOcrOptions(
+                    backend=rapidocr_backend,
+                    force_full_page_ocr=True,
+                    rec_font_path="test",
+                    rapidocr_params={"Rec.font_path": None},  # overwrites rec_font_path
```