diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 14a59f6d..6cd0d38b 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.10', '3.11', '3.12'] + python-version: ['3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v3 - name: Install tesseract diff --git a/CHANGELOG.md b/CHANGELOG.md index fafb1ad5..933aba35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +## [v2.7.0](https://github.com/DS4SD/docling/releases/tag/v2.7.0) - 2024-11-20 + +### Feature + +* Add support for `ocrmac` OCR engine on macOS ([#276](https://github.com/DS4SD/docling/issues/276)) ([`6efa96c`](https://github.com/DS4SD/docling/commit/6efa96c983fc509b2c7b35a4a25a714284f2f782)) + +### Fix + +* Python3.9 support ([#396](https://github.com/DS4SD/docling/issues/396)) ([`7b013ab`](https://github.com/DS4SD/docling/commit/7b013abcf31ba49e2141dfd408bc8c23e8d87d91)) +* Propagate document limits to converter ([#388](https://github.com/DS4SD/docling/issues/388)) ([`32ebf55`](https://github.com/DS4SD/docling/commit/32ebf55e3338dd22f9a23c55595f15835794d961)) + ## [v2.6.0](https://github.com/DS4SD/docling/releases/tag/v2.6.0) - 2024-11-19 ### Feature diff --git a/README.md b/README.md index ca9ac4d1..893b604f 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ [![arXiv](https://img.shields.io/badge/arXiv-2408.09869-b31b1b.svg)](https://arxiv.org/abs/2408.09869) [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://ds4sd.github.io/docling/) [![PyPI version](https://img.shields.io/pypi/v/docling)](https://pypi.org/project/docling/) -![Python](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-blue) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling)](https://pypi.org/project/docling/) 
[![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/) diff --git a/docling/cli/main.py b/docling/cli/main.py index 416c0d31..39201ffe 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -24,6 +24,7 @@ from docling.datamodel.base_models import ( from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( EasyOcrOptions, + OcrMacOptions, OcrOptions, PdfPipelineOptions, TableFormerMode, @@ -75,6 +76,7 @@ class OcrEngine(str, Enum): EASYOCR = "easyocr" TESSERACT_CLI = "tesseract_cli" TESSERACT = "tesseract" + OCRMAC = "ocrmac" PADDLEOCR = "paddleocr" @@ -254,17 +256,18 @@ def convert( export_txt = OutputFormat.TEXT in to_formats export_doctags = OutputFormat.DOCTAGS in to_formats - match ocr_engine: - case OcrEngine.EASYOCR: - ocr_options: OcrOptions = EasyOcrOptions(force_full_page_ocr=force_ocr) - case OcrEngine.TESSERACT_CLI: - ocr_options = TesseractCliOcrOptions(force_full_page_ocr=force_ocr) - case OcrEngine.TESSERACT: - ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr) - case OcrEngine.PADDLEOCR: - ocr_options = PaddleOcrOptions(force_full_page_ocr=force_ocr) - case _: - raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}") + if ocr_engine == OcrEngine.EASYOCR: + ocr_options: OcrOptions = EasyOcrOptions(force_full_page_ocr=force_ocr) + elif ocr_engine == OcrEngine.TESSERACT_CLI: + ocr_options = TesseractCliOcrOptions(force_full_page_ocr=force_ocr) + elif ocr_engine == OcrEngine.TESSERACT: + ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr) + elif ocr_engine == OcrEngine.OCRMAC: + ocr_options = OcrMacOptions(force_full_page_ocr=force_ocr) + elif ocr_engine == 
OcrEngine.PADDLEOCR: + ocr_options = PaddleOcrOptions(force_full_page_ocr=force_ocr) + else: + raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}") ocr_lang_list = _split_list(ocr_lang) if ocr_lang_list is not None: @@ -281,15 +284,14 @@ def convert( if artifacts_path is not None: pipeline_options.artifacts_path = artifacts_path - match pdf_backend: - case PdfBackend.DLPARSE_V1: - backend: Type[PdfDocumentBackend] = DoclingParseDocumentBackend - case PdfBackend.DLPARSE_V2: - backend = DoclingParseV2DocumentBackend - case PdfBackend.PYPDFIUM2: - backend = PyPdfiumDocumentBackend - case _: - raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}") + if pdf_backend == PdfBackend.DLPARSE_V1: + backend: Type[PdfDocumentBackend] = DoclingParseDocumentBackend + elif pdf_backend == PdfBackend.DLPARSE_V2: + backend = DoclingParseV2DocumentBackend + elif pdf_backend == PdfBackend.PYPDFIUM2: + backend = PyPdfiumDocumentBackend + else: + raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}") format_options: Dict[InputFormat, FormatOption] = { InputFormat.PDF: PdfFormatOption( diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 99d6c9f2..4e8617c7 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -1,6 +1,6 @@ from enum import Enum from pathlib import Path -from typing import List, Literal, Optional, Union +from typing import List, Literal, Optional, Union, Annotated from pydantic import BaseModel, ConfigDict, Field @@ -43,7 +43,10 @@ class EasyOcrOptions(OcrOptions): class PaddleOcrOptions(OcrOptions): kind: Literal["paddleocr"] = "paddleocr" - lang: str = "en" + lang: Annotated[ + list[str], + Field(min_items=1, max_items=1) # Requires exactly one item in the list + ] = ["en"] use_gpu: bool = True # same default as paddleocr.ocr use_angle_cls: bool = True show_log: bool = False @@ -75,6 +78,17 @@ class TesseractOcrOptions(OcrOptions): ) +class 
OcrMacOptions(OcrOptions): + kind: Literal["ocrmac"] = "ocrmac" + lang: List[str] = ["fr-FR", "de-DE", "es-ES", "en-US"] + recognition: str = "accurate" + framework: str = "vision" + + model_config = ConfigDict( + extra="forbid", + ) + + class PipelineOptions(BaseModel): create_legacy_output: bool = ( True # This defautl will be set to False on a future version of docling @@ -87,9 +101,9 @@ class PdfPipelineOptions(PipelineOptions): do_ocr: bool = True # True: perform OCR, replace programmatic PDF text table_structure_options: TableStructureOptions = TableStructureOptions() - ocr_options: Union[EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, PaddleOcrOptions] = ( - Field(EasyOcrOptions(), discriminator="kind") - ) + ocr_options: Union[ + EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, PaddleOcrOptions, OcrMacOptions + ] = Field(EasyOcrOptions(), discriminator="kind") images_scale: float = 1.0 generate_page_images: bool = False diff --git a/docling/document_converter.py b/docling/document_converter.py index 9304fb11..74e6f84a 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -3,7 +3,7 @@ import sys import time from functools import partial from pathlib import Path -from typing import Dict, Iterable, Iterator, List, Optional, Type +from typing import Dict, Iterable, Iterator, List, Optional, Type, Union from pydantic import BaseModel, ConfigDict, model_validator, validate_call @@ -155,7 +155,7 @@ class DocumentConverter: @validate_call(config=ConfigDict(strict=True)) def convert( self, - source: Path | str | DocumentStream, # TODO review naming + source: Union[Path, str, DocumentStream], # TODO review naming raises_on_error: bool = True, max_num_pages: int = sys.maxsize, max_file_size: int = sys.maxsize, @@ -172,7 +172,7 @@ class DocumentConverter: @validate_call(config=ConfigDict(strict=True)) def convert_all( self, - source: Iterable[Path | str | DocumentStream], # TODO review naming + source: 
Iterable[Union[Path, str, DocumentStream]], # TODO review naming raises_on_error: bool = True, # True: raises on first conversion error; False: does not raise on conv error max_num_pages: int = sys.maxsize, max_file_size: int = sys.maxsize, diff --git a/docling/models/ocr_mac_model.py b/docling/models/ocr_mac_model.py new file mode 100644 index 00000000..38bcf1ca --- /dev/null +++ b/docling/models/ocr_mac_model.py @@ -0,0 +1,118 @@ +import logging +import tempfile +from typing import Iterable, Optional, Tuple + +from docling_core.types.doc import BoundingBox, CoordOrigin + +from docling.datamodel.base_models import OcrCell, Page +from docling.datamodel.document import ConversionResult +from docling.datamodel.pipeline_options import OcrMacOptions +from docling.datamodel.settings import settings +from docling.models.base_ocr_model import BaseOcrModel +from docling.utils.profiling import TimeRecorder + +_log = logging.getLogger(__name__) + + +class OcrMacModel(BaseOcrModel): + def __init__(self, enabled: bool, options: OcrMacOptions): + super().__init__(enabled=enabled, options=options) + self.options: OcrMacOptions + + self.scale = 3 # multiplier for 72 dpi == 216 dpi. + + if self.enabled: + install_errmsg = ( + "ocrmac is not correctly installed. " + "Please install it via `pip install ocrmac` to use this OCR engine. " + "Alternatively, Docling has support for other OCR engines. 
See the documentation: " + "https://ds4sd.github.io/docling/installation/" + ) + try: + from ocrmac import ocrmac + except ImportError: + raise ImportError(install_errmsg) + + self.reader_RIL = ocrmac.OCR + + def __call__( + self, conv_res: ConversionResult, page_batch: Iterable[Page] + ) -> Iterable[Page]: + + if not self.enabled: + yield from page_batch + return + + for page in page_batch: + assert page._backend is not None + if not page._backend.is_valid(): + yield page + else: + with TimeRecorder(conv_res, "ocr"): + + ocr_rects = self.get_ocr_rects(page) + + all_ocr_cells = [] + for ocr_rect in ocr_rects: + # Skip zero area boxes + if ocr_rect.area() == 0: + continue + high_res_image = page._backend.get_page_image( + scale=self.scale, cropbox=ocr_rect + ) + + with tempfile.NamedTemporaryFile( + suffix=".png", mode="w" + ) as image_file: + fname = image_file.name + high_res_image.save(fname) + + boxes = self.reader_RIL( + fname, + recognition_level=self.options.recognition, + framework=self.options.framework, + language_preference=self.options.lang, + ).recognize() + + im_width, im_height = high_res_image.size + cells = [] + for ix, (text, confidence, box) in enumerate(boxes): + x = float(box[0]) + y = float(box[1]) + w = float(box[2]) + h = float(box[3]) + + x1 = x * im_width + y2 = (1 - y) * im_height + + x2 = x1 + w * im_width + y1 = y2 - h * im_height + + left = x1 / self.scale + top = y1 / self.scale + right = x2 / self.scale + bottom = y2 / self.scale + + cells.append( + OcrCell( + id=ix, + text=text, + confidence=confidence, + bbox=BoundingBox.from_tuple( + coord=(left, top, right, bottom), + origin=CoordOrigin.TOPLEFT, + ), + ) + ) + + # del high_res_image + all_ocr_cells.extend(cells) + + # Post-process the cells + page.cells = self.post_process_cells(all_ocr_cells, page.cells) + + # DEBUG code: + if settings.debug.visualize_ocr: + self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects) + + yield page diff --git a/docling/models/paddle_ocr_model.py 
b/docling/models/paddle_ocr_model.py index ad40db44..5547a6c0 100644 --- a/docling/models/paddle_ocr_model.py +++ b/docling/models/paddle_ocr_model.py @@ -4,13 +4,12 @@ from typing import Iterable import numpy from docling_core.types.doc import BoundingBox, CoordOrigin -from docling.datamodel.base_models import Cell, OcrCell, Page +from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import PaddleOcrOptions from docling.datamodel.settings import settings from docling.models.base_ocr_model import BaseOcrModel from docling.utils.profiling import TimeRecorder -import cv2 _log = logging.getLogger(__name__) @@ -32,7 +31,7 @@ class PaddleOcrModel(BaseOcrModel): ) self.reader = PaddleOCR( - lang=self.options.lang, + lang=self.options.lang[0], use_gpu=self.options.use_gpu, use_angle_cls=self.options.use_angle_cls, show_log=self.options.show_log, diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index 8f12eaf2..2908d0ee 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -1,4 +1,5 @@ import logging +import sys from pathlib import Path from typing import Optional @@ -10,6 +11,7 @@ from docling.datamodel.base_models import AssembledUnit, Page from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( EasyOcrOptions, + OcrMacOptions, PdfPipelineOptions, TesseractCliOcrOptions, TesseractOcrOptions, @@ -20,6 +22,7 @@ from docling.models.ds_glm_model import GlmModel, GlmOptions from docling.models.easyocr_model import EasyOcrModel from docling.models.paddle_ocr_model import PaddleOcrModel from docling.models.layout_model import LayoutModel +from docling.models.ocr_mac_model import OcrMacModel from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions from docling.models.page_preprocessing_model import ( 
PagePreprocessingModel, @@ -120,6 +123,15 @@ class StandardPdfPipeline(PaginatedPipeline): enabled=self.pipeline_options.do_ocr, options=self.pipeline_options.ocr_options, ) + elif isinstance(self.pipeline_options.ocr_options, OcrMacOptions): + if "darwin" != sys.platform: + raise RuntimeError( + f"The specified OCR type is only supported on Mac: {self.pipeline_options.ocr_options.kind}." + ) + return OcrMacModel( + enabled=self.pipeline_options.do_ocr, + options=self.pipeline_options.ocr_options, + ) elif isinstance(self.pipeline_options.ocr_options, PaddleOcrOptions): return PaddleOcrModel( enabled=self.pipeline_options.do_ocr, diff --git a/docs/examples/custom_convert.py b/docs/examples/custom_convert.py index 7631848b..2d300904 100644 --- a/docs/examples/custom_convert.py +++ b/docs/examples/custom_convert.py @@ -7,6 +7,7 @@ from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.document_converter import DocumentConverter, PdfFormatOption +from docling.models.ocr_mac_model import OcrMacOptions from docling.models.tesseract_ocr_cli_model import TesseractCliOcrOptions from docling.models.tesseract_ocr_model import TesseractOcrOptions @@ -122,6 +123,20 @@ def main(): # } # ) + # Docling Parse with ocrmac(Mac only) + # ---------------------- + # pipeline_options = PdfPipelineOptions() + # pipeline_options.do_ocr = True + # pipeline_options.do_table_structure = True + # pipeline_options.table_structure_options.do_cell_matching = True + # pipeline_options.ocr_options = OcrMacOptions() + + # doc_converter = DocumentConverter( + # format_options={ + # InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options) + # } + # ) + ########################################################################### start_time = time.time() diff --git a/docs/examples/full_page_ocr.py b/docs/examples/full_page_ocr.py index 
308f3305..251617a3 100644 --- a/docs/examples/full_page_ocr.py +++ b/docs/examples/full_page_ocr.py @@ -4,6 +4,7 @@ from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import ( EasyOcrOptions, + OcrMacOptions, PdfPipelineOptions, TesseractCliOcrOptions, TesseractOcrOptions, @@ -20,9 +21,10 @@ def main(): pipeline_options.do_table_structure = True pipeline_options.table_structure_options.do_cell_matching = True - # Any of the OCR options can be used:EasyOcrOptions, TesseractOcrOptions, TesseractCliOcrOptions, PaddleOcrOptions + # Any of the OCR options can be used:EasyOcrOptions, TesseractOcrOptions, TesseractCliOcrOptions, OcrMacOptions(Mac only), PaddleOcrOptions # ocr_options = EasyOcrOptions(force_full_page_ocr=True) # ocr_options = TesseractOcrOptions(force_full_page_ocr=True) + # ocr_options = OcrMacOptions(force_full_page_ocr=True) # ocr_options = PaddleOcrOptions(force_full_page_ocr=True) ocr_options = TesseractCliOcrOptions(force_full_page_ocr=True) pipeline_options.ocr_options = ocr_options diff --git a/docs/installation.md b/docs/installation.md index efd71e86..d3d776e4 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -30,6 +30,7 @@ Works on macOS, Linux, and Windows, with support for both x86_64 and arm64 archi | [EasyOCR](https://github.com/JaidedAI/EasyOCR) | Default in Docling or via `pip install easyocr`. | `EasyOcrOptions` | | Tesseract | System dependency. See description for Tesseract and Tesserocr below. | `TesseractOcrOptions` | | Tesseract CLI | System dependency. See description below. | `TesseractCliOcrOptions` | + | OcrMac | macOS only. Install via `pip install ocrmac`. See description below. 
| `OcrMacOptions` | | PaddleOCR | Extra feature not included in Default Docling installation can be installed via `pip install paddlepaddle paddleocr` | `PaddleOcrOptions` | The Docling `DocumentConverter` allows to choose the OCR engine with the `ocr_options` settings. For example @@ -92,6 +93,17 @@ Works on macOS, Linux, and Windows, with support for both x86_64 and arm64 archi pip install --no-binary :all: tesserocr ``` +

ocrmac installation

+ + [ocrmac](https://github.com/straussmaximilian/ocrmac) uses + Apple's Vision (or LiveText) framework as its OCR backend. + To use this engine with Docling, ocrmac must be installed on your system. + It only works on macOS 10.15 or newer. + + ```console + pip install ocrmac + ``` + ## Development setup To develop Docling features, bugfixes etc., install as follows from your local clone's root dir: diff --git a/poetry.lock b/poetry.lock index 63d6d06a..efb07a68 100644 --- a/poetry.lock +++ b/poetry.lock @@ -898,13 +898,13 @@ tabulate = ">=0.9.0,<0.10.0" [[package]] name = "docling-ibm-models" -version = "2.0.5" +version = "2.0.6" description = "This package contains the AI models used by the Docling PDF conversion package" optional = false -python-versions = "<4.0,>=3.10" +python-versions = "<4.0,>=3.9" files = [ - {file = "docling_ibm_models-2.0.5-py3-none-any.whl", hash = "sha256:a939acd6fdd97a4c2422af1e303a059ff8150d125d66875861ee927e6e5da8de"}, - {file = "docling_ibm_models-2.0.5.tar.gz", hash = "sha256:3157755e206f0fa364094e3b87a2e573b0dd4f1591083d852b6b71c6e3bb7cc9"}, + {file = "docling_ibm_models-2.0.6-py3-none-any.whl", hash = "sha256:1702b413353d18089511cb73fc325606eb3601b1406b1367a7c5070081f44af2"}, + {file = "docling_ibm_models-2.0.6.tar.gz", hash = "sha256:b06bb8e426c8d53cb300b17a432120917a335390665302d82f311a3647ee1bca"}, ] [package.dependencies] @@ -922,41 +922,49 @@ tqdm = ">=4.64.0,<5.0.0" [[package]] name = "docling-parse" -version = "2.0.4" +version = "2.1.0" description = "Simple package to extract text with coordinates from programmatic PDFs" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "docling_parse-2.0.4-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:1ba71aa48538fd118b7f5e872573d384c335d205d3c6bde102067e0bf2b7d6a9"}, - {file = "docling_parse-2.0.4-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:156f34124408a1d016e6a3e1c5a82f58e43c96acc8f3896e81bfb2b5ecc127d5"}, - {file = 
"docling_parse-2.0.4-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:32f6e791bceb3b1cac357878929ec976aeb50c40b395518934f4817bb2530eae"}, - {file = "docling_parse-2.0.4-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:580b01b9276845a410d3aa59397cacb06ad3cf4f471bdfd18187ac0dfdcaaafc"}, - {file = "docling_parse-2.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4fce1f091aed82e16726658174d06098642c197ce1ded9508571aa2416a2a03"}, - {file = "docling_parse-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5447cce907ba6725609346f04d38a83671d2e0b13b468d27a0a861d96af144"}, - {file = "docling_parse-2.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:ce2120287efe4fe408795cfeea881a71d6980527a46ee583a69247e8404d4c0b"}, - {file = "docling_parse-2.0.4-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:52c94fe627382541e13a8e7fbad8242b618636db55eeeeacc6e92dbf88130812"}, - {file = "docling_parse-2.0.4-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:10c7a2e68a124ebb54b1e27ce6c85ef2f4d9da294e391fc131a9b39b1f9ce657"}, - {file = "docling_parse-2.0.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:43dc41995310ed0b0015bea6f72df7cb71106a8550d79946f66f30b2ab2c3a29"}, - {file = "docling_parse-2.0.4-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:f385d97cb0cf0933a5f0eb4da8b0f9fc9d8629bbf93d57b9043b7a51ba0b33c8"}, - {file = "docling_parse-2.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:449c449168cada11eaa83a779e2c25ed4e9e9aec63db2012222ee28fa048a020"}, - {file = "docling_parse-2.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87dbf20715dbbb9b7d5ff49475b65ce88454c43c0b00bb8ec5bda30643c79003"}, - {file = "docling_parse-2.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:6e56726829cb82977f5441db4e1f4d9357faf3ed3dfd55bfa135e650d476a8d8"}, - {file = "docling_parse-2.0.4-cp312-cp312-macosx_13_0_arm64.whl", hash = 
"sha256:60c560ed7fdfda1748dca23c858d2d5eb0eff5858fef060bfc4851e1f949e915"}, - {file = "docling_parse-2.0.4-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:d35c8c3dd8f580820ec8905be48e37a36f8c3fe8cacbe366ba75c7c35e0de938"}, - {file = "docling_parse-2.0.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:419dabf6aa0f895878d489a95bdd173661d0891674638c6c01a9b5ca8f156839"}, - {file = "docling_parse-2.0.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:ff7fb21829aa2acad6874ac78b87cfaa642b0910ae6d60e90007c2021fe05c73"}, - {file = "docling_parse-2.0.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:132e7db9042f13141ec089562478737fb8df70fcd33a0cb0161c7e6cfebf5b46"}, - {file = "docling_parse-2.0.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b1ccc13bb32b5c5877f9c3f8870a88beb56d1ab3335ce8a81561cdba1054dfb"}, - {file = "docling_parse-2.0.4-cp312-cp312-win_amd64.whl", hash = "sha256:ac34fae4e0080dd8719c22a4aa49a013003a13f3f6bf68f5763136ac7626e390"}, - {file = "docling_parse-2.0.4-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:e8cc56e41ae4caf4302ebeaaeb02de2b60edcf5ed4bdcdf13a67eca0c1b9f39a"}, - {file = "docling_parse-2.0.4-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:bc2efba8183356c6437a62e9802055988f5edc2d907ea1a42a2613737b2fc77e"}, - {file = "docling_parse-2.0.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:db839a2a7a8742b93a5cab4d91c664938306d248177bc5b716527003c32054a3"}, - {file = "docling_parse-2.0.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:737fb79b6c91a676ac7cdf4ced65c85f687a968b9d1d2ef95b04958fbbc554ff"}, - {file = "docling_parse-2.0.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c02edd935eb5d4d3b4a64564ac92f6a427bb106cb5632f745d853a6ba7b7441"}, - {file = "docling_parse-2.0.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e84a3c50086380fff7925cfd9e9e4e62b2d2f4b79660520999f39207d478b18"}, - {file = 
"docling_parse-2.0.4-cp313-cp313-win_amd64.whl", hash = "sha256:99cfb99c1fc65573a45e2c99b98cc6483134451d42a81b9f4cea27e4e858415e"}, - {file = "docling_parse-2.0.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2e2dbe4e18b6aa2f2fe8801685846d7b6a9bb355d6eac48b697cd9d1b62501e1"}, - {file = "docling_parse-2.0.4.tar.gz", hash = "sha256:bdcdfe070509e137846108056931e3738ad3225fcb31ed1496e9368690c3036d"}, + {file = "docling_parse-2.1.0-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:9bcb089b52fe2e8b414b7a6e812d8402c3a7c664c30c71d71fb6293605ea71cc"}, + {file = "docling_parse-2.1.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:430471c51ddb44f180a2350955d5f3e6a507449165e062e6d2bf94a77e3a9ce3"}, + {file = "docling_parse-2.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:f6b9d5883af783516861732eacd03cd37920c1ec4e16ad65b8ddeface8df05a3"}, + {file = "docling_parse-2.1.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:2a851f28cd61ebe1a94ae9f076ae33e228a80f2c216e7fe558540d6aca22a31a"}, + {file = "docling_parse-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdd7e07d836d39f5fc0703ebd39ae83a453f449af8508937da6374c12a237084"}, + {file = "docling_parse-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a3b53a03e8b4d693398f826ed4212bb5903dd557f8a33753248f83762130af8"}, + {file = "docling_parse-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:92e5c66368bd2b316c7f5b8a55a82052037c1e3b182263628e157fea0d8c92ca"}, + {file = "docling_parse-2.1.0-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:55f720106faf7999d221cc198bd2e22336aa98f46b3456100ec8ea42f6c90e85"}, + {file = "docling_parse-2.1.0-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:e0da2c524d2dce0bd5d3e145964e21dc3ab56c58f2c8940e4aa8e62863a393da"}, + {file = "docling_parse-2.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:691f4f1753b59c3bb4cca0c2ad87fd26f59223387cdc0ee3a3d8d6d849793625"}, + {file = 
"docling_parse-2.1.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:fadda03667fe9e52f3be92ed6f8ce3d8f7209358b755a5950fd0348de79141d0"}, + {file = "docling_parse-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef354a061a7e57f20baf56f8e8d64b94876dae8d098ddd0d941207d81e8b8f80"}, + {file = "docling_parse-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5470b110f35a0a30231607bb1a9ca4e2cb3bf2257d67b29608caa71c553b8a4"}, + {file = "docling_parse-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:21a97cfa79179c875d451938b775685f382a47be8f468720e743e4acfd13755e"}, + {file = "docling_parse-2.1.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:72633ec66e5391479260b99fd1d2ff8abd029e6dbe9782c5bd7583037cdc8018"}, + {file = "docling_parse-2.1.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:4f29321823fdde287b8986ad23b034de4d09948f4dad80a01c4b853dc923091e"}, + {file = "docling_parse-2.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:728ef624ebd487d12872af4ace05a8a25ce52a4debd9da1d870b96e5a2defb8a"}, + {file = "docling_parse-2.1.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:738820a424d27409da2b39af705dd3aa5dc4090f980638ff4f49865e5444c958"}, + {file = "docling_parse-2.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7360bdc2aacd463076799984c989669d1711295a643d2f4be8033150c809b33"}, + {file = "docling_parse-2.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3a0853dd61aeb381560fcc8e8bf8a444992478ad5b9895932627de0dd14000d"}, + {file = "docling_parse-2.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:4ce35cc16ff34f23679c50dd3dfb199df10a4803b17e95f91595abd14232c5af"}, + {file = "docling_parse-2.1.0-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:eb38e62bb4202025d8a5a2154cd383db13259707eb753307c7bf9f446d519364"}, + {file = "docling_parse-2.1.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = 
"sha256:d76b5888cbad92410ec92c6deadf1c1a9467f4498c697a3330eacc51e0f6a5c4"}, + {file = "docling_parse-2.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:610f64eba191a0501fb09bcad6b34b46f0d58179d5ef0e7071356ee35bc6b558"}, + {file = "docling_parse-2.1.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:f15061e05ebbf15f723e0fedc26b4e473a6399e2890d9475a21a930eb61f1e93"}, + {file = "docling_parse-2.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2089530a3751dfa71ee0fa37d585fcb9496f1400e642b582d99f85afe79e851"}, + {file = "docling_parse-2.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1ae020256c3593028011fda0af1de417329c868037698681a58a7d0a1e1a194"}, + {file = "docling_parse-2.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:5215ff823fcc2e65bd924731b0de16dbdd507424fce32745e15ac54fe059a045"}, + {file = "docling_parse-2.1.0-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:0e002df567f09f0dd982cea0c17f287d55a0b953d2b13ee9bcc51a1c2e306cc0"}, + {file = "docling_parse-2.1.0-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:9eef2c4c47586410083b6db9210bc2cef12af2eb67f8c88dcd2b46ca5010482b"}, + {file = "docling_parse-2.1.0-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:944cf36e4b9db0b1477e71f891321ea522498c8b9039a2acff52d85feed2f95e"}, + {file = "docling_parse-2.1.0-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:68c357f66c0899ea1deb95a84ad929aaba10bc68bee2606563b1aae62d448186"}, + {file = "docling_parse-2.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26e98077db92fce59fe356a411c944525182f3cd8e9b3d228787439eb5429c63"}, + {file = "docling_parse-2.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dcacc29528f25ab65cf366829fad2584d2f23abbcf792e258a1de4ee0685f09"}, + {file = "docling_parse-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:9c7af4d68ca51ed7fa1170a7715a4ae97271cf30fce8b623bc8cce92aaa253e2"}, + {file = 
"docling_parse-2.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2316a5f4fd77ac673dace32cc6011f56fd1815941dc651df244a52cfd0bc70a6"}, + {file = "docling_parse-2.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:540f4b7760addc6bf83358a6fb8853c50048bf9687ea0e89a79d515f75a26ac0"}, + {file = "docling_parse-2.1.0.tar.gz", hash = "sha256:e8d39286f46842ba0a99f383b28712b7c8198a18be71b69fe2d4cf5105daa7f3"}, ] [package.dependencies] @@ -1648,13 +1656,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio [[package]] name = "ipython" -version = "8.29.0" +version = "8.18.1" description = "IPython: Productive Interactive Computing" optional = false -python-versions = ">=3.10" +python-versions = ">=3.9" files = [ - {file = "ipython-8.29.0-py3-none-any.whl", hash = "sha256:0188a1bd83267192123ccea7f4a8ed0a78910535dbaa3f37671dca76ebd429c8"}, - {file = "ipython-8.29.0.tar.gz", hash = "sha256:40b60e15b22591450eef73e40a027cf77bd652e757523eebc5bd7c7c498290eb"}, + {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"}, + {file = "ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27"}, ] [package.dependencies] @@ -1663,26 +1671,25 @@ decorator = "*" exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} jedi = ">=0.16" matplotlib-inline = "*" -pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""} +pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""} prompt-toolkit = ">=3.0.41,<3.1.0" pygments = ">=2.4.0" stack-data = "*" -traitlets = ">=5.13.0" -typing-extensions = {version = ">=4.6", markers = "python_version < \"3.12\""} +traitlets = ">=5" +typing-extensions = {version = "*", markers = "python_version < \"3.10\""} [package.extras] -all = ["ipython[black,doc,kernel,matplotlib,nbconvert,nbformat,notebook,parallel,qtconsole]", 
"ipython[test,test-extra]"] +all = ["black", "curio", "docrepr", "exceptiongroup", "ipykernel", "ipyparallel", "ipywidgets", "matplotlib", "matplotlib (!=3.2.0)", "nbconvert", "nbformat", "notebook", "numpy (>=1.22)", "pandas", "pickleshare", "pytest (<7)", "pytest (<7.1)", "pytest-asyncio (<0.22)", "qtconsole", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "trio", "typing-extensions"] black = ["black"] -doc = ["docrepr", "exceptiongroup", "intersphinx-registry", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "sphinxcontrib-jquery", "tomli", "typing-extensions"] +doc = ["docrepr", "exceptiongroup", "ipykernel", "matplotlib", "pickleshare", "pytest (<7)", "pytest (<7.1)", "pytest-asyncio (<0.22)", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "typing-extensions"] kernel = ["ipykernel"] -matplotlib = ["matplotlib"] nbconvert = ["nbconvert"] nbformat = ["nbformat"] notebook = ["ipywidgets", "notebook"] parallel = ["ipyparallel"] qtconsole = ["qtconsole"] -test = ["packaging", "pickleshare", "pytest", "pytest-asyncio (<0.22)", "testpath"] -test-extra = ["curio", "ipython[test]", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.23)", "pandas", "trio"] +test = ["pickleshare", "pytest (<7.1)", "pytest-asyncio (<0.22)", "testpath"] +test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.22)", "pandas", "pickleshare", "pytest (<7.1)", "pytest-asyncio (<0.22)", "testpath", "trio"] [[package]] name = "ipywidgets" @@ -1936,6 +1943,7 @@ files = [ ] [package.dependencies] +importlib-metadata = {version = ">=4.8.3", markers = "python_version < \"3.10\""} jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0" python-dateutil = ">=2.8.2" pyzmq = ">=23.0" @@ -2367,6 +2375,9 @@ files = [ {file = "markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2"}, ] +[package.dependencies] 
+importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} + [package.extras] docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] testing = ["coverage", "pyyaml"] @@ -2608,6 +2619,7 @@ files = [ click = ">=7.0" colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""} ghp-import = ">=1.0" +importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} jinja2 = ">=2.11.1" markdown = ">=3.3.6" markupsafe = ">=2.0.1" @@ -2650,6 +2662,7 @@ files = [ ] [package.dependencies] +importlib-metadata = {version = ">=4.3", markers = "python_version < \"3.10\""} mergedeep = ">=1.3.4" platformdirs = ">=2.2.0" pyyaml = ">=5.1" @@ -2970,6 +2983,7 @@ files = [ beautifulsoup4 = "*" bleach = "!=5.0.0" defusedxml = "*" +importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""} jinja2 = ">=3.0" jupyter-core = ">=4.7" jupyterlab-pygments = "*" @@ -3046,21 +3060,20 @@ files = [ [[package]] name = "networkx" -version = "3.4.2" +version = "3.2.1" description = "Python package for creating and manipulating graphs and networks" optional = false -python-versions = ">=3.10" +python-versions = ">=3.9" files = [ - {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"}, - {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"}, + {file = "networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"}, + {file = "networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6"}, ] [package.extras] -default = ["matplotlib (>=3.7)", "numpy (>=1.24)", "pandas (>=2.0)", "scipy (>=1.10,!=1.11.0,!=1.11.1)"] -developer = ["changelist (==0.5)", "mypy (>=1.1)", "pre-commit (>=3.2)", "rtoml"] -doc = 
["intersphinx-registry", "myst-nb (>=1.1)", "numpydoc (>=1.8.0)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.15)", "sphinx (>=7.3)", "sphinx-gallery (>=0.16)", "texext (>=0.6.7)"] -example = ["cairocffi (>=1.7)", "contextily (>=1.6)", "igraph (>=0.11)", "momepy (>=0.7.2)", "osmnx (>=1.9)", "scikit-learn (>=1.5)", "seaborn (>=0.13)"] -extra = ["lxml (>=4.6)", "pydot (>=3.0.1)", "pygraphviz (>=1.14)", "sympy (>=1.10)"] +default = ["matplotlib (>=3.5)", "numpy (>=1.22)", "pandas (>=1.4)", "scipy (>=1.9,!=1.11.0,!=1.11.1)"] +developer = ["changelist (==0.4)", "mypy (>=1.1)", "pre-commit (>=3.2)", "rtoml"] +doc = ["nb2plots (>=0.7)", "nbconvert (<7.9)", "numpydoc (>=1.6)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.14)", "sphinx (>=7)", "sphinx-gallery (>=0.14)", "texext (>=0.6.7)"] +extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.11)", "sympy (>=1.10)"] test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] [[package]] @@ -3532,6 +3545,22 @@ files = [ {file = "nvidia_nvtx_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:641dccaaa1139f3ffb0d3164b4b84f9d253397e38246a4f2f36728b48566d485"}, ] +[[package]] +name = "ocrmac" +version = "1.0.0" +description = "A python wrapper to extract text from images on a mac system. Uses the vision framework from Apple." 
+optional = true +python-versions = ">=3.6" +files = [ + {file = "ocrmac-1.0.0-py2.py3-none-any.whl", hash = "sha256:0b5a072aa23a9ead48132cb2d595b680aa6c3c5a6cb69525155e35ca95610c3a"}, + {file = "ocrmac-1.0.0.tar.gz", hash = "sha256:5b299e9030c973d1f60f82db000d6c2e5ff271601878c7db0885e850597d1d2e"}, +] + +[package.dependencies] +Click = ">=7.0" +pillow = "*" +pyobjc-framework-Vision = "*" + [[package]] name = "opencv-python-headless" version = "4.10.0.84" @@ -3551,9 +3580,11 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, + {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, ] [[package]] @@ -3751,13 +3782,13 @@ xml = ["lxml (>=4.9.2)"] [[package]] name = "pandas-stubs" -version = "2.2.3.241009" +version = "2.2.2.240807" description = "Type annotations for pandas" optional = false -python-versions = ">=3.10" +python-versions = ">=3.9" files = [ - {file = "pandas_stubs-2.2.3.241009-py3-none-any.whl", hash = "sha256:3a6f8f142105a42550be677ba741ba532621f4e0acad2155c0e7b2450f114cfa"}, - 
{file = "pandas_stubs-2.2.3.241009.tar.gz", hash = "sha256:d4ab618253f0acf78a5d0d2bfd6dffdd92d91a56a69bdc8144e5a5c6d25be3b5"}, + {file = "pandas_stubs-2.2.2.240807-py3-none-any.whl", hash = "sha256:893919ad82be4275f0d07bb47a95d08bae580d3fdea308a7acfcb3f02e76186e"}, + {file = "pandas_stubs-2.2.2.240807.tar.gz", hash = "sha256:64a559725a57a449f46225fbafc422520b7410bff9252b661a225b5559192a93"}, ] [package.dependencies] @@ -4492,6 +4523,7 @@ mccabe = ">=0.6,<0.8" platformdirs = ">=2.2.0" tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} tomlkit = ">=0.10.1" +typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} [package.extras] spelling = ["pyenchant (>=3.2,<4.0)"] @@ -4540,6 +4572,102 @@ bulk-writer = ["azure-storage-blob", "minio (>=7.0.0)", "pyarrow (>=12.0.0)", "r dev = ["black", "grpcio (==1.62.2)", "grpcio-testing (==1.62.2)", "grpcio-tools (==1.62.2)", "pytest (>=5.3.4)", "pytest-cov (>=2.8.1)", "pytest-timeout (>=1.3.4)", "ruff (>0.4.0)"] model = ["milvus-model (>=0.1.0)"] +[[package]] +name = "pyobjc-core" +version = "10.3.1" +description = "Python<->ObjC Interoperability Module" +optional = true +python-versions = ">=3.8" +files = [ + {file = "pyobjc_core-10.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ea46d2cda17921e417085ac6286d43ae448113158afcf39e0abe484c58fb3d78"}, + {file = "pyobjc_core-10.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:899d3c84d2933d292c808f385dc881a140cf08632907845043a333a9d7c899f9"}, + {file = "pyobjc_core-10.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:6ff5823d13d0a534cdc17fa4ad47cf5bee4846ce0fd27fc40012e12b46db571b"}, + {file = "pyobjc_core-10.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2581e8e68885bcb0e11ec619e81ef28e08ee3fac4de20d8cc83bc5af5bcf4a90"}, + {file = "pyobjc_core-10.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ea98d4c2ec39ca29e62e0327db21418696161fb138ee6278daf2acbedf7ce504"}, + {file = 
"pyobjc_core-10.3.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:4c179c26ee2123d0aabffb9dbc60324b62b6f8614fb2c2328b09386ef59ef6d8"}, + {file = "pyobjc_core-10.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cb901fce65c9be420c40d8a6ee6fff5ff27c6945f44fd7191989b982baa66dea"}, + {file = "pyobjc_core-10.3.1.tar.gz", hash = "sha256:b204a80ccc070f9ab3f8af423a3a25a6fd787e228508d00c4c30f8ac538ba720"}, +] + +[[package]] +name = "pyobjc-framework-cocoa" +version = "10.3.1" +description = "Wrappers for the Cocoa frameworks on macOS" +optional = true +python-versions = ">=3.8" +files = [ + {file = "pyobjc_framework_Cocoa-10.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4cb4f8491ab4d9b59f5187e42383f819f7a46306a4fa25b84f126776305291d1"}, + {file = "pyobjc_framework_Cocoa-10.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5f31021f4f8fdf873b57a97ee1f3c1620dbe285e0b4eaed73dd0005eb72fd773"}, + {file = "pyobjc_framework_Cocoa-10.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:11b4e0bad4bbb44a4edda128612f03cdeab38644bbf174de0c13129715497296"}, + {file = "pyobjc_framework_Cocoa-10.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:de5e62e5ccf2871a94acf3bf79646b20ea893cc9db78afa8d1fe1b0d0f7cbdb0"}, + {file = "pyobjc_framework_Cocoa-10.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c5af24610ab639bd1f521ce4500484b40787f898f691b7a23da3339e6bc8b90"}, + {file = "pyobjc_framework_Cocoa-10.3.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:a7151186bb7805deea434fae9a4423335e6371d105f29e73cc2036c6779a9dbc"}, + {file = "pyobjc_framework_Cocoa-10.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:743d2a1ac08027fd09eab65814c79002a1d0421d7c0074ffd1217b6560889744"}, + {file = "pyobjc_framework_cocoa-10.3.1.tar.gz", hash = "sha256:1cf20714daaa986b488fb62d69713049f635c9d41a60c8da97d835710445281a"}, +] + +[package.dependencies] +pyobjc-core = ">=10.3.1" + +[[package]] +name = "pyobjc-framework-coreml" +version = 
"10.3.1" +description = "Wrappers for the framework CoreML on macOS" +optional = true +python-versions = ">=3.8" +files = [ + {file = "pyobjc_framework_CoreML-10.3.1-cp36-abi3-macosx_10_13_universal2.whl", hash = "sha256:c1fdcc0487807afa9cd0f88f25697e0e2e093d0219e8e1aa42aa3674dd78c2cb"}, + {file = "pyobjc_framework_CoreML-10.3.1-cp36-abi3-macosx_10_9_universal2.whl", hash = "sha256:21c87e84c807b5dbe61e0f016d9aefa32d3212f175cc4b976b5c08770be7a58c"}, + {file = "pyobjc_framework_CoreML-10.3.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:a0877aed5d4cdbb63d1246cd5384c09d78a0667e83c435a1257d10017c11c1a4"}, + {file = "pyobjc_framework_CoreML-10.3.1-cp36-abi3-macosx_11_0_universal2.whl", hash = "sha256:4bd3f1acfb3245727727b71cbcf7d21a33d7e00fa488e41ad01527764b969b92"}, + {file = "pyobjc_framework_coreml-10.3.1.tar.gz", hash = "sha256:6b7091142cfaafee76f1a804329e7a4e3aeca921eea8644e9ceba4cc2751f705"}, +] + +[package.dependencies] +pyobjc-core = ">=10.3.1" +pyobjc-framework-Cocoa = ">=10.3.1" + +[[package]] +name = "pyobjc-framework-quartz" +version = "10.3.1" +description = "Wrappers for the Quartz frameworks on macOS" +optional = true +python-versions = ">=3.8" +files = [ + {file = "pyobjc_framework_Quartz-10.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5ef4fd315ed2bc42ef77fdeb2bae28a88ec986bd7b8079a87ba3b3475348f96e"}, + {file = "pyobjc_framework_Quartz-10.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:96578d4a3e70164efe44ad7dc320ecd4e211758ffcde5dcd694de1bbdfe090a4"}, + {file = "pyobjc_framework_Quartz-10.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ca35f92486869a41847a1703bb176aab8a53dbfd8e678d1f4d68d8e6e1581c71"}, + {file = "pyobjc_framework_Quartz-10.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:00a0933267e3a46ea4afcc35d117b2efb920f06de797fa66279c52e7057e3590"}, + {file = "pyobjc_framework_Quartz-10.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:a161bedb4c5257a02ad56a910cd7eefb28bdb0ea78607df0d70ed4efe4ea54c1"}, + {file = "pyobjc_framework_Quartz-10.3.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:d7a8028e117a94923a511944bfa9daf9744e212f06cf89010c60934a479863a5"}, + {file = "pyobjc_framework_Quartz-10.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:de00c983b3267eb26fa42c6ed9f15e2bf006bde8afa7fe2b390646aa21a5d6fc"}, + {file = "pyobjc_framework_quartz-10.3.1.tar.gz", hash = "sha256:b6d7e346d735c9a7f147cd78e6da79eeae416a0b7d3874644c83a23786c6f886"}, +] + +[package.dependencies] +pyobjc-core = ">=10.3.1" +pyobjc-framework-Cocoa = ">=10.3.1" + +[[package]] +name = "pyobjc-framework-vision" +version = "10.3.1" +description = "Wrappers for the framework Vision on macOS" +optional = true +python-versions = ">=3.8" +files = [ + {file = "pyobjc_framework_Vision-10.3.1-cp36-abi3-macosx_10_13_universal2.whl", hash = "sha256:dff3582678930461a0bb11bf070854d49f6944a851dc89edc63fac93c75ddf39"}, + {file = "pyobjc_framework_Vision-10.3.1-cp36-abi3-macosx_10_9_universal2.whl", hash = "sha256:32626183c51674efb3b5738e2884c3fea37edca010117cf71bd72cb3c49c869a"}, + {file = "pyobjc_framework_Vision-10.3.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2473b346a112c51ac485184305bd13c402e0db45f2df3d277315bd49efba18e9"}, + {file = "pyobjc_framework_Vision-10.3.1-cp36-abi3-macosx_11_0_universal2.whl", hash = "sha256:4302e2c5f68c9667ecd4273809cbc4611af6368b123d69596e5b088f1b1aa16b"}, + {file = "pyobjc_framework_vision-10.3.1.tar.gz", hash = "sha256:aa071656d395afc2d624600a9f30d6a3344aa747bf37f613ff3972158c40881c"}, +] + +[package.dependencies] +pyobjc-core = ">=10.3.1" +pyobjc-framework-Cocoa = ">=10.3.1" +pyobjc-framework-CoreML = ">=10.3.1" +pyobjc-framework-Quartz = ">=10.3.1" + [[package]] name = "pypdfium2" version = "4.30.0" @@ -5663,53 +5791,45 @@ tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc ( [[package]] name = "scipy" -version = "1.14.1" +version = "1.13.1" 
description = "Fundamental algorithms for scientific computing in Python" optional = false -python-versions = ">=3.10" +python-versions = ">=3.9" files = [ - {file = "scipy-1.14.1-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:b28d2ca4add7ac16ae8bb6632a3c86e4b9e4d52d3e34267f6e1b0c1f8d87e389"}, - {file = "scipy-1.14.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d0d2821003174de06b69e58cef2316a6622b60ee613121199cb2852a873f8cf3"}, - {file = "scipy-1.14.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8bddf15838ba768bb5f5083c1ea012d64c9a444e16192762bd858f1e126196d0"}, - {file = "scipy-1.14.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:97c5dddd5932bd2a1a31c927ba5e1463a53b87ca96b5c9bdf5dfd6096e27efc3"}, - {file = "scipy-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ff0a7e01e422c15739ecd64432743cf7aae2b03f3084288f399affcefe5222d"}, - {file = "scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e32dced201274bf96899e6491d9ba3e9a5f6b336708656466ad0522d8528f69"}, - {file = "scipy-1.14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8426251ad1e4ad903a4514712d2fa8fdd5382c978010d1c6f5f37ef286a713ad"}, - {file = "scipy-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:a49f6ed96f83966f576b33a44257d869756df6cf1ef4934f59dd58b25e0327e5"}, - {file = "scipy-1.14.1-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:2da0469a4ef0ecd3693761acbdc20f2fdeafb69e6819cc081308cc978153c675"}, - {file = "scipy-1.14.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c0ee987efa6737242745f347835da2cc5bb9f1b42996a4d97d5c7ff7928cb6f2"}, - {file = "scipy-1.14.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3a1b111fac6baec1c1d92f27e76511c9e7218f1695d61b59e05e0fe04dc59617"}, - {file = "scipy-1.14.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8475230e55549ab3f207bff11ebfc91c805dc3463ef62eda3ccf593254524ce8"}, - {file = 
"scipy-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:278266012eb69f4a720827bdd2dc54b2271c97d84255b2faaa8f161a158c3b37"}, - {file = "scipy-1.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fef8c87f8abfb884dac04e97824b61299880c43f4ce675dd2cbeadd3c9b466d2"}, - {file = "scipy-1.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b05d43735bb2f07d689f56f7b474788a13ed8adc484a85aa65c0fd931cf9ccd2"}, - {file = "scipy-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:716e389b694c4bb564b4fc0c51bc84d381735e0d39d3f26ec1af2556ec6aad94"}, - {file = "scipy-1.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:631f07b3734d34aced009aaf6fedfd0eb3498a97e581c3b1e5f14a04164a456d"}, - {file = "scipy-1.14.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:af29a935803cc707ab2ed7791c44288a682f9c8107bc00f0eccc4f92c08d6e07"}, - {file = "scipy-1.14.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:2843f2d527d9eebec9a43e6b406fb7266f3af25a751aa91d62ff416f54170bc5"}, - {file = "scipy-1.14.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc"}, - {file = "scipy-1.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30ac8812c1d2aab7131a79ba62933a2a76f582d5dbbc695192453dae67ad6310"}, - {file = "scipy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f9ea80f2e65bdaa0b7627fb00cbeb2daf163caa015e59b7516395fe3bd1e066"}, - {file = "scipy-1.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1"}, - {file = "scipy-1.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:2ff38e22128e6c03ff73b6bb0f85f897d2362f8c052e3b8ad00532198fbdae3f"}, - {file = "scipy-1.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79"}, - {file = 
"scipy-1.14.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:4079b90df244709e675cdc8b93bfd8a395d59af40b72e339c2287c91860deb8e"}, - {file = "scipy-1.14.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e0cf28db0f24a38b2a0ca33a85a54852586e43cf6fd876365c86e0657cfe7d73"}, - {file = "scipy-1.14.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e"}, - {file = "scipy-1.14.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b99722ea48b7ea25e8e015e8341ae74624f72e5f21fc2abd45f3a93266de4c5d"}, - {file = "scipy-1.14.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5149e3fd2d686e42144a093b206aef01932a0059c2a33ddfa67f5f035bdfe13e"}, - {file = "scipy-1.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4f5a7c49323533f9103d4dacf4e4f07078f360743dec7f7596949149efeec06"}, - {file = "scipy-1.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:baff393942b550823bfce952bb62270ee17504d02a1801d7fd0719534dfb9c84"}, - {file = "scipy-1.14.1.tar.gz", hash = "sha256:5a275584e726026a5699459aa72f828a610821006228e841b94275c4a7c08417"}, + {file = "scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca"}, + {file = "scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f"}, + {file = "scipy-1.13.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfa31f1def5c819b19ecc3a8b52d28ffdcc7ed52bb20c9a7589669dd3c250989"}, + {file = "scipy-1.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26264b282b9da0952a024ae34710c2aff7d27480ee91a2e82b7b7073c24722f"}, + {file = "scipy-1.13.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eccfa1906eacc02de42d70ef4aecea45415f5be17e72b61bafcfd329bdc52e94"}, + {file = "scipy-1.13.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:2831f0dc9c5ea9edd6e51e6e769b655f08ec6db6e2e10f86ef39bd32eb11da54"}, + {file = "scipy-1.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:27e52b09c0d3a1d5b63e1105f24177e544a222b43611aaf5bc44d4a0979e32f9"}, + {file = "scipy-1.13.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:54f430b00f0133e2224c3ba42b805bfd0086fe488835effa33fa291561932326"}, + {file = "scipy-1.13.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e89369d27f9e7b0884ae559a3a956e77c02114cc60a6058b4e5011572eea9299"}, + {file = "scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a78b4b3345f1b6f68a763c6e25c0c9a23a9fd0f39f5f3d200efe8feda560a5fa"}, + {file = "scipy-1.13.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:45484bee6d65633752c490404513b9ef02475b4284c4cfab0ef946def50b3f59"}, + {file = "scipy-1.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:5713f62f781eebd8d597eb3f88b8bf9274e79eeabf63afb4a737abc6c84ad37b"}, + {file = "scipy-1.13.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5d72782f39716b2b3509cd7c33cdc08c96f2f4d2b06d51e52fb45a19ca0c86a1"}, + {file = "scipy-1.13.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:017367484ce5498445aade74b1d5ab377acdc65e27095155e448c88497755a5d"}, + {file = "scipy-1.13.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:949ae67db5fa78a86e8fa644b9a6b07252f449dcf74247108c50e1d20d2b4627"}, + {file = "scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de3ade0e53bc1f21358aa74ff4830235d716211d7d077e340c7349bc3542e884"}, + {file = "scipy-1.13.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2ac65fb503dad64218c228e2dc2d0a0193f7904747db43014645ae139c8fad16"}, + {file = "scipy-1.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:cdd7dacfb95fea358916410ec61bbc20440f7860333aee6d882bb8046264e949"}, + {file = "scipy-1.13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:436bbb42a94a8aeef855d755ce5a465479c721e9d684de76bf61a62e7c2b81d5"}, + {file = "scipy-1.13.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:8335549ebbca860c52bf3d02f80784e91a004b71b059e3eea9678ba994796a24"}, + {file = "scipy-1.13.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d533654b7d221a6a97304ab63c41c96473ff04459e404b83275b60aa8f4b7004"}, + {file = "scipy-1.13.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637e98dcf185ba7f8e663e122ebf908c4702420477ae52a04f9908707456ba4d"}, + {file = "scipy-1.13.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a014c2b3697bde71724244f63de2476925596c24285c7a637364761f8710891c"}, + {file = "scipy-1.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:392e4ec766654852c25ebad4f64e4e584cf19820b980bc04960bca0b0cd6eaa2"}, + {file = "scipy-1.13.1.tar.gz", hash = "sha256:095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c"}, ] [package.dependencies] -numpy = ">=1.23.5,<2.3" +numpy = ">=1.22.4,<2.3" [package.extras] -dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"] -doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.13.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<=7.3.7)", "sphinx-design (>=0.4.0)"] -test = ["Cython", "array-api-strict (>=2.0)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] +doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.12.0)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", 
"sphinx-design (>=0.4.0)"] +test = ["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] [[package]] name = "secretstorage" @@ -6027,13 +6147,13 @@ files = [ [[package]] name = "tifffile" -version = "2024.9.20" +version = "2024.8.30" description = "Read and write TIFF files" optional = false -python-versions = ">=3.10" +python-versions = ">=3.9" files = [ - {file = "tifffile-2024.9.20-py3-none-any.whl", hash = "sha256:c54dc85bc1065d972cb8a6ffb3181389d597876aa80177933459733e4ed243dd"}, - {file = "tifffile-2024.9.20.tar.gz", hash = "sha256:3fbf3be2f995a7051a8ae05a4be70c96fc0789f22ed6f1c4104c973cf68a640b"}, + {file = "tifffile-2024.8.30-py3-none-any.whl", hash = "sha256:8bc59a8f02a2665cd50a910ec64961c5373bee0b8850ec89d3b7b485bf7be7ad"}, + {file = "tifffile-2024.8.30.tar.gz", hash = "sha256:2c9508fe768962e30f87def61819183fb07692c258cb175b3c114828368485a4"}, ] [package.dependencies] @@ -7232,10 +7352,10 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", type = ["pytest-mypy"] [extras] -paddleocr = [] +ocrmac = ["ocrmac"] tesserocr = ["tesserocr"] [metadata] lock-version = "2.0" -python-versions = "^3.10" -content-hash = "679c46aadb43260cba2dcfa91648456334dbce5d0fc7f515504ba4e555b4970c" +python-versions = "^3.9" +content-hash = "de2354d1c01d11017a742eb0bf826b08aaaeec5e84f62f0e2101c3bc685b7a6f" diff --git a/pyproject.toml b/pyproject.toml index 6809618b..70d8e4c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "docling" -version = "2.6.0" # DO NOT EDIT, updated automatically +version = "2.7.0" # DO NOT EDIT, updated automatically description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications." 
authors = ["Christoph Auer ", "Michele Dolfi ", "Maxim Lysak ", "Nikos Livathinos ", "Ahmed Nassar ", "Panos Vagenas ", "Peter Staar "] license = "MIT" @@ -24,10 +24,10 @@ packages = [{include = "docling"}] ###################### # actual dependencies: ###################### -python = "^3.10" +python = "^3.9" pydantic = "^2.0.0" docling-core = "^2.4.0" -docling-ibm-models = "^2.0.3" +docling-ibm-models = "^2.0.6" deepsearch-glm = "^0.26.1" filetype = "^1.2.0" pypdfium2 = "^4.30.0" @@ -36,10 +36,10 @@ huggingface_hub = ">=0.23,<1" requests = "^2.32.3" easyocr = "^1.7" tesserocr = { version = "^2.7.1", optional = true } -docling-parse = "^2.0.2" +docling-parse = "^2.0.5" certifi = ">=2024.7.4" rtree = "^1.3.0" -scipy = "^1.14.1" +scipy = "^1.6.0" pyarrow = "^16.1.0" typer = "^0.12.5" python-docx = "^1.1.2" @@ -48,6 +48,7 @@ beautifulsoup4 = "^4.12.3" pandas = "^2.1.4" marko = "^2.1.2" openpyxl = "^3.1.5" +ocrmac = { version = "^1.0.0", markers = "sys_platform == 'darwin'", optional = true } [tool.poetry.group.dev.dependencies] black = {extras = ["jupyter"], version = "^24.4.2"} @@ -80,6 +81,12 @@ langchain-huggingface = "^0.0.3" langchain-milvus = "^0.1.4" langchain-text-splitters = "^0.2.4" +[tool.poetry.group.constraints.dependencies] +numpy = [ + { version = "^2.1.0", markers = 'python_version >= "3.13"' }, + { version = "^1.24.4", markers = 'python_version < "3.13"' }, +] + [tool.poetry.group.mac_intel] optional = true @@ -95,7 +102,7 @@ torchvision = [ [tool.poetry.extras] tesserocr = ["tesserocr"] -paddleocr = ["paddlepaddle", "paddleocr"] +ocrmac = ["ocrmac"] [tool.poetry.scripts] docling = "docling.cli.main:app" @@ -106,13 +113,13 @@ build-backend = "poetry.core.masonry.api" [tool.black] line-length = 88 -target-version = ["py310"] +target-version = ["py39"] include = '\.pyi?$' [tool.isort] profile = "black" line_length = 88 -py_version=311 +py_version=39 [tool.mypy] pretty = true @@ -131,6 +138,7 @@ module = [ "tesserocr.*", "docling_ibm_models.*", 
"easyocr.*", + "ocrmac.*", "deepsearch_glm.*", "lxml.*", "bs4.*", diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py index 8da51888..99cfb26b 100644 --- a/tests/test_e2e_ocr_conversion.py +++ b/tests/test_e2e_ocr_conversion.py @@ -1,3 +1,4 @@ +import sys from pathlib import Path from typing import List @@ -6,6 +7,7 @@ from docling.datamodel.base_models import InputFormat from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( EasyOcrOptions, + OcrMacOptions, OcrOptions, PdfPipelineOptions, TesseractCliOcrOptions, @@ -62,6 +64,11 @@ def test_e2e_conversions(): PaddleOcrOptions(force_full_page_ocr=True), ] + # only works on mac + if "darwin" == sys.platform: + engines.append(OcrMacOptions()) + engines.append(OcrMacOptions(force_full_page_ocr=True)) + for ocr_options in engines: print(f"Converting with ocr_engine: {ocr_options.kind}") converter = get_converter(ocr_options=ocr_options)