From 4d7ea030da02ef7094bf8dba48f0cc9d27340133 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Thu, 22 Aug 2024 18:56:34 +0200 Subject: [PATCH 1/2] Put safety-checks for failed parse of pages Signed-off-by: Christoph Auer --- docling/backend/docling_parse_backend.py | 11 ++- poetry.lock | 97 ++++++------------------ pyproject.toml | 2 +- 3 files changed, 35 insertions(+), 75 deletions(-) diff --git a/docling/backend/docling_parse_backend.py b/docling/backend/docling_parse_backend.py index 18f6c69e..905d3655 100644 --- a/docling/backend/docling_parse_backend.py +++ b/docling/backend/docling_parse_backend.py @@ -23,9 +23,15 @@ class DoclingParsePageBackend(PdfPageBackend): self._ppage = page_obj parsed_page = parser.parse_pdf_from_key_on_page(document_hash, page_no) - self._dpage = parsed_page["pages"][0] + + self._dpage = None + self.broken_page = "pages" not in parsed_page + if not self.broken_page: + self._dpage = parsed_page["pages"][0] def get_text_in_rect(self, bbox: BoundingBox) -> str: + if self.broken_page: + return "" # Find intersecting cells on the page text_piece = "" page_size = self.get_size() @@ -60,6 +66,9 @@ class DoclingParsePageBackend(PdfPageBackend): cells = [] cell_counter = 0 + if self.broken_page: + return cells + page_size = self.get_size() parser_width = self._dpage["width"] diff --git a/poetry.lock b/poetry.lock index 5aa8f20a..385832bf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -78,17 +78,6 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] -[[package]] -name = "bashlex" -version = "0.18" -description = "Python parser for bash" -optional = false -python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4" -files = [ - {file = "bashlex-0.18-py2.py3-none-any.whl", hash = "sha256:91d73a23a3e51711919c1c899083890cdecffc91d8c088942725ac13e9dcfffa"}, - {file = "bashlex-0.18.tar.gz", hash = "sha256:5bb03a01c6d5676338c36fd1028009c8ad07e7d61d8a1ce3f513b7fff52796ee"}, -] - [[package]] name = "black" version = "24.8.0" @@ -137,17 +126,6 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] -[[package]] -name = "bracex" -version = "2.5" -description = "Bash style brace expander." -optional = false -python-versions = ">=3.8" -files = [ - {file = "bracex-2.5-py3-none-any.whl", hash = "sha256:d2fcf4b606a82ac325471affe1706dd9bbaa3536c91ef86a31f6b766f3dad1d0"}, - {file = "bracex-2.5.tar.gz", hash = "sha256:0725da5045e8d37ea9592ab3614d8b561e22c3c5fde3964699be672e072ab611"}, -] - [[package]] name = "build" version = "1.2.1" @@ -394,34 +372,6 @@ files = [ {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, ] -[[package]] -name = "cibuildwheel" -version = "2.20.0" -description = "Build Python wheels on CI with minimal configuration." -optional = false -python-versions = ">=3.8" -files = [ - {file = "cibuildwheel-2.20.0-py3-none-any.whl", hash = "sha256:d90719cc386af540b52f3cd8c733972c1fe222bbb2a941e5f5cd87215a0c82a3"}, - {file = "cibuildwheel-2.20.0.tar.gz", hash = "sha256:5c3fd67e4417fe37021b595bedcaf0c87e5800ecf9d6096229967858a20cc6c8"}, -] - -[package.dependencies] -bashlex = "!=0.13" -bracex = "*" -certifi = "*" -filelock = "*" -packaging = ">=20.9" -platformdirs = "*" -tomli = {version = "*", markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""} - -[package.extras] -bin = ["click", "packaging (>=21.0)", "pip-tools", "pygithub", "pyyaml", "requests", "rich (>=9.6)"] -dev = ["build", "click", "jinja2", "packaging (>=21.0)", "pip-tools", "pygithub", "pytest (>=6)", "pytest-timeout", "pytest-xdist", "pyyaml", "requests", "rich (>=9.6)", "setuptools", "tomli-w", "validate-pyproject"] -docs = ["jinja2 (>=3.1.2)", "mkdocs (==1.3.1)", "mkdocs-include-markdown-plugin (==2.8.0)", "mkdocs-macros-plugin", "pymdown-extensions"] -test = ["build", "jinja2", "pytest (>=6)", "pytest-timeout", "pytest-xdist", "setuptools", "tomli-w", "validate-pyproject"] -uv = ["uv"] - [[package]] name = "cleo" version = "2.1.0" @@ -822,33 +772,34 @@ tqdm = ">=4.64.0,<5.0.0" [[package]] name = "docling-parse" -version = "1.0.0" +version = "1.1.0" description = "Simple package to extract text with coordinates from programmatic PDFs" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "docling_parse-1.0.0-cp310-cp310-macosx_13_6_arm64.whl", hash = "sha256:068db83a192b21783cc7bc66e9d3efb9072a57edeb8c07ef1a83a93353efcc36"}, - {file = "docling_parse-1.0.0-cp310-cp310-macosx_13_6_x86_64.whl", hash = "sha256:f57f9bba3ac6a81fc30c34bb08261d7308b0a780d90cbee903821aec2f5fbd88"}, - {file = "docling_parse-1.0.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:ae02643485eb28cb54bac8523243a536751c561dddd86846a8dd9b3804a3c491"}, - {file = "docling_parse-1.0.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:01cbb011a337bc4dcdddb281841378af36cbce0898bdf528543c7c54d66e6ecc"}, - {file = "docling_parse-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdf142dea82f0a5f5e1bcaa74cc9feeda12899077589e3eb6c728d334b43cdda"}, - {file = "docling_parse-1.0.0-cp311-cp311-macosx_13_6_arm64.whl", hash = "sha256:8834a8387a55b4082c20da184e7d09f705c17558c465da9a5f35974b19013fe5"}, - {file = "docling_parse-1.0.0-cp311-cp311-macosx_13_6_x86_64.whl", hash = "sha256:4d1cfe98a7594fac3c7afd8fb08b28e4b1aba8b317e60cc64a85fb19043230b0"}, - {file = "docling_parse-1.0.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:f5da27cd03f1ba8859ebde525db388dd1d862be2712f38a13b6985f95061280c"}, - {file = "docling_parse-1.0.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8aa6bdda40483af52591bdff11a578837eb4d6be51c12d44b4e489f520757ae6"}, - {file = "docling_parse-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5c4b80a8d5e8f832910f32188501a9a6718a0223fb9921ee7cc5cfe62adb857"}, - {file = "docling_parse-1.0.0-cp312-cp312-macosx_13_6_arm64.whl", hash = "sha256:c86b263b4b089c3a71cde2a4fb8314614350dd76b3769b0950b371c2964e10d6"}, - {file = "docling_parse-1.0.0-cp312-cp312-macosx_13_6_x86_64.whl", hash = "sha256:93ef15628d663c036d48d466bf3de7c90a172cf52ba11883990640c758331720"}, - {file = "docling_parse-1.0.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:37218472773ed94b8ed07eeccfa68457f064227759350404fea5f45c311242a7"}, - {file = "docling_parse-1.0.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:9f863d9788c62dd34b2cdfd79480785e9a6bb382144b630ceb8b527aaee56351"}, - {file = "docling_parse-1.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0358eb13822ce2120362d6e7d63eb80a50d819b5bed5a2ccb7bd9beee4d83a61"}, - {file = "docling_parse-1.0.0-cp39-cp39-macosx_13_6_arm64.whl", hash = "sha256:5651185fbec4357b7638e1a39a0854a712a0cc74d6644518e64f066ce38ed976"}, - {file = "docling_parse-1.0.0-cp39-cp39-macosx_13_6_x86_64.whl", hash = "sha256:d5efedf361b4c58e372d355c0bb3fa5a20dcd3d002952ccbafb09580a924f426"}, - {file = "docling_parse-1.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4a67df4699b4ffc2b01e77395ef35843ab23f40ac62bcdf593b6cc1f443eca6"}, + {file = "docling_parse-1.1.0-cp310-cp310-macosx_13_6_arm64.whl", hash = "sha256:e9f561581e942640544e8b5375f30998eb8285ffa8627f513badfa2700f6970e"}, + {file = "docling_parse-1.1.0-cp310-cp310-macosx_13_6_x86_64.whl", hash = "sha256:93a485652a158a1abed2418953427c5487007cdf4b2d43f7152906fda2589e1d"}, + {file = "docling_parse-1.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:2445d1eb99735280ca6875babc344ccc44034cc21df7fd2e1adb0847076312e7"}, + {file = "docling_parse-1.1.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:c9bf79723fb9f1dde621c6e208f103039786a39e9147087762632f6744a93279"}, + {file = "docling_parse-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3821fe5625d683a59c65299a45f2cb28a11b798943763b2e812e437dc87bbef"}, + {file = "docling_parse-1.1.0-cp311-cp311-macosx_13_6_arm64.whl", hash = "sha256:d717fa920fb9c9dd36580a3b0671236690e40aae48cc510c3868f6a07d45dbfc"}, + {file = "docling_parse-1.1.0-cp311-cp311-macosx_13_6_x86_64.whl", hash = "sha256:9f73193af9a350ed69d288b5d6fea8ca98adfe1330e01cc1b1068fa8a175d3ad"}, + {file = "docling_parse-1.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:6b9f45ad27fe46bec2ccc946a37233f1859f169538a19a2e5357a9413c87f2c1"}, + {file = "docling_parse-1.1.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:0fe0e1b6ccdce0cb33b2e1570224c1d77288f8e3ce40d25e1eb9b526106fe59a"}, + {file = "docling_parse-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f88e9b6ce9b1ce4e862cf8f5af50558b6ed978370a02b7df3d24bd285a1b93c0"}, + {file = "docling_parse-1.1.0-cp312-cp312-macosx_13_6_arm64.whl", hash = "sha256:d7c35babea1f75a2846d5bf673044a3698274748f7c4909f1b3246de49b59f36"}, + {file = "docling_parse-1.1.0-cp312-cp312-macosx_13_6_x86_64.whl", hash = "sha256:b3259ddc2b5e262de97fb6905385d01b5b303a253699ac6d20cccd8609fed9f3"}, + {file = "docling_parse-1.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5e50c760dbbb85cb24b09ed60c9c7a6916b3b0c406d25515986cc05220791a27"}, + {file = "docling_parse-1.1.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:9a7738ddf7485f074195d6705913e4e7eda5869cff356bc40a035b93124a90e6"}, + {file = "docling_parse-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:740a6b00b87dc101946881ff0089d139eb9a6c9470586ba331dc768991732977"}, + {file = "docling_parse-1.1.0-cp39-cp39-macosx_13_6_arm64.whl", hash = "sha256:222cb5dcf49107361ec14ff796279d0740be183ff889e4e190a40e499ed56bc9"}, + {file = "docling_parse-1.1.0-cp39-cp39-macosx_13_6_x86_64.whl", hash = "sha256:7e42e78fa7c0d4660db3af20cacf6ec4fed46d3cd5b928bf1b0f90b6c196caff"}, + {file = "docling_parse-1.1.0-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:0f80c9341a09e31d8da4ac4b5efbbd28eab035ca012345efd51a17d8b3023d75"}, + {file = "docling_parse-1.1.0-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:f20f2e2123e9604fcf97f281cf907c9a72b21e9cef2b794fce8770d2698267bb"}, + {file = "docling_parse-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84707a40ce13cd9fd86001234dcf35477617f77d8d8a03bbeb59efbf94048f38"}, ] [package.dependencies] -cibuildwheel = ">=2.20.0,<3.0.0" tabulate = ">=0.9.0,<1.0.0" [[package]] @@ -2694,8 +2645,8 @@ files = [ numpy = [ {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] [[package]] @@ -2750,8 +2701,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -5141,4 +5092,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "98d40c4d763018d5aa79b8c0ec00adac2fc06a036a9850b60f8ecce14db7cbcc" +content-hash = "6bebfa28aff51b294d642e38638d3fe7d08875b4bcb81096b0efe4a8611ea240" diff --git a/pyproject.toml b/pyproject.toml index 746f471c..684a9f71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ pydantic-settings = "^2.3.0" huggingface_hub = ">=0.23,<1" requests = "^2.32.3" easyocr = "^1.7" -docling-parse = "^1.0.0" +docling-parse = "^1.1.0" certifi = ">=2024.7.4" rtree = "^1.3.0" scipy = "^1.14.1" From b07881324cc564acb29a02f60c1a7b14ce8703f4 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Fri, 23 Aug 2024 11:26:52 +0200 Subject: [PATCH 2/2] Bump to docling-parse 1.1.1 Signed-off-by: Christoph Auer --- poetry.lock | 103 ++++++++++++++++++++++++++++++++++++------------- pyproject.toml | 2 +- 2 files changed, 78 insertions(+), 27 deletions(-) diff --git a/poetry.lock b/poetry.lock index 385832bf..16b16290 100644 --- a/poetry.lock +++ b/poetry.lock @@ -78,6 +78,17 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] +[[package]] +name = "bashlex" +version = "0.18" +description = "Python parser for bash" +optional = false +python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4" +files = [ + {file = "bashlex-0.18-py2.py3-none-any.whl", hash = "sha256:91d73a23a3e51711919c1c899083890cdecffc91d8c088942725ac13e9dcfffa"}, + {file = "bashlex-0.18.tar.gz", hash = "sha256:5bb03a01c6d5676338c36fd1028009c8ad07e7d61d8a1ce3f513b7fff52796ee"}, +] + [[package]] name = "black" version = "24.8.0" @@ -126,6 +137,17 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "bracex" +version = "2.5" +description = "Bash style brace expander." +optional = false +python-versions = ">=3.8" +files = [ + {file = "bracex-2.5-py3-none-any.whl", hash = "sha256:d2fcf4b606a82ac325471affe1706dd9bbaa3536c91ef86a31f6b766f3dad1d0"}, + {file = "bracex-2.5.tar.gz", hash = "sha256:0725da5045e8d37ea9592ab3614d8b561e22c3c5fde3964699be672e072ab611"}, +] + [[package]] name = "build" version = "1.2.1" @@ -372,6 +394,34 @@ files = [ {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, ] +[[package]] +name = "cibuildwheel" +version = "2.20.0" +description = "Build Python wheels on CI with minimal configuration." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cibuildwheel-2.20.0-py3-none-any.whl", hash = "sha256:d90719cc386af540b52f3cd8c733972c1fe222bbb2a941e5f5cd87215a0c82a3"}, + {file = "cibuildwheel-2.20.0.tar.gz", hash = "sha256:5c3fd67e4417fe37021b595bedcaf0c87e5800ecf9d6096229967858a20cc6c8"}, +] + +[package.dependencies] +bashlex = "!=0.13" +bracex = "*" +certifi = "*" +filelock = "*" +packaging = ">=20.9" +platformdirs = "*" +tomli = {version = "*", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""} + +[package.extras] +bin = ["click", "packaging (>=21.0)", "pip-tools", "pygithub", "pyyaml", "requests", "rich (>=9.6)"] +dev = ["build", "click", "jinja2", "packaging (>=21.0)", "pip-tools", "pygithub", "pytest (>=6)", "pytest-timeout", "pytest-xdist", "pyyaml", "requests", "rich (>=9.6)", "setuptools", "tomli-w", "validate-pyproject"] +docs = ["jinja2 (>=3.1.2)", "mkdocs (==1.3.1)", "mkdocs-include-markdown-plugin (==2.8.0)", "mkdocs-macros-plugin", "pymdown-extensions"] +test = ["build", "jinja2", "pytest (>=6)", "pytest-timeout", "pytest-xdist", "setuptools", "tomli-w", "validate-pyproject"] +uv = ["uv"] + [[package]] name = "cleo" version = "2.1.0" @@ -772,34 +822,35 @@ tqdm = ">=4.64.0,<5.0.0" [[package]] name = "docling-parse" -version = "1.1.0" +version = "1.1.1" description = "Simple package to extract text with coordinates from programmatic PDFs" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "docling_parse-1.1.0-cp310-cp310-macosx_13_6_arm64.whl", hash = "sha256:e9f561581e942640544e8b5375f30998eb8285ffa8627f513badfa2700f6970e"}, - {file = "docling_parse-1.1.0-cp310-cp310-macosx_13_6_x86_64.whl", hash = "sha256:93a485652a158a1abed2418953427c5487007cdf4b2d43f7152906fda2589e1d"}, - {file = "docling_parse-1.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:2445d1eb99735280ca6875babc344ccc44034cc21df7fd2e1adb0847076312e7"}, - {file = "docling_parse-1.1.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:c9bf79723fb9f1dde621c6e208f103039786a39e9147087762632f6744a93279"}, - {file = "docling_parse-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3821fe5625d683a59c65299a45f2cb28a11b798943763b2e812e437dc87bbef"}, - {file = "docling_parse-1.1.0-cp311-cp311-macosx_13_6_arm64.whl", hash = "sha256:d717fa920fb9c9dd36580a3b0671236690e40aae48cc510c3868f6a07d45dbfc"}, - {file = "docling_parse-1.1.0-cp311-cp311-macosx_13_6_x86_64.whl", hash = "sha256:9f73193af9a350ed69d288b5d6fea8ca98adfe1330e01cc1b1068fa8a175d3ad"}, - {file = "docling_parse-1.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:6b9f45ad27fe46bec2ccc946a37233f1859f169538a19a2e5357a9413c87f2c1"}, - {file = "docling_parse-1.1.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:0fe0e1b6ccdce0cb33b2e1570224c1d77288f8e3ce40d25e1eb9b526106fe59a"}, - {file = "docling_parse-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f88e9b6ce9b1ce4e862cf8f5af50558b6ed978370a02b7df3d24bd285a1b93c0"}, - {file = "docling_parse-1.1.0-cp312-cp312-macosx_13_6_arm64.whl", hash = "sha256:d7c35babea1f75a2846d5bf673044a3698274748f7c4909f1b3246de49b59f36"}, - {file = "docling_parse-1.1.0-cp312-cp312-macosx_13_6_x86_64.whl", hash = "sha256:b3259ddc2b5e262de97fb6905385d01b5b303a253699ac6d20cccd8609fed9f3"}, - {file = "docling_parse-1.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5e50c760dbbb85cb24b09ed60c9c7a6916b3b0c406d25515986cc05220791a27"}, - {file = "docling_parse-1.1.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:9a7738ddf7485f074195d6705913e4e7eda5869cff356bc40a035b93124a90e6"}, - {file = "docling_parse-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:740a6b00b87dc101946881ff0089d139eb9a6c9470586ba331dc768991732977"}, - {file = "docling_parse-1.1.0-cp39-cp39-macosx_13_6_arm64.whl", hash = "sha256:222cb5dcf49107361ec14ff796279d0740be183ff889e4e190a40e499ed56bc9"}, - {file = "docling_parse-1.1.0-cp39-cp39-macosx_13_6_x86_64.whl", hash = "sha256:7e42e78fa7c0d4660db3af20cacf6ec4fed46d3cd5b928bf1b0f90b6c196caff"}, - {file = "docling_parse-1.1.0-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:0f80c9341a09e31d8da4ac4b5efbbd28eab035ca012345efd51a17d8b3023d75"}, - {file = "docling_parse-1.1.0-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:f20f2e2123e9604fcf97f281cf907c9a72b21e9cef2b794fce8770d2698267bb"}, - {file = "docling_parse-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84707a40ce13cd9fd86001234dcf35477617f77d8d8a03bbeb59efbf94048f38"}, + {file = "docling_parse-1.1.1-cp310-cp310-macosx_13_6_arm64.whl", hash = "sha256:a692eb79f173cec449eb66f618a1bc3dd66d13c8948d9a975cfba533b4ac5ff5"}, + {file = "docling_parse-1.1.1-cp310-cp310-macosx_13_6_x86_64.whl", hash = "sha256:a369c91b04852ff21fca27834f2f7db8fa024fd037f6089dd46943e3ca2d2a61"}, + {file = "docling_parse-1.1.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:b57b64ea2f33cc51f26f520cb69246c3a9bd06ac8b199f3decf02f8cd875446a"}, + {file = "docling_parse-1.1.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:a07ffcd3341f9609dcbb942e3e60fa7eab8fb3cb15507efae73a939a31ca8ed9"}, + {file = "docling_parse-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3fbf402666b429a290d0a1054f713aa8ebc390b29682c471acf98e0da996164f"}, + {file = "docling_parse-1.1.1-cp311-cp311-macosx_13_6_arm64.whl", hash = "sha256:82d5719df763bca8d13acc7c5dc006fc05140f50b80ab063307e846c9272fc5c"}, + {file = "docling_parse-1.1.1-cp311-cp311-macosx_13_6_x86_64.whl", hash = "sha256:537cdec2abb6e24124da5cfbbf67e3a56c3d61f32bffd0f8f0323107addbb343"}, + {file = "docling_parse-1.1.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4e0f7965b5389f3c657841d1e04680899a9caf431c13e020b8c4c1bac637bc6c"}, + {file = "docling_parse-1.1.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:e37a36aa1f66d44d4a47d6412a19f1ffd5f44d6d7f18b7638e3e6125d83b453a"}, + {file = "docling_parse-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba139bfafce7dd281d0d0551415e915bbba4ed64f0827b752f99a0e717a13cd1"}, + {file = "docling_parse-1.1.1-cp312-cp312-macosx_13_6_arm64.whl", hash = "sha256:0d62ffc592017826d1bff6dad0c97d05129c118b0b37d724c643fed2f5c77798"}, + {file = "docling_parse-1.1.1-cp312-cp312-macosx_13_6_x86_64.whl", hash = "sha256:d2be36904005ccf5c4d44370ecd449f4e2d4df73c98c7dc88165b11028a8b6d8"}, + {file = "docling_parse-1.1.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f8caf7d08ac96929eb59009ad397c4143ef21024829a91a19d07571f0d70d2bf"}, + {file = "docling_parse-1.1.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a96286beabe65df64bc01285ecc893fae1513f6dda39898484da0fa7fb019123"}, + {file = "docling_parse-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74fcbccbed154a3e3e76471273cd62daf99f736c965d05a7fa5b9f4b1b446c5f"}, + {file = "docling_parse-1.1.1-cp39-cp39-macosx_13_6_arm64.whl", hash = "sha256:133af429a329dad2c309ef3ed7538474c89c3a81e36adc720eeb62de7fff5a07"}, + {file = "docling_parse-1.1.1-cp39-cp39-macosx_13_6_x86_64.whl", hash = "sha256:181e7537e6118706697ffa120670b10d312ace2ae35d308d10264b4e722758a2"}, + {file = "docling_parse-1.1.1-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:a26745edf9d8651b4a625ebf667422292420ce31d7ba1c26bd78c8b4ea15cb53"}, + {file = "docling_parse-1.1.1-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:be93d954a29d38daa9c0485ef5c0b383c1f64d4dd4a6cdf22cd9d5fd782ccc9e"}, + {file = "docling_parse-1.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64ef45fc42e1c6a4a1e03c394e25ab7ed13191ba5b4994922efee02c79c51c19"}, ] [package.dependencies] +cibuildwheel = ">=2.20.0,<3.0.0" tabulate = ">=0.9.0,<1.0.0" [[package]] @@ -3055,17 +3106,17 @@ tests = ["pytest"] [[package]] name = "pybind11" -version = "2.13.4" +version = "2.13.5" description = "Seamless operability between C++11 and Python" optional = false python-versions = ">=3.7" files = [ - {file = "pybind11-2.13.4-py3-none-any.whl", hash = "sha256:5932d63d570b3a12ece2f6678adb3846cc1c229dc1f8518a46d5b540f240f959"}, - {file = "pybind11-2.13.4.tar.gz", hash = "sha256:75a9e1f967d3cd3fd59f981eb39406f9de05e33a4dd8f5f18b8e29cae023e1d5"}, + {file = "pybind11-2.13.5-py3-none-any.whl", hash = "sha256:dc35a98b61a0d23ee8599b317664f5be7e259fdc369a3b810b1ebbc3f5674d27"}, + {file = "pybind11-2.13.5.tar.gz", hash = "sha256:ae33f635322f9d9741abde0c5f348bf9373f6c22298883395e586cb43c55574e"}, ] [package.extras] -global = ["pybind11-global (==2.13.4)"] +global = ["pybind11-global (==2.13.5)"] [[package]] name = "pyclipper" @@ -5092,4 +5143,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "6bebfa28aff51b294d642e38638d3fe7d08875b4bcb81096b0efe4a8611ea240" +content-hash = "e0f8f29e02dcc980287efc0b946df1df4d149bfe498cc16abda897842b45b019" diff --git a/pyproject.toml b/pyproject.toml index 684a9f71..e24645a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ pydantic-settings = "^2.3.0" huggingface_hub = ">=0.23,<1" requests = "^2.32.3" easyocr = "^1.7" -docling-parse = "^1.1.0" +docling-parse = "^1.1.1" certifi = ">=2024.7.4" rtree = "^1.3.0" scipy = "^1.14.1"