mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
pin models, core and adapt example
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
df3ff47914
commit
dd2982cce1
@ -3,7 +3,7 @@ from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
from docling_core.types.doc import DoclingDocument, NodeItem
|
||||
from docling_core.types.doc.document import PictureClassificationData, PictureItem
|
||||
from docling_core.types.doc.document import PictureClassificationData, PictureItem, PictureClassificationClass
|
||||
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||
@ -38,8 +38,7 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
|
||||
|
||||
element.data.classification = PictureClassificationData(
|
||||
provenance="example_classifier-0.0.1",
|
||||
predicted_class="dummy",
|
||||
confidence=0.42,
|
||||
predicted_classes=[PictureClassificationClass(class_name="dummy", confidence=0.42)]
|
||||
)
|
||||
|
||||
yield element
|
||||
|
143
poetry.lock
generated
143
poetry.lock
generated
@ -798,25 +798,6 @@ files = [
|
||||
docs = ["ipython", "matplotlib", "numpydoc", "sphinx"]
|
||||
tests = ["pytest", "pytest-cov", "pytest-xdist"]
|
||||
|
||||
[[package]]
|
||||
name = "dataclasses-json"
|
||||
version = "0.5.9"
|
||||
description = "Easily serialize dataclasses to and from JSON"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "dataclasses-json-0.5.9.tar.gz", hash = "sha256:e9ac87b73edc0141aafbce02b44e93553c3123ad574958f0fe52a534b6707e8e"},
|
||||
{file = "dataclasses_json-0.5.9-py3-none-any.whl", hash = "sha256:1280542631df1c375b7bc92e5b86d39e06c44760d7e3571a537b3b8acabf2f0c"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
marshmallow = ">=3.3.0,<4.0.0"
|
||||
marshmallow-enum = ">=1.5.1,<2.0.0"
|
||||
typing-inspect = ">=0.4.0"
|
||||
|
||||
[package.extras]
|
||||
dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest (>=7.2.0)", "setuptools", "simplejson", "twine", "types-dataclasses", "wheel"]
|
||||
|
||||
[[package]]
|
||||
name = "datasets"
|
||||
version = "2.21.0"
|
||||
@ -917,7 +898,7 @@ files = []
|
||||
develop = false
|
||||
|
||||
[package.dependencies]
|
||||
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "7c104d61aa5d003dd8d9711c37e23ce04799f4c9"}
|
||||
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "33aa21408400c9c475db0f8c6be681b888388284"}
|
||||
docutils = "!=0.21"
|
||||
matplotlib = "^3.7.1"
|
||||
networkx = "^3.1"
|
||||
@ -941,8 +922,8 @@ toolkit = ["deepsearch-toolkit (>=0.31.0)"]
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/DS4SD/deepsearch-glm.git"
|
||||
reference = "c185c4f985ccd29a470a1cddd3bec43880b739ee"
|
||||
resolved_reference = "c185c4f985ccd29a470a1cddd3bec43880b739ee"
|
||||
reference = "8ab1b4372122c820a28badd3c6095c2ce2feaf61"
|
||||
resolved_reference = "8ab1b4372122c820a28badd3c6095c2ce2feaf61"
|
||||
|
||||
[[package]]
|
||||
name = "defusedxml"
|
||||
@ -991,7 +972,6 @@ files = []
|
||||
develop = false
|
||||
|
||||
[package.dependencies]
|
||||
json-schema-for-humans = "^1.0.0"
|
||||
jsonref = "^1.1.0"
|
||||
jsonschema = "^4.16.0"
|
||||
pandas = "^2.1.4"
|
||||
@ -1002,29 +982,31 @@ tabulate = "^0.9.0"
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/DS4SD/docling-core.git"
|
||||
reference = "7c104d61aa5d003dd8d9711c37e23ce04799f4c9"
|
||||
resolved_reference = "7c104d61aa5d003dd8d9711c37e23ce04799f4c9"
|
||||
reference = "33aa21408400c9c475db0f8c6be681b888388284"
|
||||
resolved_reference = "33aa21408400c9c475db0f8c6be681b888388284"
|
||||
|
||||
[[package]]
|
||||
name = "docling-ibm-models"
|
||||
version = "2.0.0"
|
||||
version = "2.0.1"
|
||||
description = "This package contains the AI models used by the Docling PDF conversion package"
|
||||
optional = false
|
||||
python-versions = "^3.10"
|
||||
files = []
|
||||
develop = false
|
||||
python-versions = "<4.0,>=3.10"
|
||||
files = [
|
||||
{file = "docling_ibm_models-2.0.1-py3-none-any.whl", hash = "sha256:f81c6002b7e102aa79afb8287fce48872f27d1cffb088ea4d1fbebe490364a1d"},
|
||||
{file = "docling_ibm_models-2.0.1.tar.gz", hash = "sha256:4fb0300022cfa0d0ac1fcbcb296c144e71ee9816654407f8a4d3a7b934f3065f"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
huggingface_hub = ">=0.23,<1"
|
||||
jsonlines = "^3.1.0"
|
||||
lxml = "^4.9.1"
|
||||
mean_average_precision = "^2021.4.26.0"
|
||||
jsonlines = ">=3.1.0,<4.0.0"
|
||||
lxml = ">=4.9.1,<5.0.0"
|
||||
mean_average_precision = ">=2021.4.26.0,<2022.0.0.0"
|
||||
numpy = [
|
||||
{version = ">=2.1.0,<3.0.0", markers = "python_version >= \"3.13\""},
|
||||
{version = ">=1.24.4,<2.0.0", markers = "python_version < \"3.13\""},
|
||||
]
|
||||
opencv-python-headless = "^4.6.0.66"
|
||||
Pillow = "^10.0.0"
|
||||
opencv-python-headless = ">=4.6.0.66,<5.0.0.0"
|
||||
Pillow = ">=10.0.0,<11.0.0"
|
||||
torch = [
|
||||
{version = ">=2.2.2,<3.0.0", markers = "sys_platform != \"darwin\" or platform_machine != \"x86_64\""},
|
||||
{version = ">=2.2.2,<2.3.0", markers = "sys_platform == \"darwin\" and platform_machine == \"x86_64\""},
|
||||
@ -1033,13 +1015,7 @@ torchvision = [
|
||||
{version = ">=0,<1", markers = "sys_platform != \"darwin\" or platform_machine != \"x86_64\""},
|
||||
{version = ">=0.17.2,<0.18.0", markers = "sys_platform == \"darwin\" and platform_machine == \"x86_64\""},
|
||||
]
|
||||
tqdm = "^4.64.0"
|
||||
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/DS4SD/docling-ibm-models.git"
|
||||
reference = "1d2e2a2e6eb152c237f1383cdba20cf85db80b97"
|
||||
resolved_reference = "1d2e2a2e6eb152c237f1383cdba20cf85db80b97"
|
||||
tqdm = ">=4.64.0,<5.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "docling-parse"
|
||||
@ -1593,16 +1569,6 @@ files = [
|
||||
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "htmlmin"
|
||||
version = "0.1.12"
|
||||
description = "An HTML Minifier"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "htmlmin-0.1.12.tar.gz", hash = "sha256:50c1ef4630374a5d723900096a961cff426dff46b48f34d194a81bbe14eca178"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpcore"
|
||||
version = "1.0.6"
|
||||
@ -2015,29 +1981,6 @@ files = [
|
||||
{file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "json-schema-for-humans"
|
||||
version = "1.0.2"
|
||||
description = "Generate static HTML documentation from JSON schemas"
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.8"
|
||||
files = [
|
||||
{file = "json_schema_for_humans-1.0.2-py3-none-any.whl", hash = "sha256:d6ecb023b4f802b10b01abca1295a37e363d9f060e54c21aa2cddea44731c6e1"},
|
||||
{file = "json_schema_for_humans-1.0.2.tar.gz", hash = "sha256:8bd807a2bac31650226e451ad3b9583c27ce916375d6938ac9d0251eb6549ad5"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
click = ">=8.0.1,<9.0.0"
|
||||
dataclasses-json = ">=0.5.6,<0.6.0"
|
||||
htmlmin = ">=0.1.12,<0.2.0"
|
||||
Jinja2 = ">3"
|
||||
markdown2 = ">=2.4.1,<3.0.0"
|
||||
MarkupSafe = ">=2.0,<3.0"
|
||||
Pygments = ">=2.10.0,<3.0.0"
|
||||
pytz = "*"
|
||||
PyYAML = ">=5.4.1,<7"
|
||||
requests = ">=2.31.0,<3.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "jsonlines"
|
||||
version = "3.1.0"
|
||||
@ -2671,23 +2614,6 @@ profiling = ["gprof2dot"]
|
||||
rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
|
||||
testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
|
||||
|
||||
[[package]]
|
||||
name = "markdown2"
|
||||
version = "2.5.1"
|
||||
description = "A fast and complete Python implementation of Markdown"
|
||||
optional = false
|
||||
python-versions = "<4,>=3.8"
|
||||
files = [
|
||||
{file = "markdown2-2.5.1-py2.py3-none-any.whl", hash = "sha256:190ae60a4bd0425c60c863bede18a9f3d45b1cbf3fbc9f40b4fac336ff2c520b"},
|
||||
{file = "markdown2-2.5.1.tar.gz", hash = "sha256:12fc04ea5a87f7bb4b65acf5bf3af1183b20838cc7d543b74c92ec7eea4bbc74"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
all = ["latex2mathml", "pygments (>=2.7.3)", "wavedrom"]
|
||||
code-syntax-highlighting = ["pygments (>=2.7.3)"]
|
||||
latex = ["latex2mathml"]
|
||||
wavedrom = ["wavedrom"]
|
||||
|
||||
[[package]]
|
||||
name = "markupsafe"
|
||||
version = "2.1.5"
|
||||
@ -2776,20 +2702,6 @@ dev = ["marshmallow[tests]", "pre-commit (>=3.5,<4.0)", "tox"]
|
||||
docs = ["alabaster (==1.0.0)", "autodocsumm (==0.2.13)", "sphinx (==8.0.2)", "sphinx-issues (==4.1.0)", "sphinx-version-warning (==1.1.2)"]
|
||||
tests = ["pytest", "pytz", "simplejson"]
|
||||
|
||||
[[package]]
|
||||
name = "marshmallow-enum"
|
||||
version = "1.5.1"
|
||||
description = "Enum field for Marshmallow"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "marshmallow-enum-1.5.1.tar.gz", hash = "sha256:38e697e11f45a8e64b4a1e664000897c659b60aa57bfa18d44e226a9920b6e58"},
|
||||
{file = "marshmallow_enum-1.5.1-py2.py3-none-any.whl", hash = "sha256:57161ab3dbfde4f57adeb12090f39592e992b9c86d206d02f6bd03ebec60f072"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
marshmallow = ">=2.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "matplotlib"
|
||||
version = "3.9.2"
|
||||
@ -3797,9 +3709,9 @@ files = [
|
||||
[package.dependencies]
|
||||
numpy = [
|
||||
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
|
||||
{version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
|
||||
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
|
||||
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
|
||||
{version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -3948,8 +3860,8 @@ files = [
|
||||
[package.dependencies]
|
||||
numpy = [
|
||||
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
|
||||
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
|
||||
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
|
||||
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
|
||||
]
|
||||
python-dateutil = ">=2.8.2"
|
||||
pytz = ">=2020.1"
|
||||
@ -7001,21 +6913,6 @@ files = [
|
||||
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typing-inspect"
|
||||
version = "0.9.0"
|
||||
description = "Runtime inspection utilities for typing module."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"},
|
||||
{file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
mypy-extensions = ">=0.3.0"
|
||||
typing-extensions = ">=3.7.4"
|
||||
|
||||
[[package]]
|
||||
name = "tzdata"
|
||||
version = "2024.2"
|
||||
@ -7599,4 +7496,4 @@ tesserocr = ["tesserocr"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "8a545ce70eb2001e47c79c102a494aa42d8f5efee5dfbf3dfd0acfb3fb0f8ec9"
|
||||
content-hash = "70620592368cfa1a6a8a7e32e1f98f5f9f253f0d99f7a8bdfb6c46a0363b2408"
|
||||
|
@ -37,9 +37,9 @@ torchvision = [
|
||||
######################
|
||||
python = "^3.10"
|
||||
pydantic = "^2.0.0"
|
||||
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "7c104d61aa5d003dd8d9711c37e23ce04799f4c9"}
|
||||
docling-ibm-models = {git = "https://github.com/DS4SD/docling-ibm-models.git", rev = "1d2e2a2e6eb152c237f1383cdba20cf85db80b97"}
|
||||
deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "c185c4f985ccd29a470a1cddd3bec43880b739ee"}
|
||||
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "33aa21408400c9c475db0f8c6be681b888388284"}
|
||||
docling-ibm-models = "^2.0.1"
|
||||
deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "8ab1b4372122c820a28badd3c6095c2ce2feaf61"}
|
||||
filetype = "^1.2.0"
|
||||
pypdfium2 = "^4.30.0"
|
||||
pydantic-settings = "^2.3.0"
|
||||
|
Loading…
Reference in New Issue
Block a user