Update GT test files for pages

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-09-25 15:54:55 +02:00
parent 48d8b7bf70
commit ad2bd714d4
10 changed files with 24 additions and 46 deletions

View File

@ -62,7 +62,7 @@ _EMPTY_DOC = DsDocument(
) )
_EMPTY_DOCLING_DOC = DoclingDocument( _EMPTY_DOCLING_DOC = DoclingDocument(
description={}, file_info=FileInfo(document_hash="123xyz") description={}, file_info=FileInfo(filename="dummy", document_hash="123xyz")
) # TODO: Stub ) # TODO: Stub

View File

@ -4,6 +4,7 @@ from typing import Iterable, List
import numpy import numpy
from docling_core.types.experimental.base import BoundingBox from docling_core.types.experimental.base import BoundingBox
from docling_core.types.experimental.document import TableCell from docling_core.types.experimental.document import TableCell
from docling_core.types.experimental.labels import DocItemLabel
from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
from PIL import ImageDraw from PIL import ImageDraw
@ -74,7 +75,7 @@ class TableStructureModel:
], ],
) )
for cluster in page.predictions.layout.clusters for cluster in page.predictions.layout.clusters
if cluster.label == "Table" if cluster.label == DocItemLabel.TABLE
] ]
if not len(in_tables): if not len(in_tables):
yield page yield page
@ -138,7 +139,7 @@ class TableStructureModel:
id=table_cluster.id, id=table_cluster.id,
page_no=page.page_no, page_no=page.page_no,
cluster=table_cluster, cluster=table_cluster,
label="Table", label=DocItemLabel.TABLE,
) )
page.predictions.tablestructure.table_map[table_cluster.id] = tbl page.predictions.tablestructure.table_map[table_cluster.id] = tbl

47
poetry.lock generated
View File

@ -862,7 +862,7 @@ files = []
develop = false develop = false
[package.dependencies] [package.dependencies]
docling-core = {git = "ssh://git@github.com/DS4SD/docling-core.git", rev = "b50d53c05bf755ddb73c7d33ececdb542877662a"} docling-core = {git = "ssh://git@github.com/DS4SD/docling-core.git", rev = "ce0b7ee64750944e530d03a1cf22a75636fa2775"}
docutils = "!=0.21" docutils = "!=0.21"
matplotlib = "^3.7.1" matplotlib = "^3.7.1"
networkx = "^3.1" networkx = "^3.1"
@ -881,8 +881,8 @@ toolkit = ["deepsearch-toolkit (>=0.31.0)"]
[package.source] [package.source]
type = "git" type = "git"
url = "ssh://git@github.com/DS4SD/deepsearch-glm.git" url = "ssh://git@github.com/DS4SD/deepsearch-glm.git"
reference = "f45ad08c02a4bc69f611fef540a8f10df53870b6" reference = "abab9eeb11dbb400c9f1c1c962baf350bbe6496f"
resolved_reference = "f45ad08c02a4bc69f611fef540a8f10df53870b6" resolved_reference = "abab9eeb11dbb400c9f1c1c962baf350bbe6496f"
[[package]] [[package]]
name = "deprecated" name = "deprecated"
@ -953,14 +953,13 @@ jsonref = "^1.1.0"
jsonschema = "^4.16.0" jsonschema = "^4.16.0"
pandas = "^2.2.2" pandas = "^2.2.2"
pydantic = "^2.6.0" pydantic = "^2.6.0"
pydantic-extra-types = "^2.9.0"
tabulate = "^0.9.0" tabulate = "^0.9.0"
[package.source] [package.source]
type = "git" type = "git"
url = "ssh://git@github.com/DS4SD/docling-core.git" url = "ssh://git@github.com/DS4SD/docling-core.git"
reference = "b50d53c05bf755ddb73c7d33ececdb542877662a" reference = "ce0b7ee64750944e530d03a1cf22a75636fa2775"
resolved_reference = "b50d53c05bf755ddb73c7d33ececdb542877662a" resolved_reference = "ce0b7ee64750944e530d03a1cf22a75636fa2775"
[[package]] [[package]]
name = "docling-ibm-models" name = "docling-ibm-models"
@ -4019,13 +4018,13 @@ virtualenv = ">=20.10.0"
[[package]] [[package]]
name = "prompt-toolkit" name = "prompt-toolkit"
version = "3.0.47" version = "3.0.48"
description = "Library for building powerful interactive command lines in Python" description = "Library for building powerful interactive command lines in Python"
optional = false optional = false
python-versions = ">=3.7.0" python-versions = ">=3.7.0"
files = [ files = [
{file = "prompt_toolkit-3.0.47-py3-none-any.whl", hash = "sha256:0d7bfa67001d5e39d02c224b663abc33687405033a8c422d0d675a5a13361d10"}, {file = "prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e"},
{file = "prompt_toolkit-3.0.47.tar.gz", hash = "sha256:1e1b29cb58080b1e69f207c893a1a7bf16d127a5c30c9d17a25a5d77792e5360"}, {file = "prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90"},
] ]
[package.dependencies] [package.dependencies]
@ -4352,28 +4351,6 @@ files = [
[package.dependencies] [package.dependencies]
typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
[[package]]
name = "pydantic-extra-types"
version = "2.9.0"
description = "Extra Pydantic types."
optional = false
python-versions = ">=3.8"
files = [
{file = "pydantic_extra_types-2.9.0-py3-none-any.whl", hash = "sha256:f0bb975508572ba7bf3390b7337807588463b7248587e69f43b1ad7c797530d0"},
{file = "pydantic_extra_types-2.9.0.tar.gz", hash = "sha256:e061c01636188743bb69f368dcd391f327b8cfbfede2fe1cbb1211b06601ba3b"},
]
[package.dependencies]
pydantic = ">=2.5.2"
[package.extras]
all = ["pendulum (>=3.0.0,<4.0.0)", "phonenumbers (>=8,<9)", "pycountry (>=23)", "python-ulid (>=1,<2)", "python-ulid (>=1,<3)", "pytz (>=2024.1)", "semver (>=3.0.2)", "tzdata (>=2024.1)"]
pendulum = ["pendulum (>=3.0.0,<4.0.0)"]
phonenumbers = ["phonenumbers (>=8,<9)"]
pycountry = ["pycountry (>=23)"]
python-ulid = ["python-ulid (>=1,<2)", "python-ulid (>=1,<3)"]
semver = ["semver (>=3.0.2)"]
[[package]] [[package]]
name = "pydantic-settings" name = "pydantic-settings"
version = "2.5.2" version = "2.5.2"
@ -4449,13 +4426,13 @@ testutils = ["gitpython (>3)"]
[[package]] [[package]]
name = "pymilvus" name = "pymilvus"
version = "2.4.6" version = "2.4.7"
description = "Python Sdk for Milvus" description = "Python Sdk for Milvus"
optional = true optional = true
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "pymilvus-2.4.6-py3-none-any.whl", hash = "sha256:b4c43472edc313b845d313be50610e19054e6954b2c5c3b515565c596c2d3d97"}, {file = "pymilvus-2.4.7-py3-none-any.whl", hash = "sha256:1e5d377bd40fa7eb459d3958dbd96201758f5cf997d41eb3d2d169d0b7fa462e"},
{file = "pymilvus-2.4.6.tar.gz", hash = "sha256:6ac3eb91c92cc01bbe444fe83f895f02d7b2546d96ac67998630bf31ac074d66"}, {file = "pymilvus-2.4.7.tar.gz", hash = "sha256:9ef460b940782a42e1b7b8ae0da03d8cc02d9d80044d13f4b689a7c935ec7aa7"},
] ]
[package.dependencies] [package.dependencies]
@ -7285,4 +7262,4 @@ examples = ["langchain-huggingface", "langchain-milvus", "langchain-text-splitte
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.10" python-versions = "^3.10"
content-hash = "3760931d493042f67b037079400af3750ddeece0f0f5c2f9f3c93579f51c5d5c" content-hash = "4e1fc1b9ebcaba913340146a9ecd58da9c3a10c0843f5118dd66a061bb650e7e"

View File

@ -23,9 +23,9 @@ packages = [{include = "docling"}]
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.10" python = "^3.10"
pydantic = "^2.0.0" pydantic = "^2.0.0"
docling-core = {git = "ssh://git@github.com/DS4SD/docling-core.git", rev = "b50d53c05bf755ddb73c7d33ececdb542877662a"} docling-core = {git = "ssh://git@github.com/DS4SD/docling-core.git", rev = "ce0b7ee64750944e530d03a1cf22a75636fa2775"}
docling-ibm-models = "^1.2.0" docling-ibm-models = "^1.2.0"
deepsearch-glm = {git = "ssh://git@github.com/DS4SD/deepsearch-glm.git", rev = "f45ad08c02a4bc69f611fef540a8f10df53870b6"} deepsearch-glm = {git = "ssh://git@github.com/DS4SD/deepsearch-glm.git", rev = "abab9eeb11dbb400c9f1c1c962baf350bbe6496f"}
filetype = "^1.2.0" filetype = "^1.2.0"
pypdfium2 = "^4.30.0" pypdfium2 = "^4.30.0"

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long