From 55908d6bb44abc18dce26e60591230b4826d86f7 Mon Sep 17 00:00:00 2001 From: Panos Vagenas Date: Wed, 12 Nov 2025 16:35:49 +0100 Subject: [PATCH] chore: pretest docling-core 2.51.0 Signed-off-by: Panos Vagenas --- pyproject.toml | 3 +- .../groundtruth/docling_v2/2203.01017v2.json | 2 +- .../groundtruth/docling_v2/2206.01062.json | 2 +- .../docling_v2/2305.03393v1-pg9.json | 2 +- .../groundtruth/docling_v2/2305.03393v1.json | 2 +- .../docling_v2/amt_handbook_sample.json | 2 +- .../docling_v2/code_and_formula.json | 2 +- .../docling_v2/csv-comma-in-cell.csv.json | 2 +- .../groundtruth/docling_v2/csv-comma.csv.json | 2 +- .../csv-inconsistent-header.csv.json | 2 +- .../groundtruth/docling_v2/csv-pipe.csv.json | 2 +- .../docling_v2/csv-semicolon.csv.json | 2 +- .../groundtruth/docling_v2/csv-tab.csv.json | 2 +- .../docling_v2/csv-too-few-columns.csv.json | 2 +- .../docling_v2/csv-too-many-columns.csv.json | 2 +- .../docling_v2/docx_rich_cells.docx.json | 2 +- .../docling_v2/elife-56337.nxml.json | 2 +- .../docling_v2/equations.docx.json | 2 +- .../docling_v2/escaped_characters.md.json | 2 +- .../docling_v2/example_01.html.json | 4 +- .../docling_v2/example_01_images.html.json | 2 +- .../docling_v2/example_02.html.json | 2 +- .../docling_v2/example_03.html.json | 2 +- .../docling_v2/example_04.html.json | 2 +- .../docling_v2/example_05.html.json | 2 +- .../docling_v2/example_06.html.json | 2 +- .../docling_v2/example_07.html.json | 2 +- .../docling_v2/example_08.html.json | 2 +- .../docling_v2/formatting.html.json | 2 +- .../docling_v2/html_code_snippets.html.json | 2 +- .../html_rich_table_cells.html.json | 2 +- .../docling_v2/hyperlink_01.html.json | 2 +- .../docling_v2/hyperlink_02.html.json | 2 +- .../docling_v2/hyperlink_03.html.json | 2 +- .../docling_v2/hyperlink_04.html.json | 2 +- .../docling_v2/hyperlink_05.html.json | 2 +- .../docling_v2/inline_and_formatting.md.yaml | 2 +- .../docling_v2/ipa20180000016.json | 2 +- .../docling_v2/ipa20200022300.json | 2 +- .../docling_v2/lorem_ipsum.docx.json | 2 +- .../docling_v2/mixed_without_h1.md.yaml | 2 +- .../groundtruth/docling_v2/multi_page.json | 2 +- .../groundtruth/docling_v2/pa20010031492.json | 2 +- .../docling_v2/pftaps057006474.json | 2 +- .../groundtruth/docling_v2/pg06442728.json | 2 +- .../docling_v2/picture_classification.json | 2 +- .../docling_v2/pntd.0008301.nxml.json | 2 +- .../docling_v2/pone.0234687.nxml.json | 2 +- .../docling_v2/powerpoint_bad_text.pptx.json | 2 +- .../docling_v2/powerpoint_sample.pptx.json | 2 +- .../powerpoint_with_image.pptx.json | 2 +- .../docling_v2/redp5110_sampled.json | 2 +- .../docling_v2/right_to_left_01.json | 2 +- .../docling_v2/right_to_left_02.json | 2 +- .../docling_v2/right_to_left_03.json | 2 +- .../groundtruth/docling_v2/table_01.html.json | 2 +- .../groundtruth/docling_v2/table_02.html.json | 2 +- .../groundtruth/docling_v2/table_03.html.json | 2 +- .../groundtruth/docling_v2/table_04.html.json | 2 +- .../groundtruth/docling_v2/table_05.html.json | 2 +- .../groundtruth/docling_v2/table_06.html.json | 2 +- .../docling_v2/table_with_equations.docx.json | 2 +- .../docling_v2/table_with_heading.html.json | 2 +- .../docling_v2/tablecell.docx.json | 2 +- .../docling_v2/test_emf_docx.docx.json | 2 +- .../groundtruth/docling_v2/textbox.docx.itxt | 113 ++- .../groundtruth/docling_v2/textbox.docx.json | 795 ++++++------------ .../groundtruth/docling_v2/textbox.docx.md | 2 - .../docling_v2/unit_test_01.html.json | 2 +- .../docling_v2/unit_test_headers.docx.json | 2 +- .../unit_test_headers_numbered.docx.json | 2 +- .../docling_v2/unit_test_lists.docx.json | 2 +- .../docling_v2/webvtt_example_01.vtt.json | 2 +- .../docling_v2/webvtt_example_02.vtt.json | 2 +- .../docling_v2/webvtt_example_03.vtt.json | 2 +- .../docling_v2/wiki_duck.html.json | 2 +- .../docling_v2/word_image_anchors.docx.json | 2 +- .../docling_v2/word_sample.docx.json | 2 +- .../docling_v2/word_tables.docx.json | 2 +- .../groundtruth/docling_v2/xlsx_01.xlsx.json | 6 +- .../xlsx_02_sample_sales_data.xlsm.json | 6 +- .../docling_v2/xlsx_03_chartsheet.xlsx.json | 6 +- .../docling_v2/xlsx_04_inflated.xlsx.json | 10 +- .../groundtruth/docling_v2/webp-test.json | 2 +- .../groundtruth/docling_v2/ocr_test.json | 2 +- .../docling_v2/ocr_test_rotated_180.json | 2 +- .../docling_v2/ocr_test_rotated_270.json | 2 +- .../docling_v2/ocr_test_rotated_90.json | 2 +- uv.lock | 191 +++-- 89 files changed, 541 insertions(+), 753 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 77a87857..8aaa5217 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,8 @@ authors = [ requires-python = '>=3.9,<4.0' dependencies = [ 'pydantic (>=2.0.0,<3.0.0)', - 'docling-core[chunking] (>=2.50.1,<3.0.0)', + 'docling-core @ git+https://github.com/docling-project/docling-core.git@3d13b02756f1c0d1f1ccab5cfbd76f1f888a0dd9#egg=docling-core[chunking]', + # 'docling-core[chunking] (>=2.50.1,<3.0.0)', 'docling-parse (>=4.7.0,<5.0.0)', "docling-ibm-models>=3.9.1,<4", 'filetype (>=1.2.0,<2.0.0)', diff --git a/tests/data/groundtruth/docling_v2/2203.01017v2.json b/tests/data/groundtruth/docling_v2/2203.01017v2.json index d1f64f34..574269b8 100644 --- a/tests/data/groundtruth/docling_v2/2203.01017v2.json +++ b/tests/data/groundtruth/docling_v2/2203.01017v2.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "2203.01017v2", "origin": { "mimetype": "application/pdf", diff --git a/tests/data/groundtruth/docling_v2/2206.01062.json b/tests/data/groundtruth/docling_v2/2206.01062.json index 833a3629..3e07a4fa 100644 --- a/tests/data/groundtruth/docling_v2/2206.01062.json +++ b/tests/data/groundtruth/docling_v2/2206.01062.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "2206.01062", "origin": { "mimetype": "application/pdf", diff --git a/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.json b/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.json index 5a892b63..b901e001 100644 --- a/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.json +++ b/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "2305.03393v1-pg9", "origin": { "mimetype": "application/pdf", diff --git a/tests/data/groundtruth/docling_v2/2305.03393v1.json b/tests/data/groundtruth/docling_v2/2305.03393v1.json index aac828aa..2a0632b1 100644 --- a/tests/data/groundtruth/docling_v2/2305.03393v1.json +++ b/tests/data/groundtruth/docling_v2/2305.03393v1.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "2305.03393v1", "origin": { "mimetype": "application/pdf", diff --git a/tests/data/groundtruth/docling_v2/amt_handbook_sample.json b/tests/data/groundtruth/docling_v2/amt_handbook_sample.json index 6ba85fea..21af5c0d 100644 --- a/tests/data/groundtruth/docling_v2/amt_handbook_sample.json +++ b/tests/data/groundtruth/docling_v2/amt_handbook_sample.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "amt_handbook_sample", "origin": { "mimetype": "application/pdf", diff --git a/tests/data/groundtruth/docling_v2/code_and_formula.json b/tests/data/groundtruth/docling_v2/code_and_formula.json index 4294fcd7..fad98fc7 100644 --- a/tests/data/groundtruth/docling_v2/code_and_formula.json +++ b/tests/data/groundtruth/docling_v2/code_and_formula.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "code_and_formula", "origin": { "mimetype": "application/pdf", diff --git a/tests/data/groundtruth/docling_v2/csv-comma-in-cell.csv.json b/tests/data/groundtruth/docling_v2/csv-comma-in-cell.csv.json index f24f2c0d..a5a4b4a1 100644 --- a/tests/data/groundtruth/docling_v2/csv-comma-in-cell.csv.json +++ b/tests/data/groundtruth/docling_v2/csv-comma-in-cell.csv.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "csv-comma-in-cell", "origin": { "mimetype": "text/csv", diff --git a/tests/data/groundtruth/docling_v2/csv-comma.csv.json b/tests/data/groundtruth/docling_v2/csv-comma.csv.json index 482ea711..6cdf7561 100644 --- a/tests/data/groundtruth/docling_v2/csv-comma.csv.json +++ b/tests/data/groundtruth/docling_v2/csv-comma.csv.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "csv-comma", "origin": { "mimetype": "text/csv", diff --git a/tests/data/groundtruth/docling_v2/csv-inconsistent-header.csv.json b/tests/data/groundtruth/docling_v2/csv-inconsistent-header.csv.json index a3dc1c9b..e597c606 100644 --- a/tests/data/groundtruth/docling_v2/csv-inconsistent-header.csv.json +++ b/tests/data/groundtruth/docling_v2/csv-inconsistent-header.csv.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "csv-inconsistent-header", "origin": { "mimetype": "text/csv", diff --git a/tests/data/groundtruth/docling_v2/csv-pipe.csv.json b/tests/data/groundtruth/docling_v2/csv-pipe.csv.json index 2e05952d..0819d703 100644 --- a/tests/data/groundtruth/docling_v2/csv-pipe.csv.json +++ b/tests/data/groundtruth/docling_v2/csv-pipe.csv.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "csv-pipe", "origin": { "mimetype": "text/csv", diff --git a/tests/data/groundtruth/docling_v2/csv-semicolon.csv.json b/tests/data/groundtruth/docling_v2/csv-semicolon.csv.json index c781bf54..2fb7cb6b 100644 --- a/tests/data/groundtruth/docling_v2/csv-semicolon.csv.json +++ b/tests/data/groundtruth/docling_v2/csv-semicolon.csv.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "csv-semicolon", "origin": { "mimetype": "text/csv", diff --git a/tests/data/groundtruth/docling_v2/csv-tab.csv.json b/tests/data/groundtruth/docling_v2/csv-tab.csv.json index 5dd02b04..c4e277da 100644 --- a/tests/data/groundtruth/docling_v2/csv-tab.csv.json +++ b/tests/data/groundtruth/docling_v2/csv-tab.csv.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "csv-tab", "origin": { "mimetype": "text/csv", diff --git a/tests/data/groundtruth/docling_v2/csv-too-few-columns.csv.json b/tests/data/groundtruth/docling_v2/csv-too-few-columns.csv.json index ba3960b5..eb4d5156 100644 --- a/tests/data/groundtruth/docling_v2/csv-too-few-columns.csv.json +++ b/tests/data/groundtruth/docling_v2/csv-too-few-columns.csv.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "csv-too-few-columns", "origin": { "mimetype": "text/csv", diff --git a/tests/data/groundtruth/docling_v2/csv-too-many-columns.csv.json b/tests/data/groundtruth/docling_v2/csv-too-many-columns.csv.json index 43941b2c..7343643b 100644 --- a/tests/data/groundtruth/docling_v2/csv-too-many-columns.csv.json +++ b/tests/data/groundtruth/docling_v2/csv-too-many-columns.csv.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "csv-too-many-columns", "origin": { "mimetype": "text/csv", diff --git a/tests/data/groundtruth/docling_v2/docx_rich_cells.docx.json b/tests/data/groundtruth/docling_v2/docx_rich_cells.docx.json index 424f8e89..f25e6be7 100644 --- a/tests/data/groundtruth/docling_v2/docx_rich_cells.docx.json +++ b/tests/data/groundtruth/docling_v2/docx_rich_cells.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "docx_rich_cells", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/tests/data/groundtruth/docling_v2/elife-56337.nxml.json b/tests/data/groundtruth/docling_v2/elife-56337.nxml.json index d4927030..014a2076 100644 --- a/tests/data/groundtruth/docling_v2/elife-56337.nxml.json +++ b/tests/data/groundtruth/docling_v2/elife-56337.nxml.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "elife-56337", "origin": { "mimetype": "application/xml", diff --git a/tests/data/groundtruth/docling_v2/equations.docx.json b/tests/data/groundtruth/docling_v2/equations.docx.json index 8b045f83..fd93fa89 100644 --- a/tests/data/groundtruth/docling_v2/equations.docx.json +++ b/tests/data/groundtruth/docling_v2/equations.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "equations", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/tests/data/groundtruth/docling_v2/escaped_characters.md.json b/tests/data/groundtruth/docling_v2/escaped_characters.md.json index 8e410565..516fc946 100644 --- a/tests/data/groundtruth/docling_v2/escaped_characters.md.json +++ b/tests/data/groundtruth/docling_v2/escaped_characters.md.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "escaped_characters", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/example_01.html.json b/tests/data/groundtruth/docling_v2/example_01.html.json index 212bc0d2..91c05eca 100644 --- a/tests/data/groundtruth/docling_v2/example_01.html.json +++ b/tests/data/groundtruth/docling_v2/example_01.html.json @@ -1,10 +1,10 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "example_01", "origin": { "mimetype": "text/html", - "binary_hash": 13726679883013609282, + "binary_hash": 3245959421868226348, "filename": "example_01.html" }, "furniture": { diff --git a/tests/data/groundtruth/docling_v2/example_01_images.html.json b/tests/data/groundtruth/docling_v2/example_01_images.html.json index 70b40c2b..7e443315 100644 --- a/tests/data/groundtruth/docling_v2/example_01_images.html.json +++ b/tests/data/groundtruth/docling_v2/example_01_images.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "example_01", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/example_02.html.json b/tests/data/groundtruth/docling_v2/example_02.html.json index d37a774a..df3f43e0 100644 --- a/tests/data/groundtruth/docling_v2/example_02.html.json +++ b/tests/data/groundtruth/docling_v2/example_02.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "example_02", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/example_03.html.json b/tests/data/groundtruth/docling_v2/example_03.html.json index a5e98b07..39eca894 100644 --- a/tests/data/groundtruth/docling_v2/example_03.html.json +++ b/tests/data/groundtruth/docling_v2/example_03.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "example_03", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/example_04.html.json b/tests/data/groundtruth/docling_v2/example_04.html.json index bbd44157..bfc1006e 100644 --- a/tests/data/groundtruth/docling_v2/example_04.html.json +++ b/tests/data/groundtruth/docling_v2/example_04.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "example_04", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/example_05.html.json b/tests/data/groundtruth/docling_v2/example_05.html.json index dc83d3fc..bd619c04 100644 --- a/tests/data/groundtruth/docling_v2/example_05.html.json +++ b/tests/data/groundtruth/docling_v2/example_05.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "example_05", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/example_06.html.json b/tests/data/groundtruth/docling_v2/example_06.html.json index 599cfd3d..07845606 100644 --- a/tests/data/groundtruth/docling_v2/example_06.html.json +++ b/tests/data/groundtruth/docling_v2/example_06.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "example_06", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/example_07.html.json b/tests/data/groundtruth/docling_v2/example_07.html.json index bef073ed..eb24860f 100644 --- a/tests/data/groundtruth/docling_v2/example_07.html.json +++ b/tests/data/groundtruth/docling_v2/example_07.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "example_07", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/example_08.html.json b/tests/data/groundtruth/docling_v2/example_08.html.json index cad46653..de009c5f 100644 --- a/tests/data/groundtruth/docling_v2/example_08.html.json +++ b/tests/data/groundtruth/docling_v2/example_08.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "example_08", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/formatting.html.json b/tests/data/groundtruth/docling_v2/formatting.html.json index 5f3317c7..52f3e5b3 100644 --- a/tests/data/groundtruth/docling_v2/formatting.html.json +++ b/tests/data/groundtruth/docling_v2/formatting.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "formatting", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/html_code_snippets.html.json b/tests/data/groundtruth/docling_v2/html_code_snippets.html.json index bc20830c..17d71ffc 100644 --- a/tests/data/groundtruth/docling_v2/html_code_snippets.html.json +++ b/tests/data/groundtruth/docling_v2/html_code_snippets.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "html_code_snippets", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/html_rich_table_cells.html.json b/tests/data/groundtruth/docling_v2/html_rich_table_cells.html.json index 388e5c86..6a4d3fd2 100644 --- a/tests/data/groundtruth/docling_v2/html_rich_table_cells.html.json +++ b/tests/data/groundtruth/docling_v2/html_rich_table_cells.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "html_rich_table_cells", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/hyperlink_01.html.json b/tests/data/groundtruth/docling_v2/hyperlink_01.html.json index 78c55fbc..d9df06e4 100644 --- a/tests/data/groundtruth/docling_v2/hyperlink_01.html.json +++ b/tests/data/groundtruth/docling_v2/hyperlink_01.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "hyperlink_01", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/hyperlink_02.html.json b/tests/data/groundtruth/docling_v2/hyperlink_02.html.json index 89383955..05d9ecdf 100644 --- a/tests/data/groundtruth/docling_v2/hyperlink_02.html.json +++ b/tests/data/groundtruth/docling_v2/hyperlink_02.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "hyperlink_02", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/hyperlink_03.html.json b/tests/data/groundtruth/docling_v2/hyperlink_03.html.json index b5276fc0..edf12e9d 100644 --- a/tests/data/groundtruth/docling_v2/hyperlink_03.html.json +++ b/tests/data/groundtruth/docling_v2/hyperlink_03.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "hyperlink_03", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/hyperlink_04.html.json b/tests/data/groundtruth/docling_v2/hyperlink_04.html.json index 6658e326..00595aa5 100644 --- a/tests/data/groundtruth/docling_v2/hyperlink_04.html.json +++ b/tests/data/groundtruth/docling_v2/hyperlink_04.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "hyperlink_04", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/hyperlink_05.html.json b/tests/data/groundtruth/docling_v2/hyperlink_05.html.json index 34b9becd..c56e68e9 100644 --- a/tests/data/groundtruth/docling_v2/hyperlink_05.html.json +++ b/tests/data/groundtruth/docling_v2/hyperlink_05.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "hyperlink_05", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/inline_and_formatting.md.yaml b/tests/data/groundtruth/docling_v2/inline_and_formatting.md.yaml index 0e76b324..1323b71a 100644 --- a/tests/data/groundtruth/docling_v2/inline_and_formatting.md.yaml +++ b/tests/data/groundtruth/docling_v2/inline_and_formatting.md.yaml @@ -890,4 +890,4 @@ texts: prov: [] self_ref: '#/texts/48' text: Table Heading -version: 1.7.0 +version: 1.8.0 diff --git a/tests/data/groundtruth/docling_v2/ipa20180000016.json b/tests/data/groundtruth/docling_v2/ipa20180000016.json index 251b68fb..5c0a6696 100644 --- a/tests/data/groundtruth/docling_v2/ipa20180000016.json +++ b/tests/data/groundtruth/docling_v2/ipa20180000016.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "ipa20180000016.xml", "origin": { "mimetype": "application/xml", diff --git a/tests/data/groundtruth/docling_v2/ipa20200022300.json b/tests/data/groundtruth/docling_v2/ipa20200022300.json index 1edbfe07..1bf13318 100644 --- a/tests/data/groundtruth/docling_v2/ipa20200022300.json +++ b/tests/data/groundtruth/docling_v2/ipa20200022300.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "ipa20200022300.xml", "origin": { "mimetype": "application/xml", diff --git a/tests/data/groundtruth/docling_v2/lorem_ipsum.docx.json b/tests/data/groundtruth/docling_v2/lorem_ipsum.docx.json index 9c50b915..d9b7a726 100644 --- a/tests/data/groundtruth/docling_v2/lorem_ipsum.docx.json +++ b/tests/data/groundtruth/docling_v2/lorem_ipsum.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "lorem_ipsum", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/tests/data/groundtruth/docling_v2/mixed_without_h1.md.yaml b/tests/data/groundtruth/docling_v2/mixed_without_h1.md.yaml index b54eff24..65e93b2d 100644 --- a/tests/data/groundtruth/docling_v2/mixed_without_h1.md.yaml +++ b/tests/data/groundtruth/docling_v2/mixed_without_h1.md.yaml @@ -136,4 +136,4 @@ texts: prov: [] self_ref: '#/texts/7' text: The end! -version: 1.7.0 +version: 1.8.0 diff --git a/tests/data/groundtruth/docling_v2/multi_page.json b/tests/data/groundtruth/docling_v2/multi_page.json index 76753e1d..c19fd048 100644 --- a/tests/data/groundtruth/docling_v2/multi_page.json +++ b/tests/data/groundtruth/docling_v2/multi_page.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "multi_page", "origin": { "mimetype": "application/pdf", diff --git a/tests/data/groundtruth/docling_v2/pa20010031492.json b/tests/data/groundtruth/docling_v2/pa20010031492.json index 72b46f6f..99db87e8 100644 --- a/tests/data/groundtruth/docling_v2/pa20010031492.json +++ b/tests/data/groundtruth/docling_v2/pa20010031492.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "pa20010031492.xml", "origin": { "mimetype": "application/xml", diff --git a/tests/data/groundtruth/docling_v2/pftaps057006474.json b/tests/data/groundtruth/docling_v2/pftaps057006474.json index e6aa043f..f2f87e36 100644 --- a/tests/data/groundtruth/docling_v2/pftaps057006474.json +++ b/tests/data/groundtruth/docling_v2/pftaps057006474.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "pftaps057006474.txt", "origin": { "mimetype": "text/plain", diff --git a/tests/data/groundtruth/docling_v2/pg06442728.json b/tests/data/groundtruth/docling_v2/pg06442728.json index 38d2e001..48ae8803 100644 --- a/tests/data/groundtruth/docling_v2/pg06442728.json +++ b/tests/data/groundtruth/docling_v2/pg06442728.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "pg06442728.xml", "origin": { "mimetype": "application/xml", diff --git a/tests/data/groundtruth/docling_v2/picture_classification.json b/tests/data/groundtruth/docling_v2/picture_classification.json index be184b16..3b1a6080 100644 --- a/tests/data/groundtruth/docling_v2/picture_classification.json +++ b/tests/data/groundtruth/docling_v2/picture_classification.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "picture_classification", "origin": { "mimetype": "application/pdf", diff --git a/tests/data/groundtruth/docling_v2/pntd.0008301.nxml.json b/tests/data/groundtruth/docling_v2/pntd.0008301.nxml.json index dbdd740e..b546ca37 100644 --- a/tests/data/groundtruth/docling_v2/pntd.0008301.nxml.json +++ b/tests/data/groundtruth/docling_v2/pntd.0008301.nxml.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "pntd.0008301", "origin": { "mimetype": "application/xml", diff --git a/tests/data/groundtruth/docling_v2/pone.0234687.nxml.json b/tests/data/groundtruth/docling_v2/pone.0234687.nxml.json index 035ceea8..05c988a1 100644 --- a/tests/data/groundtruth/docling_v2/pone.0234687.nxml.json +++ b/tests/data/groundtruth/docling_v2/pone.0234687.nxml.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "pone.0234687", "origin": { "mimetype": "application/xml", diff --git a/tests/data/groundtruth/docling_v2/powerpoint_bad_text.pptx.json b/tests/data/groundtruth/docling_v2/powerpoint_bad_text.pptx.json index 20f444ef..091813f8 100644 --- a/tests/data/groundtruth/docling_v2/powerpoint_bad_text.pptx.json +++ b/tests/data/groundtruth/docling_v2/powerpoint_bad_text.pptx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "powerpoint_bad_text", "origin": { "mimetype": "application/vnd.ms-powerpoint", diff --git a/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.json b/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.json index 4447e8e0..dd86d24f 100644 --- a/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.json +++ b/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "powerpoint_sample", "origin": { "mimetype": "application/vnd.ms-powerpoint", diff --git a/tests/data/groundtruth/docling_v2/powerpoint_with_image.pptx.json b/tests/data/groundtruth/docling_v2/powerpoint_with_image.pptx.json index ccac08c9..ed67b5d0 100644 --- a/tests/data/groundtruth/docling_v2/powerpoint_with_image.pptx.json +++ b/tests/data/groundtruth/docling_v2/powerpoint_with_image.pptx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "powerpoint_with_image", "origin": { "mimetype": "application/vnd.ms-powerpoint", diff --git a/tests/data/groundtruth/docling_v2/redp5110_sampled.json b/tests/data/groundtruth/docling_v2/redp5110_sampled.json index 8ce44c0d..b127d339 100644 --- a/tests/data/groundtruth/docling_v2/redp5110_sampled.json +++ b/tests/data/groundtruth/docling_v2/redp5110_sampled.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "redp5110_sampled", "origin": { "mimetype": "application/pdf", diff --git a/tests/data/groundtruth/docling_v2/right_to_left_01.json b/tests/data/groundtruth/docling_v2/right_to_left_01.json index 620db0d8..3abb6e0d 100644 --- a/tests/data/groundtruth/docling_v2/right_to_left_01.json +++ b/tests/data/groundtruth/docling_v2/right_to_left_01.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "right_to_left_01", "origin": { "mimetype": "application/pdf", diff --git a/tests/data/groundtruth/docling_v2/right_to_left_02.json b/tests/data/groundtruth/docling_v2/right_to_left_02.json index 23fbaf36..b24085f0 100644 --- a/tests/data/groundtruth/docling_v2/right_to_left_02.json +++ b/tests/data/groundtruth/docling_v2/right_to_left_02.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "right_to_left_02", "origin": { "mimetype": "application/pdf", diff --git a/tests/data/groundtruth/docling_v2/right_to_left_03.json b/tests/data/groundtruth/docling_v2/right_to_left_03.json index 40f216df..43360217 100644 --- a/tests/data/groundtruth/docling_v2/right_to_left_03.json +++ b/tests/data/groundtruth/docling_v2/right_to_left_03.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "right_to_left_03", "origin": { "mimetype": "application/pdf", diff --git a/tests/data/groundtruth/docling_v2/table_01.html.json b/tests/data/groundtruth/docling_v2/table_01.html.json index 53506206..f03208af 100644 --- a/tests/data/groundtruth/docling_v2/table_01.html.json +++ b/tests/data/groundtruth/docling_v2/table_01.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "table_01", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/table_02.html.json b/tests/data/groundtruth/docling_v2/table_02.html.json index 3d243602..0a4ea2d6 100644 --- a/tests/data/groundtruth/docling_v2/table_02.html.json +++ b/tests/data/groundtruth/docling_v2/table_02.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "table_02", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/table_03.html.json b/tests/data/groundtruth/docling_v2/table_03.html.json index 50b500cf..5b31177e 100644 --- a/tests/data/groundtruth/docling_v2/table_03.html.json +++ b/tests/data/groundtruth/docling_v2/table_03.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "table_03", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/table_04.html.json b/tests/data/groundtruth/docling_v2/table_04.html.json index d9402988..466d53f4 100644 --- a/tests/data/groundtruth/docling_v2/table_04.html.json +++ b/tests/data/groundtruth/docling_v2/table_04.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "table_04", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/table_05.html.json b/tests/data/groundtruth/docling_v2/table_05.html.json index e190729d..effdd5a3 100644 --- a/tests/data/groundtruth/docling_v2/table_05.html.json +++ b/tests/data/groundtruth/docling_v2/table_05.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "table_05", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/table_06.html.json b/tests/data/groundtruth/docling_v2/table_06.html.json index 0f9723ef..5e6b20a8 100644 --- a/tests/data/groundtruth/docling_v2/table_06.html.json +++ b/tests/data/groundtruth/docling_v2/table_06.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "table_06", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/table_with_equations.docx.json b/tests/data/groundtruth/docling_v2/table_with_equations.docx.json index 5799e5f8..2c4cec56 100644 --- a/tests/data/groundtruth/docling_v2/table_with_equations.docx.json +++ b/tests/data/groundtruth/docling_v2/table_with_equations.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "table_with_equations", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/tests/data/groundtruth/docling_v2/table_with_heading.html.json b/tests/data/groundtruth/docling_v2/table_with_heading.html.json index d5a1b94a..35c5a693 100644 --- a/tests/data/groundtruth/docling_v2/table_with_heading.html.json +++ b/tests/data/groundtruth/docling_v2/table_with_heading.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "table_with_heading", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/tablecell.docx.json b/tests/data/groundtruth/docling_v2/tablecell.docx.json index 44710c2d..eb3d31dc 100644 --- a/tests/data/groundtruth/docling_v2/tablecell.docx.json +++ b/tests/data/groundtruth/docling_v2/tablecell.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "tablecell", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/tests/data/groundtruth/docling_v2/test_emf_docx.docx.json b/tests/data/groundtruth/docling_v2/test_emf_docx.docx.json index e6109397..ac1536f7 100644 --- a/tests/data/groundtruth/docling_v2/test_emf_docx.docx.json +++ b/tests/data/groundtruth/docling_v2/test_emf_docx.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "test_emf_docx", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/tests/data/groundtruth/docling_v2/textbox.docx.itxt b/tests/data/groundtruth/docling_v2/textbox.docx.itxt index 47e8871b..197cc9f1 100644 --- a/tests/data/groundtruth/docling_v2/textbox.docx.itxt +++ b/tests/data/groundtruth/docling_v2/textbox.docx.itxt @@ -12,79 +12,60 @@ item-0 at level 0: unspecified: group _root_ * Headache * Sore throat item-9 at level 1: text: - item-10 at level 1: text: - item-11 at level 1: section: group textbox - item-12 at level 2: text: If a caregiver suspects that wit ... the same suggested reportable symptoms + item-10 at level 1: section: group textbox + item-11 at level 2: text: If a caregiver suspects that wit ... the same suggested reportable symptoms + item-12 at level 1: text: item-13 at level 1: text: item-14 at level 1: text: - item-15 at level 1: text: - item-16 at level 1: text: - item-17 at level 1: section: group textbox - item-18 at level 2: text: Yes - item-19 at level 1: text: - item-20 at level 1: text: - item-21 at level 1: section: group textbox - item-22 at level 2: list: group list - item-23 at level 3: list_item: A report must be submitted withi ... saster Prevention Information Network. - item-24 at level 3: list_item: A report must also be submitted ... d Infectious Disease Reporting System. - item-25 at level 2: text: - item-26 at level 1: list: group list + item-15 at level 1: section: group textbox + item-16 at level 2: text: Yes + item-17 at level 1: text: + item-18 at level 1: section: group textbox + item-19 at level 2: list: group list + item-20 at level 3: list_item: A report must be submitted withi ... saster Prevention Information Network. + item-21 at level 3: list_item: A report must also be submitted ... d Infectious Disease Reporting System. + item-22 at level 2: text: + item-23 at level 1: text: + item-24 at level 1: text: + item-25 at level 1: text: + item-26 at level 1: text: item-27 at level 1: text: - item-28 at level 1: text: - item-29 at level 1: text: - item-30 at level 1: text: - item-31 at level 1: text: - item-32 at level 1: section: group textbox - item-33 at level 2: text: Health Bureau: - item-34 at level 2: text: Upon receiving a report from the ... rt to the Centers for Disease Control. - item-35 at level 2: list: group list - item-36 at level 3: list_item: If necessary, provide health edu ... vidual to undergo specimen collection. - item-37 at level 3: list_item: Implement appropriate epidemic p ... the Communicable Disease Control Act. - item-38 at level 2: text: - item-39 at level 1: list: group list - item-40 at level 1: text: - item-41 at level 1: section: group textbox - item-42 at level 2: text: Department of Education: + item-28 at level 1: section: group textbox + item-29 at level 2: text: Health Bureau: + item-30 at level 2: text: Upon receiving a report from the ... rt to the Centers for Disease Control. + item-31 at level 2: list: group list + item-32 at level 3: list_item: If necessary, provide health edu ... vidual to undergo specimen collection. + item-33 at level 3: list_item: Implement appropriate epidemic p ... the Communicable Disease Control Act. + item-34 at level 2: text: + item-35 at level 1: text: + item-36 at level 1: section: group textbox + item-37 at level 2: text: Department of Education: Collabo ... vention measures at all school levels. - item-43 at level 1: text: - item-44 at level 1: text: - item-45 at level 1: text: - item-46 at level 1: text: - item-47 at level 1: text: - item-48 at level 1: text: + item-38 at level 1: text: + item-39 at level 1: text: + item-40 at level 1: text: + item-41 at level 1: text: + item-42 at level 1: text: + item-43 at level 1: section: group textbox + item-44 at level 2: inline: group group + item-45 at level 3: text: The Health Bureau will handle + item-46 at level 3: text: reporting and specimen collection + item-47 at level 3: text: . + item-48 at level 2: text: item-49 at level 1: text: item-50 at level 1: section: group textbox - item-51 at level 2: inline: group group - item-52 at level 3: text: The Health Bureau will handle - item-53 at level 3: text: reporting and specimen collection - item-54 at level 3: text: . - item-55 at level 2: text: - item-56 at level 1: text: - item-57 at level 1: text: + item-51 at level 2: text: Whether the epidemic has eased. + item-52 at level 2: text: + item-53 at level 1: section: group textbox + item-54 at level 2: text: Whether the test results are pos ... legally designated infectious disease. + item-55 at level 2: text: No + item-56 at level 1: section: group textbox + item-57 at level 2: text: Yes item-58 at level 1: text: item-59 at level 1: section: group textbox - item-60 at level 2: text: Whether the epidemic has eased. + item-60 at level 2: text: Case closed. item-61 at level 2: text: - item-62 at level 1: text: + item-62 at level 2: text: The Health Bureau will carry out ... ters for Disease Control if necessary. item-63 at level 1: section: group textbox - item-64 at level 2: text: Whether the test results are pos ... legally designated infectious disease. - item-65 at level 2: text: No - item-66 at level 1: text: - item-67 at level 1: text: - item-68 at level 1: section: group textbox - item-69 at level 2: text: Yes - item-70 at level 1: text: - item-71 at level 1: section: group textbox - item-72 at level 2: text: Yes - item-73 at level 1: text: - item-74 at level 1: text: - item-75 at level 1: section: group textbox - item-76 at level 2: text: Case closed. - item-77 at level 2: text: - item-78 at level 2: text: The Health Bureau will carry out ... ters for Disease Control if necessary. - item-79 at level 1: text: - item-80 at level 1: section: group textbox - item-81 at level 2: text: No - item-82 at level 1: text: - item-83 at level 1: text: - item-84 at level 1: text: \ No newline at end of file + item-64 at level 2: text: No + item-65 at level 1: text: \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/textbox.docx.json b/tests/data/groundtruth/docling_v2/textbox.docx.json index ae124047..fb4175de 100644 --- a/tests/data/groundtruth/docling_v2/textbox.docx.json +++ b/tests/data/groundtruth/docling_v2/textbox.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "textbox", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", @@ -33,10 +33,10 @@ "$ref": "#/texts/6" }, { - "$ref": "#/texts/7" + "$ref": "#/groups/2" }, { - "$ref": "#/groups/2" + "$ref": "#/texts/8" }, { "$ref": "#/texts/9" @@ -45,25 +45,22 @@ "$ref": "#/texts/10" }, { - "$ref": "#/texts/11" + "$ref": "#/groups/3" }, { "$ref": "#/texts/12" }, - { - "$ref": "#/groups/3" - }, - { - "$ref": "#/texts/14" - }, - { - "$ref": "#/texts/15" - }, { "$ref": "#/groups/4" }, { - "$ref": "#/groups/6" + "$ref": "#/texts/16" + }, + { + "$ref": "#/texts/17" + }, + { + "$ref": "#/texts/18" }, { "$ref": "#/texts/19" @@ -72,25 +69,22 @@ "$ref": "#/texts/20" }, { - "$ref": "#/texts/21" + "$ref": "#/groups/6" }, { - "$ref": "#/texts/22" + "$ref": "#/texts/26" }, { - "$ref": "#/texts/23" + "$ref": "#/groups/8" }, { - "$ref": "#/groups/7" - }, - { - "$ref": "#/groups/9" + "$ref": "#/texts/28" }, { "$ref": "#/texts/29" }, { - "$ref": "#/groups/10" + "$ref": "#/texts/30" }, { "$ref": "#/texts/31" @@ -99,16 +93,7 @@ "$ref": "#/texts/32" }, { - "$ref": "#/texts/33" - }, - { - "$ref": "#/texts/34" - }, - { - "$ref": "#/texts/35" - }, - { - "$ref": "#/texts/36" + "$ref": "#/groups/9" }, { "$ref": "#/texts/37" @@ -117,61 +102,22 @@ "$ref": "#/groups/11" }, { - "$ref": "#/texts/42" - }, - { - "$ref": "#/texts/43" - }, - { - "$ref": "#/texts/44" + "$ref": "#/groups/12" }, { "$ref": "#/groups/13" }, { - "$ref": "#/texts/47" + "$ref": "#/texts/43" }, { "$ref": "#/groups/14" }, - { - "$ref": "#/texts/50" - }, - { - "$ref": "#/texts/51" - }, { "$ref": "#/groups/15" }, { - "$ref": "#/texts/53" - }, - { - "$ref": "#/groups/16" - }, - { - "$ref": "#/texts/55" - }, - { - "$ref": "#/texts/56" - }, - { - "$ref": "#/groups/17" - }, - { - "$ref": "#/texts/60" - }, - { - "$ref": "#/groups/18" - }, - { - "$ref": "#/texts/62" - }, - { - "$ref": "#/texts/63" - }, - { - "$ref": "#/texts/64" + "$ref": "#/texts/48" } ], "content_layer": "body", @@ -220,7 +166,7 @@ }, "children": [ { - "$ref": "#/texts/8" + "$ref": "#/texts/7" } ], "content_layer": "body", @@ -234,7 +180,7 @@ }, "children": [ { - "$ref": "#/texts/13" + "$ref": "#/texts/11" } ], "content_layer": "body", @@ -251,7 +197,7 @@ "$ref": "#/groups/5" }, { - "$ref": "#/texts/18" + "$ref": "#/texts/15" } ], "content_layer": "body", @@ -265,10 +211,10 @@ }, "children": [ { - "$ref": "#/texts/16" + "$ref": "#/texts/13" }, { - "$ref": "#/texts/17" + "$ref": "#/texts/14" } ], "content_layer": "body", @@ -280,28 +226,18 @@ "parent": { "$ref": "#/body" }, - "children": [], - "content_layer": "body", - "name": "list", - "label": "list" - }, - { - "self_ref": "#/groups/7", - "parent": { - "$ref": "#/body" - }, "children": [ { - "$ref": "#/texts/24" + "$ref": "#/texts/21" + }, + { + "$ref": "#/texts/22" + }, + { + "$ref": "#/groups/7" }, { "$ref": "#/texts/25" - }, - { - "$ref": "#/groups/8" - }, - { - "$ref": "#/texts/28" } ], "content_layer": "body", @@ -309,46 +245,73 @@ "label": "section" }, { - "self_ref": "#/groups/8", + "self_ref": "#/groups/7", "parent": { - "$ref": "#/groups/7" + "$ref": "#/groups/6" }, "children": [ { - "$ref": "#/texts/26" + "$ref": "#/texts/23" }, { - "$ref": "#/texts/27" + "$ref": "#/texts/24" } ], "content_layer": "body", "name": "list", "label": "list" }, + { + "self_ref": "#/groups/8", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/27" + } + ], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, { "self_ref": "#/groups/9", "parent": { "$ref": "#/body" }, - "children": [], - "content_layer": "body", - "name": "list", - "label": "list" - }, - { - "self_ref": "#/groups/10", - "parent": { - "$ref": "#/body" - }, "children": [ { - "$ref": "#/texts/30" + "$ref": "#/groups/10" + }, + { + "$ref": "#/texts/36" } ], "content_layer": "body", "name": "textbox", "label": "section" }, + { + "self_ref": "#/groups/10", + "parent": { + "$ref": "#/groups/9" + }, + "children": [ + { + "$ref": "#/texts/33" + }, + { + "$ref": "#/texts/34" + }, + { + "$ref": "#/texts/35" + } + ], + "content_layer": "body", + "name": "group", + "label": "inline" + }, { "self_ref": "#/groups/11", "parent": { @@ -356,7 +319,24 @@ }, "children": [ { - "$ref": "#/groups/12" + "$ref": "#/texts/38" + }, + { + "$ref": "#/texts/39" + } + ], + "content_layer": "body", + "name": "textbox", + "label": "section" + }, + { + "self_ref": "#/groups/12", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/40" }, { "$ref": "#/texts/41" @@ -366,26 +346,6 @@ "name": "textbox", "label": "section" }, - { - "self_ref": "#/groups/12", - "parent": { - "$ref": "#/groups/11" - }, - "children": [ - { - "$ref": "#/texts/38" - }, - { - "$ref": "#/texts/39" - }, - { - "$ref": "#/texts/40" - } - ], - "content_layer": "body", - "name": "group", - "label": "inline" - }, { "self_ref": "#/groups/13", "parent": { @@ -393,10 +353,7 @@ }, "children": [ { - "$ref": "#/texts/45" - }, - { - "$ref": "#/texts/46" + "$ref": "#/texts/42" } ], "content_layer": "body", @@ -410,10 +367,13 @@ }, "children": [ { - "$ref": "#/texts/48" + "$ref": "#/texts/44" }, { - "$ref": "#/texts/49" + "$ref": "#/texts/45" + }, + { + "$ref": "#/texts/46" } ], "content_layer": "body", @@ -427,55 +387,7 @@ }, "children": [ { - "$ref": "#/texts/52" - } - ], - "content_layer": "body", - "name": "textbox", - "label": "section" - }, - { - "self_ref": "#/groups/16", - "parent": { - "$ref": "#/body" - }, - "children": [ - { - "$ref": "#/texts/54" - } - ], - "content_layer": "body", - "name": "textbox", - "label": "section" - }, - { - "self_ref": "#/groups/17", - "parent": { - "$ref": "#/body" - }, - "children": [ - { - "$ref": "#/texts/57" - }, - { - "$ref": "#/texts/58" - }, - { - "$ref": "#/texts/59" - } - ], - "content_layer": "body", - "name": "textbox", - "label": "section" - }, - { - "self_ref": "#/groups/18", - "parent": { - "$ref": "#/body" - }, - "children": [ - { - "$ref": "#/texts/61" + "$ref": "#/texts/47" } ], "content_layer": "body", @@ -600,18 +512,6 @@ }, { "self_ref": "#/texts/7", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/8", "parent": { "$ref": "#/groups/2" }, @@ -629,6 +529,18 @@ "script": "baseline" } }, + { + "self_ref": "#/texts/8", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [], + "orig": "", + "text": "" + }, { "self_ref": "#/texts/9", "parent": { @@ -655,30 +567,6 @@ }, { "self_ref": "#/texts/11", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/12", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/13", "parent": { "$ref": "#/groups/3" }, @@ -697,7 +585,7 @@ } }, { - "self_ref": "#/texts/14", + "self_ref": "#/texts/12", "parent": { "$ref": "#/body" }, @@ -709,19 +597,7 @@ "text": "" }, { - "self_ref": "#/texts/15", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/16", + "self_ref": "#/texts/13", "parent": { "$ref": "#/groups/5" }, @@ -742,7 +618,7 @@ "marker": "" }, { - "self_ref": "#/texts/17", + "self_ref": "#/texts/14", "parent": { "$ref": "#/groups/5" }, @@ -763,7 +639,7 @@ "marker": "" }, { - "self_ref": "#/texts/18", + "self_ref": "#/texts/15", "parent": { "$ref": "#/groups/4" }, @@ -774,6 +650,42 @@ "orig": "", "text": "" }, + { + "self_ref": "#/texts/16", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/17", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/18", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [], + "orig": "", + "text": "" + }, { "self_ref": "#/texts/19", "parent": { @@ -801,43 +713,7 @@ { "self_ref": "#/texts/21", "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/22", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/23", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/24", - "parent": { - "$ref": "#/groups/7" + "$ref": "#/groups/6" }, "children": [], "content_layer": "body", @@ -854,9 +730,9 @@ } }, { - "self_ref": "#/texts/25", + "self_ref": "#/texts/22", "parent": { - "$ref": "#/groups/7" + "$ref": "#/groups/6" }, "children": [], "content_layer": "body", @@ -873,9 +749,9 @@ } }, { - "self_ref": "#/texts/26", + "self_ref": "#/texts/23", "parent": { - "$ref": "#/groups/8" + "$ref": "#/groups/7" }, "children": [], "content_layer": "body", @@ -894,9 +770,9 @@ "marker": "1." }, { - "self_ref": "#/texts/27", + "self_ref": "#/texts/24", "parent": { - "$ref": "#/groups/8" + "$ref": "#/groups/7" }, "children": [], "content_layer": "body", @@ -914,10 +790,53 @@ "enumerated": true, "marker": "2." }, + { + "self_ref": "#/texts/25", + "parent": { + "$ref": "#/groups/6" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/26", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/27", + "parent": { + "$ref": "#/groups/8" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [], + "orig": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.", + "text": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false, + "script": "baseline" + } + }, { "self_ref": "#/texts/28", "parent": { - "$ref": "#/groups/7" + "$ref": "#/body" }, "children": [], "content_layer": "body", @@ -941,21 +860,14 @@ { "self_ref": "#/texts/30", "parent": { - "$ref": "#/groups/10" + "$ref": "#/body" }, "children": [], "content_layer": "body", "label": "text", "prov": [], - "orig": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.", - "text": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.", - "formatting": { - "bold": false, - "italic": false, - "underline": false, - "strikethrough": false, - "script": "baseline" - } + "orig": "", + "text": "" }, { "self_ref": "#/texts/31", @@ -984,43 +896,64 @@ { "self_ref": "#/texts/33", "parent": { - "$ref": "#/body" + "$ref": "#/groups/10" }, "children": [], "content_layer": "body", "label": "text", "prov": [], - "orig": "", - "text": "" + "orig": "The Health Bureau will handle", + "text": "The Health Bureau will handle", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false, + "script": "baseline" + } }, { "self_ref": "#/texts/34", "parent": { - "$ref": "#/body" + "$ref": "#/groups/10" }, "children": [], "content_layer": "body", "label": "text", "prov": [], - "orig": "", - "text": "" + "orig": "reporting and specimen collection", + "text": "reporting and specimen collection", + "formatting": { + "bold": true, + "italic": false, + "underline": false, + "strikethrough": false, + "script": "baseline" + } }, { "self_ref": "#/texts/35", "parent": { - "$ref": "#/body" + "$ref": "#/groups/10" }, "children": [], "content_layer": "body", "label": "text", "prov": [], - "orig": "", - "text": "" + "orig": ".", + "text": ".", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false, + "script": "baseline" + } }, { "self_ref": "#/texts/36", "parent": { - "$ref": "#/body" + "$ref": "#/groups/9" }, "children": [], "content_layer": "body", @@ -1043,63 +976,6 @@ }, { "self_ref": "#/texts/38", - "parent": { - "$ref": "#/groups/12" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "The Health Bureau will handle", - "text": "The Health Bureau will handle", - "formatting": { - "bold": false, - "italic": false, - "underline": false, - "strikethrough": false, - "script": "baseline" - } - }, - { - "self_ref": "#/texts/39", - "parent": { - "$ref": "#/groups/12" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "reporting and specimen collection", - "text": "reporting and specimen collection", - "formatting": { - "bold": true, - "italic": false, - "underline": false, - "strikethrough": false, - "script": "baseline" - } - }, - { - "self_ref": "#/texts/40", - "parent": { - "$ref": "#/groups/12" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": ".", - "text": ".", - "formatting": { - "bold": false, - "italic": false, - "underline": false, - "strikethrough": false, - "script": "baseline" - } - }, - { - "self_ref": "#/texts/41", "parent": { "$ref": "#/groups/11" }, @@ -1107,54 +983,6 @@ "content_layer": "body", "label": "text", "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/42", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/43", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/44", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/45", - "parent": { - "$ref": "#/groups/13" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], "orig": "Whether the epidemic has eased.", "text": "Whether the epidemic has eased.", "formatting": { @@ -1166,9 +994,9 @@ } }, { - "self_ref": "#/texts/46", + "self_ref": "#/texts/39", "parent": { - "$ref": "#/groups/13" + "$ref": "#/groups/11" }, "children": [], "content_layer": "body", @@ -1178,21 +1006,9 @@ "text": "" }, { - "self_ref": "#/texts/47", + "self_ref": "#/texts/40", "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/48", - "parent": { - "$ref": "#/groups/14" + "$ref": "#/groups/12" }, "children": [], "content_layer": "body", @@ -1209,9 +1025,9 @@ } }, { - "self_ref": "#/texts/49", + "self_ref": "#/texts/41", "parent": { - "$ref": "#/groups/14" + "$ref": "#/groups/12" }, "children": [], "content_layer": "body", @@ -1228,33 +1044,9 @@ } }, { - "self_ref": "#/texts/50", + "self_ref": "#/texts/42", "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/51", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/52", - "parent": { - "$ref": "#/groups/15" + "$ref": "#/groups/13" }, "children": [], "content_layer": "body", @@ -1271,7 +1063,7 @@ } }, { - "self_ref": "#/texts/53", + "self_ref": "#/texts/43", "parent": { "$ref": "#/body" }, @@ -1283,52 +1075,9 @@ "text": "" }, { - "self_ref": "#/texts/54", + "self_ref": "#/texts/44", "parent": { - "$ref": "#/groups/16" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "Yes", - "text": "Yes", - "formatting": { - "bold": false, - "italic": false, - "underline": false, - "strikethrough": false, - "script": "baseline" - } - }, - { - "self_ref": "#/texts/55", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/56", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/57", - "parent": { - "$ref": "#/groups/17" + "$ref": "#/groups/14" }, "children": [], "content_layer": "body", @@ -1345,9 +1094,9 @@ } }, { - "self_ref": "#/texts/58", + "self_ref": "#/texts/45", "parent": { - "$ref": "#/groups/17" + "$ref": "#/groups/14" }, "children": [], "content_layer": "body", @@ -1357,9 +1106,9 @@ "text": "" }, { - "self_ref": "#/texts/59", + "self_ref": "#/texts/46", "parent": { - "$ref": "#/groups/17" + "$ref": "#/groups/14" }, "children": [], "content_layer": "body", @@ -1376,21 +1125,9 @@ } }, { - "self_ref": "#/texts/60", + "self_ref": "#/texts/47", "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/61", - "parent": { - "$ref": "#/groups/18" + "$ref": "#/groups/15" }, "children": [], "content_layer": "body", @@ -1407,31 +1144,7 @@ } }, { - "self_ref": "#/texts/62", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/63", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/64", + "self_ref": "#/texts/48", "parent": { "$ref": "#/body" }, diff --git a/tests/data/groundtruth/docling_v2/textbox.docx.md b/tests/data/groundtruth/docling_v2/textbox.docx.md index d5ab56f5..4d55266c 100644 --- a/tests/data/groundtruth/docling_v2/textbox.docx.md +++ b/tests/data/groundtruth/docling_v2/textbox.docx.md @@ -42,8 +42,6 @@ No Yes -Yes - **Case closed.** The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary. diff --git a/tests/data/groundtruth/docling_v2/unit_test_01.html.json b/tests/data/groundtruth/docling_v2/unit_test_01.html.json index 60308a12..75abfc80 100644 --- a/tests/data/groundtruth/docling_v2/unit_test_01.html.json +++ b/tests/data/groundtruth/docling_v2/unit_test_01.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "unit_test_01", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/unit_test_headers.docx.json b/tests/data/groundtruth/docling_v2/unit_test_headers.docx.json index 2b490978..8dbd15aa 100644 --- a/tests/data/groundtruth/docling_v2/unit_test_headers.docx.json +++ b/tests/data/groundtruth/docling_v2/unit_test_headers.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "unit_test_headers", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/tests/data/groundtruth/docling_v2/unit_test_headers_numbered.docx.json b/tests/data/groundtruth/docling_v2/unit_test_headers_numbered.docx.json index eed58d66..0ba08e96 100644 --- a/tests/data/groundtruth/docling_v2/unit_test_headers_numbered.docx.json +++ b/tests/data/groundtruth/docling_v2/unit_test_headers_numbered.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "unit_test_headers_numbered", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/tests/data/groundtruth/docling_v2/unit_test_lists.docx.json b/tests/data/groundtruth/docling_v2/unit_test_lists.docx.json index 00dde7bd..a41aabcf 100644 --- a/tests/data/groundtruth/docling_v2/unit_test_lists.docx.json +++ b/tests/data/groundtruth/docling_v2/unit_test_lists.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "unit_test_lists", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/tests/data/groundtruth/docling_v2/webvtt_example_01.vtt.json b/tests/data/groundtruth/docling_v2/webvtt_example_01.vtt.json index 83118256..e9ed8261 100644 --- a/tests/data/groundtruth/docling_v2/webvtt_example_01.vtt.json +++ b/tests/data/groundtruth/docling_v2/webvtt_example_01.vtt.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "webvtt_example_01", "origin": { "mimetype": "text/vtt", diff --git a/tests/data/groundtruth/docling_v2/webvtt_example_02.vtt.json b/tests/data/groundtruth/docling_v2/webvtt_example_02.vtt.json index 72647d93..14b0816e 100644 --- a/tests/data/groundtruth/docling_v2/webvtt_example_02.vtt.json +++ b/tests/data/groundtruth/docling_v2/webvtt_example_02.vtt.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "webvtt_example_02", "origin": { "mimetype": "text/vtt", diff --git a/tests/data/groundtruth/docling_v2/webvtt_example_03.vtt.json b/tests/data/groundtruth/docling_v2/webvtt_example_03.vtt.json index 5df08e2b..80500ad9 100644 --- a/tests/data/groundtruth/docling_v2/webvtt_example_03.vtt.json +++ b/tests/data/groundtruth/docling_v2/webvtt_example_03.vtt.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "webvtt_example_03", "origin": { "mimetype": "text/vtt", diff --git a/tests/data/groundtruth/docling_v2/wiki_duck.html.json b/tests/data/groundtruth/docling_v2/wiki_duck.html.json index 1899a9bb..4f2f83eb 100644 --- a/tests/data/groundtruth/docling_v2/wiki_duck.html.json +++ b/tests/data/groundtruth/docling_v2/wiki_duck.html.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "wiki_duck", "origin": { "mimetype": "text/html", diff --git a/tests/data/groundtruth/docling_v2/word_image_anchors.docx.json b/tests/data/groundtruth/docling_v2/word_image_anchors.docx.json index 4dd79d7c..d3891c72 100644 --- a/tests/data/groundtruth/docling_v2/word_image_anchors.docx.json +++ b/tests/data/groundtruth/docling_v2/word_image_anchors.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "word_image_anchors", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/tests/data/groundtruth/docling_v2/word_sample.docx.json b/tests/data/groundtruth/docling_v2/word_sample.docx.json index 1d8d23f3..649161be 100644 --- a/tests/data/groundtruth/docling_v2/word_sample.docx.json +++ b/tests/data/groundtruth/docling_v2/word_sample.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "word_sample", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/tests/data/groundtruth/docling_v2/word_tables.docx.json b/tests/data/groundtruth/docling_v2/word_tables.docx.json index 56646bc5..3e79c8c8 100644 --- a/tests/data/groundtruth/docling_v2/word_tables.docx.json +++ b/tests/data/groundtruth/docling_v2/word_tables.docx.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "word_tables", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/tests/data/groundtruth/docling_v2/xlsx_01.xlsx.json b/tests/data/groundtruth/docling_v2/xlsx_01.xlsx.json index 88808164..f7ef8ebc 100644 --- a/tests/data/groundtruth/docling_v2/xlsx_01.xlsx.json +++ b/tests/data/groundtruth/docling_v2/xlsx_01.xlsx.json @@ -1,11 +1,11 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", - "name": "test-01", + "version": "1.8.0", + "name": "xlsx_01", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "binary_hash": 5648670872883457266, - "filename": "test-01.xlsx" + "filename": "xlsx_01.xlsx" }, "furniture": { "self_ref": "#/furniture", diff --git a/tests/data/groundtruth/docling_v2/xlsx_02_sample_sales_data.xlsm.json b/tests/data/groundtruth/docling_v2/xlsx_02_sample_sales_data.xlsm.json index 83e42f44..3c439d80 100644 --- a/tests/data/groundtruth/docling_v2/xlsx_02_sample_sales_data.xlsm.json +++ b/tests/data/groundtruth/docling_v2/xlsx_02_sample_sales_data.xlsm.json @@ -1,11 +1,11 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", - "name": "sample_sales_data", + "version": "1.8.0", + "name": "xlsx_02_sample_sales_data", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "binary_hash": 14806485565397602516, - "filename": "sample_sales_data.xlsm" + "filename": "xlsx_02_sample_sales_data.xlsm" }, "furniture": { "self_ref": "#/furniture", diff --git a/tests/data/groundtruth/docling_v2/xlsx_03_chartsheet.xlsx.json b/tests/data/groundtruth/docling_v2/xlsx_03_chartsheet.xlsx.json index 66ede3e3..eb2a7c3b 100644 --- a/tests/data/groundtruth/docling_v2/xlsx_03_chartsheet.xlsx.json +++ b/tests/data/groundtruth/docling_v2/xlsx_03_chartsheet.xlsx.json @@ -1,10 +1,10 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "xlsx_03_chartsheet", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "binary_hash": 548415533138925042, + "binary_hash": 472193488349663234, "filename": "xlsx_03_chartsheet.xlsx" }, "furniture": { @@ -855,4 +855,4 @@ "page_no": 2 } } -} +} \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/xlsx_04_inflated.xlsx.json b/tests/data/groundtruth/docling_v2/xlsx_04_inflated.xlsx.json index 4d7e24be..804c7475 100644 --- a/tests/data/groundtruth/docling_v2/xlsx_04_inflated.xlsx.json +++ b/tests/data/groundtruth/docling_v2/xlsx_04_inflated.xlsx.json @@ -1,11 +1,11 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", - "name": "test-02", + "version": "1.8.0", + "name": "xlsx_04_inflated", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "binary_hash": 13967282146026440806, - "filename": "test-02.xlsx" + "binary_hash": 8997038978642400831, + "filename": "xlsx_04_inflated.xlsx" }, "furniture": { "self_ref": "#/furniture", @@ -140,7 +140,7 @@ "width": 421.0, "height": 430.0 }, - "uri": "" + "uri": "" }, "annotations": [] } diff --git a/tests/data/webp/groundtruth/docling_v2/webp-test.json b/tests/data/webp/groundtruth/docling_v2/webp-test.json index 1d9525d1..d32f4e04 100644 --- a/tests/data/webp/groundtruth/docling_v2/webp-test.json +++ b/tests/data/webp/groundtruth/docling_v2/webp-test.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "webp-test", "origin": { "mimetype": "application/pdf", diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test.json index 072c8a72..a71b30a1 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "ocr_test", "origin": { "mimetype": "application/pdf", diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json index c9474a30..3faa2e87 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "ocr_test_rotated_180", "origin": { "mimetype": "application/pdf", diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json index 8dea19e6..f1c03006 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "ocr_test_rotated_270", "origin": { "mimetype": "application/pdf", diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json index 56e86b69..921dc44e 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.7.0", + "version": "1.8.0", "name": "ocr_test_rotated_90", "origin": { "mimetype": "application/pdf", diff --git a/uv.lock b/uv.lock index 21f18bcd..942f6369 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.9, <4.0" resolution-markers = [ "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'arm64' and sys_platform == 'darwin'", @@ -1446,7 +1446,7 @@ dependencies = [ { name = "accelerate", version = "1.11.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "beautifulsoup4" }, { name = "certifi" }, - { name = "docling-core", extra = ["chunking"] }, + { name = "docling-core" }, { name = "docling-ibm-models" }, { name = "docling-parse" }, { name = "easyocr", marker = "python_full_version >= '3.14'" }, @@ -1557,7 +1557,7 @@ requires-dist = [ { name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" }, { name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" }, { name = "certifi", specifier = ">=2024.7.4" }, - { name = "docling-core", extras = ["chunking"], specifier = ">=2.50.1,<3.0.0" }, + { name = "docling-core", git = "https://github.com/docling-project/docling-core.git?rev=3d13b02756f1c0d1f1ccab5cfbd76f1f888a0dd9" }, { name = "docling-ibm-models", specifier = ">=3.9.1,<4" }, { name = "docling-parse", specifier = ">=4.7.0,<5.0.0" }, { name = "easyocr", marker = "python_full_version >= '3.14' and python_full_version < '4'", specifier = ">=1.7,<2.0" }, @@ -1643,7 +1643,7 @@ examples = [ [[package]] name = "docling-core" version = "2.50.1" -source = { registry = "https://pypi.org/simple" } +source = { git = "https://github.com/docling-project/docling-core.git?rev=3d13b02756f1c0d1f1ccab5cfbd76f1f888a0dd9#3d13b02756f1c0d1f1ccab5cfbd76f1f888a0dd9" } dependencies = [ { name = "jsonref" }, { name = "jsonschema" }, @@ -1653,19 +1653,15 @@ dependencies = [ { name = "pydantic" }, { name = "pyyaml" }, { name = "tabulate" }, + { name = "tree-sitter" }, + { name = "tree-sitter-c" }, + { name = "tree-sitter-java" }, + { name = "tree-sitter-javascript" }, + { name = "tree-sitter-python" }, + { name = "tree-sitter-typescript" }, { name = "typer" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4e/aa/67810ed6f425c597bc5779560c3e550d23ac8cc76f5f62eddae8406cdaf7/docling_core-2.50.1.tar.gz", hash = "sha256:8afae348abb7f7622899d8664195a5e0bef4b2a872f0df0ed40bcd023970a995", size = 168008, upload-time = "2025-11-04T13:20:07.092Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/39/af/b1dd00c28bedcebef3f54b4e86d427a951f78818dbd7ea854c37a1a971fe/docling_core-2.50.1-py3-none-any.whl", hash = "sha256:92a34b77e02ed4faad451be36a56c37142e8ae240d87a0dcf58e89df41e256ae", size = 169285, upload-time = "2025-11-04T13:20:05.167Z" }, -] - -[package.optional-dependencies] -chunking = [ - { name = "semchunk" }, - { name = "transformers" }, -] [[package]] name = "docling-ibm-models" @@ -4045,25 +4041,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667, upload-time = "2025-09-02T15:23:09.635Z" }, ] -[[package]] -name = "mpire" -version = "2.10.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pygments" }, - { name = "pywin32", marker = "sys_platform == 'win32'" }, - { name = "tqdm" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/3a/93/80ac75c20ce54c785648b4ed363c88f148bf22637e10c9863db4fbe73e74/mpire-2.10.2.tar.gz", hash = "sha256:f66a321e93fadff34585a4bfa05e95bd946cf714b442f51c529038eb45773d97", size = 271270, upload-time = "2024-05-07T14:00:31.815Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/14/1db1729ad6db4999c3a16c47937d601fcb909aaa4224f5eca5a2f145a605/mpire-2.10.2-py3-none-any.whl", hash = "sha256:d627707f7a8d02aa4c7f7d59de399dec5290945ddf7fbd36cbb1d6ebb37a51fb", size = 272756, upload-time = "2024-05-07T14:00:29.633Z" }, -] - -[package.optional-dependencies] -dill = [ - { name = "multiprocess" }, -] - [[package]] name = "mpmath" version = "1.3.0" @@ -6007,7 +5984,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/8a/b35a615ae6f04550d696bb179c414538b3b477999435fdd4ad75b76139e4/pybase64-1.4.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:a370dea7b1cee2a36a4d5445d4e09cc243816c5bc8def61f602db5a6f5438e52", size = 54320, upload-time = "2025-07-27T13:03:27.495Z" }, { url = "https://files.pythonhosted.org/packages/d3/a9/8bd4f9bcc53689f1b457ecefed1eaa080e4949d65a62c31a38b7253d5226/pybase64-1.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9aa4de83f02e462a6f4e066811c71d6af31b52d7484de635582d0e3ec3d6cc3e", size = 56482, upload-time = "2025-07-27T13:03:28.942Z" }, { url = "https://files.pythonhosted.org/packages/75/e5/4a7735b54a1191f61c3f5c2952212c85c2d6b06eb5fb3671c7603395f70c/pybase64-1.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83a1c2f9ed00fee8f064d548c8654a480741131f280e5750bb32475b7ec8ee38", size = 70959, upload-time = "2025-07-27T13:03:30.171Z" }, - { url = "https://files.pythonhosted.org/packages/ca/96/7ff718f87c67f4147c181b73d0928897cefa17dc75d7abc6e37730d5908f/pybase64-1.4.2-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:fb794502b4b1ec91c4ca5d283ae71aef65e3de7721057bd9e2b3ec79f7a62d7d", size = 38230, upload-time = "2025-07-27T13:03:41.637Z" }, + { url = "https://files.pythonhosted.org/packages/4c/09/f3f4b11fc9beda7e8625e29fb0f549958fcbb34fea3914e1c1d95116e344/pybase64-1.4.2-cp313-cp313-android_21_x86_64.whl", hash = "sha256:9dad20bf1f3ed9e6fe566c4c9d07d9a6c04f5a280daebd2082ffb8620b0a880d", size = 40796, upload-time = "2025-07-27T13:03:36.927Z" }, { url = "https://files.pythonhosted.org/packages/71/ab/db4dbdfccb9ca874d6ce34a0784761471885d96730de85cee3d300381529/pybase64-1.4.2-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d377d48acf53abf4b926c2a7a24a19deb092f366a04ffd856bf4b3aa330b025d", size = 71608, upload-time = "2025-07-27T13:03:47.01Z" }, { url = "https://files.pythonhosted.org/packages/f2/58/7f2cef1ceccc682088958448d56727369de83fa6b29148478f4d2acd107a/pybase64-1.4.2-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:ab9cdb6a8176a5cb967f53e6ad60e40c83caaa1ae31c5e1b29e5c8f507f17538", size = 56413, upload-time = "2025-07-27T13:03:49.908Z" }, { url = "https://files.pythonhosted.org/packages/08/7c/7e0af5c5728fa7e2eb082d88eca7c6bd17429be819d58518e74919d42e66/pybase64-1.4.2-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:adf0c103ad559dbfb9fe69edfd26a15c65d9c991a5ab0a25b04770f9eb0b9484", size = 59311, upload-time = "2025-07-27T13:03:51.238Z" }, @@ -6028,7 +6005,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/f0/c392c4ac8ccb7a34b28377c21faa2395313e3c676d76c382642e19a20703/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad59362fc267bf15498a318c9e076686e4beeb0dfe09b457fabbc2b32468b97a", size = 58103, upload-time = "2025-07-27T13:04:29.996Z" }, { url = "https://files.pythonhosted.org/packages/32/30/00ab21316e7df8f526aa3e3dc06f74de6711d51c65b020575d0105a025b2/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:01593bd064e7dcd6c86d04e94e44acfe364049500c20ac68ca1e708fbb2ca970", size = 60779, upload-time = "2025-07-27T13:04:31.549Z" }, { url = "https://files.pythonhosted.org/packages/a6/65/114ca81839b1805ce4a2b7d58bc16e95634734a2059991f6382fc71caf3e/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5b81547ad8ea271c79fdf10da89a1e9313cb15edcba2a17adf8871735e9c02a0", size = 74684, upload-time = "2025-07-27T13:04:32.976Z" }, - { url = "https://files.pythonhosted.org/packages/e1/11/b28906fc2e330b8b1ab4bc845a7bef808b8506734e90ed79c6062b095112/pybase64-1.4.2-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:cea5aaf218fd9c5c23afacfe86fd4464dfedc1a0316dd3b5b4075b068cc67df0", size = 38212, upload-time = "2025-07-27T13:04:42.729Z" }, { url = "https://files.pythonhosted.org/packages/e4/2e/851eb51284b97354ee5dfa1309624ab90920696e91a33cd85b13d20cc5c1/pybase64-1.4.2-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a3e54dcf0d0305ec88473c9d0009f698cabf86f88a8a10090efeff2879c421bb", size = 71674, upload-time = "2025-07-27T13:04:49.294Z" }, { url = "https://files.pythonhosted.org/packages/a4/8e/3479266bc0e65f6cc48b3938d4a83bff045330649869d950a378f2ddece0/pybase64-1.4.2-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:753da25d4fd20be7bda2746f545935773beea12d5cb5ec56ec2d2960796477b1", size = 56461, upload-time = "2025-07-27T13:04:52.37Z" }, { url = "https://files.pythonhosted.org/packages/20/b6/f2b6cf59106dd78bae8717302be5b814cec33293504ad409a2eb752ad60c/pybase64-1.4.2-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a78c768ce4ca550885246d14babdb8923e0f4a848dfaaeb63c38fc99e7ea4052", size = 59446, upload-time = "2025-07-27T13:04:53.967Z" }, @@ -8005,19 +7981,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/91/ff/2e2eed29e02c14a5cb6c57f09b2d5b40e65d6cc71f45b52e0be295ccbc2f/secretstorage-3.4.0-py3-none-any.whl", hash = "sha256:0e3b6265c2c63509fb7415717607e4b2c9ab767b7f344a57473b779ca13bd02e", size = 15272, upload-time = "2025-09-09T16:42:12.744Z" }, ] -[[package]] -name = "semchunk" -version = "2.2.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mpire", extra = ["dill"] }, - { name = "tqdm" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/62/96/c418c322730b385e81d4ab462e68dd48bb2dbda4d8efa17cad2ca468d9ac/semchunk-2.2.2.tar.gz", hash = "sha256:940e89896e64eeb01de97ba60f51c8c7b96c6a3951dfcf574f25ce2146752f52", size = 12271, upload-time = "2024-12-17T22:54:30.332Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/76/84/94ca7896c7df20032bcb09973e9a4d14c222507c0aadf22e89fa76bb0a04/semchunk-2.2.2-py3-none-any.whl", hash = "sha256:94ca19020c013c073abdfd06d79a7c13637b91738335f3b8cdb5655ee7cc94d2", size = 10271, upload-time = "2024-12-17T22:54:27.689Z" }, -] - [[package]] name = "semver" version = "2.13.0" @@ -8357,7 +8320,9 @@ dependencies = [ sdist = { url = "https://files.pythonhosted.org/packages/e1/41/9b873a8c055582859b239be17902a85339bec6a30ad162f98c9b0288a2cc/soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b", size = 46156, upload-time = "2025-01-25T09:17:04.831Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/64/28/e2a36573ccbcf3d57c00626a21fe51989380636e821b341d36ccca0c1c3a/soundfile-0.13.1-py2.py3-none-any.whl", hash = "sha256:a23c717560da2cf4c7b5ae1142514e0fd82d6bbd9dfc93a50423447142f2c445", size = 25751, upload-time = "2025-01-25T09:16:44.235Z" }, + { url = "https://files.pythonhosted.org/packages/ea/ab/73e97a5b3cc46bba7ff8650a1504348fa1863a6f9d57d7001c6b67c5f20e/soundfile-0.13.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:82dc664d19831933fe59adad199bf3945ad06d84bc111a5b4c0d3089a5b9ec33", size = 1142250, upload-time = "2025-01-25T09:16:47.583Z" }, { url = "https://files.pythonhosted.org/packages/a0/e5/58fd1a8d7b26fc113af244f966ee3aecf03cb9293cb935daaddc1e455e18/soundfile-0.13.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:743f12c12c4054921e15736c6be09ac26b3b3d603aef6fd69f9dde68748f2593", size = 1101406, upload-time = "2025-01-25T09:16:49.662Z" }, + { url = "https://files.pythonhosted.org/packages/58/ae/c0e4a53d77cf6e9a04179535766b3321b0b9ced5f70522e4caf9329f0046/soundfile-0.13.1-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9c9e855f5a4d06ce4213f31918653ab7de0c5a8d8107cd2427e44b42df547deb", size = 1235729, upload-time = "2025-01-25T09:16:53.018Z" }, { url = "https://files.pythonhosted.org/packages/57/5e/70bdd9579b35003a489fc850b5047beeda26328053ebadc1fb60f320f7db/soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:03267c4e493315294834a0870f31dbb3b28a95561b80b134f0bd3cf2d5f0e618", size = 1313646, upload-time = "2025-01-25T09:16:54.872Z" }, ] @@ -9016,6 +8981,129 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/d3/c16c3b3cf7655a67db1144da94b021c200ac1303f82428f2beef6c2e72bb/transformers-4.57.1-py3-none-any.whl", hash = "sha256:b10d05da8fa67dc41644dbbf9bc45a44cb86ae33da6f9295f5fbf5b7890bd267", size = 11990925, upload-time = "2025-10-14T15:39:23.085Z" }, ] +[[package]] +name = "tree-sitter" +version = "0.23.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/50/fd5fafa42b884f741b28d9e6fd366c3f34e15d2ed3aa9633b34e388379e2/tree-sitter-0.23.2.tar.gz", hash = "sha256:66bae8dd47f1fed7bdef816115146d3a41c39b5c482d7bad36d9ba1def088450", size = 166800, upload-time = "2024-10-24T15:31:02.238Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/04/2068a7b725265ecfcbf63ecdae038f1d4124ebccd55b8a7ce145b70e2b6a/tree_sitter-0.23.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3a937f5d8727bc1c74c4bf2a9d1c25ace049e8628273016ad0d45914ae904e10", size = 139289, upload-time = "2024-10-24T15:29:59.27Z" }, + { url = "https://files.pythonhosted.org/packages/a8/07/a5b943121f674fe1ac77694a698e71ce95353830c1f3f4ce45da7ef3e406/tree_sitter-0.23.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2c7eae7fe2af215645a38660d2d57d257a4c461fe3ec827cca99a79478284e80", size = 132379, upload-time = "2024-10-24T15:30:01.437Z" }, + { url = "https://files.pythonhosted.org/packages/d4/96/fcc72c33d464a2d722db1e95b74a53ced771a47b3cfde60aced29764a783/tree_sitter-0.23.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a71d607595270b6870eaf778a1032d146b2aa79bfcfa60f57a82a7b7584a4c7", size = 552884, upload-time = "2024-10-24T15:30:02.672Z" }, + { url = "https://files.pythonhosted.org/packages/d0/af/b0e787a52767155b4643a55d6de03c1e4ae77abb61e1dc1629ad983e0a40/tree_sitter-0.23.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fe9b9ea7a0aa23b52fd97354da95d1b2580065bc12a4ac868f9164a127211d6", size = 566561, upload-time = "2024-10-24T15:30:04.073Z" }, + { url = "https://files.pythonhosted.org/packages/65/fd/05e966b5317b1c6679c071c5b0203f28af9d26c9363700cb9682e1bcf343/tree_sitter-0.23.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d74d00a8021719eae14d10d1b1e28649e15d8b958c01c2b2c3dad7a2ebc4dbae", size = 558273, upload-time = "2024-10-24T15:30:06.177Z" }, + { url = "https://files.pythonhosted.org/packages/60/bc/19145efdf3f47711aa3f1bf06f0b50593f97f1108550d38694841fd97b7c/tree_sitter-0.23.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6de18d8d8a7f67ab71f472d1fcb01cc506e080cbb5e13d52929e4b6fdce6bbee", size = 569176, upload-time = "2024-10-24T15:30:07.902Z" }, + { url = "https://files.pythonhosted.org/packages/32/08/3553d8e488ae9284a0762effafb7d2639a306e184963b7f99853923084d6/tree_sitter-0.23.2-cp310-cp310-win_amd64.whl", hash = "sha256:12b60dca70d2282af942b650a6d781be487485454668c7c956338a367b98cdee", size = 117902, upload-time = "2024-10-24T15:30:09.675Z" }, + { url = "https://files.pythonhosted.org/packages/1d/39/836fa485e985c33e8aa1cc3abbf7a84be1c2c382e69547a765631fdd7ce3/tree_sitter-0.23.2-cp310-cp310-win_arm64.whl", hash = "sha256:3346a4dd0447a42aabb863443b0fd8c92b909baf40ed2344fae4b94b625d5955", size = 102644, upload-time = "2024-10-24T15:30:11.484Z" }, + { url = "https://files.pythonhosted.org/packages/55/8d/2d4fb04408772be0919441d66f700673ce7cb76b9ab6682e226d740fb88d/tree_sitter-0.23.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:91fda41d4f8824335cc43c64e2c37d8089c8c563bd3900a512d2852d075af719", size = 139142, upload-time = "2024-10-24T15:30:12.627Z" }, + { url = "https://files.pythonhosted.org/packages/32/52/b8a44bfff7b0203256e5dbc8d3a372ee8896128b8ed7d3a89e1ef17b2065/tree_sitter-0.23.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:92b2b489d5ce54b41f94c6f23fbaf592bd6e84dc2877048fd1cb060480fa53f7", size = 132198, upload-time = "2024-10-24T15:30:13.893Z" }, + { url = "https://files.pythonhosted.org/packages/5d/54/746f2ee5acf6191a4a0be7f5843329f0d713bfe5196f5fc6fe2ea69cb44c/tree_sitter-0.23.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64859bd4aa1567d0d6016a811b2b49c59d4a4427d096e3d8c84b2521455f62b7", size = 554303, upload-time = "2024-10-24T15:30:15.334Z" }, + { url = "https://files.pythonhosted.org/packages/2f/5a/3169d9933be813776a9b4b3f2e671d3d50fa27e589dee5578f6ecef7ff6d/tree_sitter-0.23.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:614590611636044e071d3a0b748046d52676dbda3bc9fa431216231e11dd98f7", size = 567626, upload-time = "2024-10-24T15:30:17.12Z" }, + { url = "https://files.pythonhosted.org/packages/32/0d/23f363b3b0bc3fa0e7a4a294bf119957ac1ab02737d57815e1e8b7b3e196/tree_sitter-0.23.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:08466953c78ae57be61057188fb88c89791b0a562856010228e0ccf60e2ac453", size = 559803, upload-time = "2024-10-24T15:30:18.921Z" }, + { url = "https://files.pythonhosted.org/packages/6f/b3/1ffba0f17a7ff2c9114d91a1ecc15e0748f217817797564d31fbb61d7458/tree_sitter-0.23.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8a33f03a562de91f7fd05eefcedd8994a06cd44c62f7aabace811ad82bc11cbd", size = 570987, upload-time = "2024-10-24T15:30:21.116Z" }, + { url = "https://files.pythonhosted.org/packages/59/4b/085bcb8a11ea18003aacc4dbc91c301d1536c5e2deedb95393e8ef26f1f7/tree_sitter-0.23.2-cp311-cp311-win_amd64.whl", hash = "sha256:03b70296b569ef64f7b92b42ca5da9bf86d81bee2afd480bea35092687f51dae", size = 117771, upload-time = "2024-10-24T15:30:22.38Z" }, + { url = "https://files.pythonhosted.org/packages/4b/e5/90adc4081f49ccb6bea89a800dc9b0dcc5b6953b0da423e8eff28f63fddf/tree_sitter-0.23.2-cp311-cp311-win_arm64.whl", hash = "sha256:7cb4bb953ea7c0b50eeafc4454783e030357179d2a93c3dd5ebed2da5588ddd0", size = 102555, upload-time = "2024-10-24T15:30:23.534Z" }, + { url = "https://files.pythonhosted.org/packages/07/a7/57e0fe87b49a78c670a7b4483f70e44c000c65c29b138001096b22e7dd87/tree_sitter-0.23.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a014498b6a9e6003fae8c6eb72f5927d62da9dcb72b28b3ce8cd15c6ff6a6572", size = 139259, upload-time = "2024-10-24T15:30:24.941Z" }, + { url = "https://files.pythonhosted.org/packages/b4/b9/bc8513d818ffb54993a017a36c8739300bc5739a13677acf90b54995e7db/tree_sitter-0.23.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:04f8699b131d4bcbe3805c37e4ef3d159ee9a82a0e700587625623999ba0ea53", size = 131951, upload-time = "2024-10-24T15:30:26.176Z" }, + { url = "https://files.pythonhosted.org/packages/d7/6a/eab01bb6b1ce3c9acf16d72922ffc29a904af485eb3e60baf3a3e04edd30/tree_sitter-0.23.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4471577df285059c71686ecb208bc50fb472099b38dcc8e849b0e86652891e87", size = 557952, upload-time = "2024-10-24T15:30:27.389Z" }, + { url = "https://files.pythonhosted.org/packages/bd/95/f2f73332623cf63200d57800f85273170bc5f99d28ea3f234afd5b0048df/tree_sitter-0.23.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f342c925290dd4e20ecd5787ef7ae8749981597ab364783a1eb73173efe65226", size = 571199, upload-time = "2024-10-24T15:30:28.879Z" }, + { url = "https://files.pythonhosted.org/packages/04/ac/bd6e6cfdd0421156e86f5c93848629af1c7323083077e1a95b27d32d5811/tree_sitter-0.23.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a4e9e53d07dd076bede72e4f7d3a0173d7b9ad6576572dd86da008a740a9bb22", size = 562129, upload-time = "2024-10-24T15:30:30.199Z" }, + { url = "https://files.pythonhosted.org/packages/7b/bd/8a9edcbcf8a76b0bf58e3b927ed291e3598e063d56667367762833cc8709/tree_sitter-0.23.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8caebe65bc358759dac2500d8f8feed3aed939c4ade9a684a1783fe07bc7d5db", size = 574307, upload-time = "2024-10-24T15:30:32.085Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c2/3fb2c6c0ae2f59a7411dc6d3e7945e3cb6f34c8552688708acc8b2b13f83/tree_sitter-0.23.2-cp312-cp312-win_amd64.whl", hash = "sha256:fc5a72eb50d43485000dbbb309acb350467b7467e66dc747c6bb82ce63041582", size = 117858, upload-time = "2024-10-24T15:30:33.353Z" }, + { url = "https://files.pythonhosted.org/packages/e2/18/4ca2c0f4a0c802ebcb3a92264cc436f1d54b394fa24dfa76bf57cdeaca9e/tree_sitter-0.23.2-cp312-cp312-win_arm64.whl", hash = "sha256:a0320eb6c7993359c5f7b371d22719ccd273f440d41cf1bd65dac5e9587f2046", size = 102496, upload-time = "2024-10-24T15:30:34.782Z" }, + { url = "https://files.pythonhosted.org/packages/ba/c6/4ead9ce3113a7c27f37a2bdef163c09757efbaa85adbdfe7b3fbf0317c57/tree_sitter-0.23.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:eff630dddee7ba05accb439b17e559e15ce13f057297007c246237ceb6306332", size = 139266, upload-time = "2024-10-24T15:30:35.946Z" }, + { url = "https://files.pythonhosted.org/packages/76/c9/b4197c5b0c1d6ba648202a547846ac910a53163b69a459504b2aa6cdb76e/tree_sitter-0.23.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4780ba8f3894f2dea869fad2995c2aceab3fd5ab9e6a27c45475d2acd7f7e84e", size = 131959, upload-time = "2024-10-24T15:30:37.646Z" }, + { url = "https://files.pythonhosted.org/packages/99/94/0f7c5580d2adff3b57d36f1998725b0caf6cf1af50ceafc00c6cdbc2fef6/tree_sitter-0.23.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0b609460b8e3e256361fb12e94fae5b728cb835b16f0f9d590b5aadbf9d109b", size = 557582, upload-time = "2024-10-24T15:30:39.019Z" }, + { url = "https://files.pythonhosted.org/packages/97/8a/f73ff06959d43fd47fc283cbcc4d8efa6550b2cc431d852b184504992447/tree_sitter-0.23.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d070d8eaeaeb36cf535f55e5578fddbfc3bf53c1980f58bf1a99d57466b3b5", size = 570891, upload-time = "2024-10-24T15:30:40.432Z" }, + { url = "https://files.pythonhosted.org/packages/b8/86/bbda5ad09b88051ff7bf3275622a2f79bc4f728b4c283ff8b93b8fcdf36d/tree_sitter-0.23.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:878580b2ad5054c410ba3418edca4d34c81cc26706114d8f5b5541688bc2d785", size = 562343, upload-time = "2024-10-24T15:30:43.045Z" }, + { url = "https://files.pythonhosted.org/packages/ca/55/b404fa49cb5c2926ad6fe1cac033dd486ef69f1afeb7828452d21e1e05c1/tree_sitter-0.23.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:29224bdc2a3b9af535b7725e249d3ee291b2e90708e82832e73acc175e40dc48", size = 574407, upload-time = "2024-10-24T15:30:45.018Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c8/eea2104443ab973091107ef3e730683bd8e6cb51dd025cef853d3fff9dae/tree_sitter-0.23.2-cp313-cp313-win_amd64.whl", hash = "sha256:c58d89348162fbc3aea1fe6511a66ee189fc0e4e4bbe937026f29e4ecef17763", size = 117854, upload-time = "2024-10-24T15:30:47.817Z" }, + { url = "https://files.pythonhosted.org/packages/89/4d/1728d9ce32a1d851081911b7e47830f5e740431f2bb920f54bb8c26175bc/tree_sitter-0.23.2-cp313-cp313-win_arm64.whl", hash = "sha256:0ff2037be5edab7801de3f6a721b9cf010853f612e2008ee454e0e0badb225a6", size = 102492, upload-time = "2024-10-24T15:30:48.892Z" }, + { url = "https://files.pythonhosted.org/packages/cb/ab/b39173a47d498cc6276e303c865f4a222134ceae890bd3c1b29427489805/tree_sitter-0.23.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a5db8e585205faef8bf219da77d8993e2ef04d08eda2e3c8ad7e4df8297ee344", size = 139550, upload-time = "2024-10-24T15:30:50.516Z" }, + { url = "https://files.pythonhosted.org/packages/4c/34/fa8f5b862dd7a6014fd5578810178e8f7601830cabb6d65d2aba050c2df1/tree_sitter-0.23.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9dbd110a30cf28be5da734ae4cd0e9031768228dbf6a79f2973962aa51de4ec7", size = 132686, upload-time = "2024-10-24T15:30:51.779Z" }, + { url = "https://files.pythonhosted.org/packages/98/b9/ccdddf35705fc23395caa71557f767e0753d38afe4b5bb99efddbf62bb22/tree_sitter-0.23.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569514b9a996a0fd458b3a891c46ca125298be0c03cf82f2b6f0c13d5d8f25dc", size = 554958, upload-time = "2024-10-24T15:30:53.327Z" }, + { url = "https://files.pythonhosted.org/packages/8d/ba/20ae9079bdfc5cfac28b39d945a6c354c8e1385e73aec8142db6c53b635c/tree_sitter-0.23.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a357ed98a74e47787b812df99a74a2c35c0fe11e55c2095cc01d1cad144ef552", size = 568162, upload-time = "2024-10-24T15:30:54.667Z" }, + { url = "https://files.pythonhosted.org/packages/40/00/b16bf6cf88c47c1b6c8e1cce1eb9e90badb5db9e5252ae0970d858d02592/tree_sitter-0.23.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c2dfb8e8f760f4cc67888d03ef9e2dbd3353245f67f5efba375c2a14d944ac0e", size = 560278, upload-time = "2024-10-24T15:30:56.49Z" }, + { url = "https://files.pythonhosted.org/packages/7a/8f/27ab9b96cc0261af78b080ec8a9846a38e216360ec38774ea27eba35bd3c/tree_sitter-0.23.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3ead958df87a21d706903987e665e9e0e5df7b2c5021ff69ea349826840adc6a", size = 571255, upload-time = "2024-10-24T15:30:58.254Z" }, + { url = "https://files.pythonhosted.org/packages/44/e0/95a3d66a7e5bb229574484ab10c6dc99d1c7a32972b890d194076e30dc4f/tree_sitter-0.23.2-cp39-cp39-win_amd64.whl", hash = "sha256:611cae16be332213c0e6ece72c0bfca202e30ff320a8b309b1526c6cb79ee4ba", size = 118232, upload-time = "2024-10-24T15:30:59.965Z" }, + { url = "https://files.pythonhosted.org/packages/10/b5/9eaf794fc71490573ab14a366affca415bc1ddbf86a14d78e54583db4254/tree_sitter-0.23.2-cp39-cp39-win_arm64.whl", hash = "sha256:b848e0fdd522fbb8888cdb4f4d93f8fad97ae10d70c122fb922e51363c7febcd", size = 102787, upload-time = "2024-10-24T15:31:01.084Z" }, +] + +[[package]] +name = "tree-sitter-c" +version = "0.23.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/27/27/5218b7aadabcf8020d06a3b13f8f87dd0e6e958f43d9839847e3f12b02c7/tree_sitter_c-0.23.6.tar.gz", hash = "sha256:1d3b4a6ca8ebc7b0727857cc63a874118e0c04d353a4909b5c104e913fd69864", size = 221969, upload-time = "2025-05-24T16:05:16.753Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/2e/ba7d982c1b3c8a01e4b106cd9c8c292445366c77cb0fd9da598558d6b2a3/tree_sitter_c-0.23.6-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:0b46335c2603b86c75e7fc587e29c9299cf06e9634ce1a69ac1e928dfe568af2", size = 80847, upload-time = "2025-05-24T16:05:09.665Z" }, + { url = "https://files.pythonhosted.org/packages/57/ac/08081eb00119e528127a5a67008383e4730d099560f0f6e66f6e539710e2/tree_sitter_c-0.23.6-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ffc36baf02f46744df354e4a00eab78d1034db480e649554c625ba79ee4b6b9c", size = 86208, upload-time = "2025-05-24T16:05:10.943Z" }, + { url = "https://files.pythonhosted.org/packages/2c/cb/98f0165f4cbdc6df35625358a9958176221bb098d38f58c25f5c6a04f9e5/tree_sitter_c-0.23.6-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96ef21fffd7135839010b37066d6653101ff74fa8961468ffbb0bcf3ae22d61", size = 109935, upload-time = "2025-05-24T16:05:12.126Z" }, + { url = "https://files.pythonhosted.org/packages/b6/eb/1bfae083aa5e6b04e36de75f55491eaa495e84a0d06a87257cbb7c404a08/tree_sitter_c-0.23.6-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfa9044039460632ef333afd6e907fdc67a657890afe49c8592bd223de059712", size = 98063, upload-time = "2025-05-24T16:05:12.975Z" }, + { url = "https://files.pythonhosted.org/packages/be/1f/85d34bbedb09bacb21c3861bbb26129420f26af289972906b75277150dea/tree_sitter_c-0.23.6-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a25cc5f275109f59dd6d5e636355ff038e46fc1048404519b591935a2b5c96d3", size = 94072, upload-time = "2025-05-24T16:05:13.814Z" }, + { url = "https://files.pythonhosted.org/packages/e6/35/c78cbe4ac9426f2208bacf20a6de9c262af8b9e8d379a6249c6876916978/tree_sitter_c-0.23.6-cp39-abi3-win_amd64.whl", hash = "sha256:1fccc265a0fe1b09874321c20046b297b1513e2cef1af7e17ac53b9b5cf6878e", size = 84626, upload-time = "2025-05-24T16:05:14.65Z" }, + { url = "https://files.pythonhosted.org/packages/be/53/d0f910b86e9d270e76d45f7accabd9efe96448e89c9f713ca2501a8876bf/tree_sitter_c-0.23.6-cp39-abi3-win_arm64.whl", hash = "sha256:ac92b69880d9844c89253a352937dada56e3647fbb8d5acb33f820eeb7763fd7", size = 82655, upload-time = "2025-05-24T16:05:15.894Z" }, +] + +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fa/dc/eb9c8f96304e5d8ae1663126d89967a622a80937ad2909903569ccb7ec8f/tree_sitter_java-0.23.5.tar.gz", hash = "sha256:f5cd57b8f1270a7f0438878750d02ccc79421d45cca65ff284f1527e9ef02e38", size = 138121, upload-time = "2024-12-21T18:24:26.936Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/21/b3399780b440e1567a11d384d0ebb1aea9b642d0d98becf30fa55c0e3a3b/tree_sitter_java-0.23.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:355ce0308672d6f7013ec913dee4a0613666f4cda9044a7824240d17f38209df", size = 58926, upload-time = "2024-12-21T18:24:12.53Z" }, + { url = "https://files.pythonhosted.org/packages/57/ef/6406b444e2a93bc72a04e802f4107e9ecf04b8de4a5528830726d210599c/tree_sitter_java-0.23.5-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:24acd59c4720dedad80d548fe4237e43ef2b7a4e94c8549b0ca6e4c4d7bf6e69", size = 62288, upload-time = "2024-12-21T18:24:14.634Z" }, + { url = "https://files.pythonhosted.org/packages/4e/6c/74b1c150d4f69c291ab0b78d5dd1b59712559bbe7e7daf6d8466d483463f/tree_sitter_java-0.23.5-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9401e7271f0b333df39fc8a8336a0caf1b891d9a2b89ddee99fae66b794fc5b7", size = 85533, upload-time = "2024-12-21T18:24:16.695Z" }, + { url = "https://files.pythonhosted.org/packages/29/09/e0d08f5c212062fd046db35c1015a2621c2631bc8b4aae5740d7adb276ad/tree_sitter_java-0.23.5-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:370b204b9500b847f6d0c5ad584045831cee69e9a3e4d878535d39e4a7e4c4f1", size = 84033, upload-time = "2024-12-21T18:24:18.758Z" }, + { url = "https://files.pythonhosted.org/packages/43/56/7d06b23ddd09bde816a131aa504ee11a1bbe87c6b62ab9b2ed23849a3382/tree_sitter_java-0.23.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:aae84449e330363b55b14a2af0585e4e0dae75eb64ea509b7e5b0e1de536846a", size = 82564, upload-time = "2024-12-21T18:24:20.493Z" }, + { url = "https://files.pythonhosted.org/packages/da/d6/0528c7e1e88a18221dbd8ccee3825bf274b1fa300f745fd74eb343878043/tree_sitter_java-0.23.5-cp39-abi3-win_amd64.whl", hash = "sha256:1ee45e790f8d31d416bc84a09dac2e2c6bc343e89b8a2e1d550513498eedfde7", size = 60650, upload-time = "2024-12-21T18:24:22.902Z" }, + { url = "https://files.pythonhosted.org/packages/72/57/5bab54d23179350356515526fff3cc0f3ac23bfbc1a1d518a15978d4880e/tree_sitter_java-0.23.5-cp39-abi3-win_arm64.whl", hash = "sha256:402efe136104c5603b429dc26c7e75ae14faaca54cfd319ecc41c8f2534750f4", size = 59059, upload-time = "2024-12-21T18:24:24.934Z" }, +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.23.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/dc/1c55c33cc6bbe754359b330534cf9f261c1b9b2c26ddf23aef3c5fa67759/tree_sitter_javascript-0.23.1.tar.gz", hash = "sha256:b2059ce8b150162cda05a457ca3920450adbf915119c04b8c67b5241cd7fcfed", size = 110058, upload-time = "2024-11-10T05:40:42.357Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/d3/c67d7d49967344b51208ad19f105233be1afdf07d3dcb35b471900265227/tree_sitter_javascript-0.23.1-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6ca583dad4bd79d3053c310b9f7208cd597fd85f9947e4ab2294658bb5c11e35", size = 59333, upload-time = "2024-11-10T05:40:31.988Z" }, + { url = "https://files.pythonhosted.org/packages/a5/db/ea0ee1547679d1750e80a0c4bc60b3520b166eeaf048764cfdd1ba3fd5e5/tree_sitter_javascript-0.23.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:94100e491a6a247aa4d14caf61230c171b6376c863039b6d9cd71255c2d815ec", size = 61071, upload-time = "2024-11-10T05:40:33.458Z" }, + { url = "https://files.pythonhosted.org/packages/67/6e/07c4857e08be37bfb55bfb269863df8ec908b2f6a3f1893cd852b893ecab/tree_sitter_javascript-0.23.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a6bc1055b061c5055ec58f39ee9b2e9efb8e6e0ae970838af74da0afb811f0a", size = 96999, upload-time = "2024-11-10T05:40:34.869Z" }, + { url = "https://files.pythonhosted.org/packages/5f/f5/4de730afe8b9422845bc2064020a8a8f49ebd1695c04261c38d1b3e3edec/tree_sitter_javascript-0.23.1-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:056dc04fb6b24293f8c5fec43c14e7e16ba2075b3009c643abf8c85edc4c7c3c", size = 94020, upload-time = "2024-11-10T05:40:35.735Z" }, + { url = "https://files.pythonhosted.org/packages/77/0a/f980520da86c4eff8392867840a945578ef43372c9d4a37922baa6b121fe/tree_sitter_javascript-0.23.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a11ca1c0f736da42967586b568dff8a465ee148a986c15ebdc9382806e0ce871", size = 92927, upload-time = "2024-11-10T05:40:37.92Z" }, + { url = "https://files.pythonhosted.org/packages/ff/5c/36a98d512aa1d1082409d6b7eda5d26b820bd4477a54100ad9f62212bc55/tree_sitter_javascript-0.23.1-cp39-abi3-win_amd64.whl", hash = "sha256:041fa22b34250ea6eb313d33104d5303f79504cb259d374d691e38bbdc49145b", size = 58824, upload-time = "2024-11-10T05:40:39.903Z" }, + { url = "https://files.pythonhosted.org/packages/dc/79/ceb21988e6de615355a63eebcf806cd2a0fe875bec27b429d58b63e7fb5f/tree_sitter_javascript-0.23.1-cp39-abi3-win_arm64.whl", hash = "sha256:eb28130cd2fb30d702d614cbf61ef44d1c7f6869e7d864a9cc17111e370be8f7", size = 57027, upload-time = "2024-11-10T05:40:40.841Z" }, +] + +[[package]] +name = "tree-sitter-python" +version = "0.23.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/30/6766433b31be476fda6569a3a374c2220e45ffee0bff75460038a57bf23b/tree_sitter_python-0.23.6.tar.gz", hash = "sha256:354bfa0a2f9217431764a631516f85173e9711af2c13dbd796a8815acfe505d9", size = 155868, upload-time = "2024-12-22T23:09:55.918Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/67/577a02acae5f776007c924ca86ef14c19c12e71de0aa9d2a036f3c248e7b/tree_sitter_python-0.23.6-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:28fbec8f74eeb2b30292d97715e60fac9ccf8a8091ce19b9d93e9b580ed280fb", size = 74361, upload-time = "2024-12-22T23:09:42.37Z" }, + { url = "https://files.pythonhosted.org/packages/d2/a6/194b3625a7245c532ad418130d63077ce6cd241152524152f533e4d6edb0/tree_sitter_python-0.23.6-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:680b710051b144fedf61c95197db0094f2245e82551bf7f0c501356333571f7a", size = 76436, upload-time = "2024-12-22T23:09:43.566Z" }, + { url = "https://files.pythonhosted.org/packages/d0/62/1da112689d6d282920e62c40e67ab39ea56463b0e7167bfc5e81818a770e/tree_sitter_python-0.23.6-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a9dcef55507b6567207e8ee0a6b053d0688019b47ff7f26edc1764b7f4dc0a4", size = 112060, upload-time = "2024-12-22T23:09:44.721Z" }, + { url = "https://files.pythonhosted.org/packages/5d/62/c9358584c96e38318d69b6704653684fd8467601f7b74e88aa44f4e6903f/tree_sitter_python-0.23.6-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29dacdc0cd2f64e55e61d96c6906533ebb2791972bec988450c46cce60092f5d", size = 112338, upload-time = "2024-12-22T23:09:48.323Z" }, + { url = "https://files.pythonhosted.org/packages/1a/58/c5e61add45e34fb8ecbf057c500bae9d96ed7c9ca36edb7985da8ae45526/tree_sitter_python-0.23.6-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7e048733c36f564b379831689006801feb267d8194f9e793fbb395ef1723335d", size = 109382, upload-time = "2024-12-22T23:09:49.49Z" }, + { url = "https://files.pythonhosted.org/packages/e9/f3/9b30893cae9b3811fe652dc6f90aaadfda12ae0b2757f5722fc7266f423c/tree_sitter_python-0.23.6-cp39-abi3-win_amd64.whl", hash = "sha256:a24027248399fb41594b696f929f9956828ae7cc85596d9f775e6c239cd0c2be", size = 75904, upload-time = "2024-12-22T23:09:51.597Z" }, + { url = "https://files.pythonhosted.org/packages/87/cb/ce35a65f83a47b510d8a2f1eddf3bdbb0d57aabc87351c8788caf3309f76/tree_sitter_python-0.23.6-cp39-abi3-win_arm64.whl", hash = "sha256:71334371bd73d5fe080aed39fbff49ed8efb9506edebe16795b0c7567ed6a272", size = 73649, upload-time = "2024-12-22T23:09:53.71Z" }, +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1e/fc/bb52958f7e399250aee093751e9373a6311cadbe76b6e0d109b853757f35/tree_sitter_typescript-0.23.2.tar.gz", hash = "sha256:7b167b5827c882261cb7a50dfa0fb567975f9b315e87ed87ad0a0a3aedb3834d", size = 773053, upload-time = "2024-11-11T02:36:11.396Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/95/4c00680866280e008e81dd621fd4d3f54aa3dad1b76b857a19da1b2cc426/tree_sitter_typescript-0.23.2-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:3cd752d70d8e5371fdac6a9a4df9d8924b63b6998d268586f7d374c9fba2a478", size = 286677, upload-time = "2024-11-11T02:35:58.839Z" }, + { url = "https://files.pythonhosted.org/packages/8f/2f/1f36fda564518d84593f2740d5905ac127d590baf5c5753cef2a88a89c15/tree_sitter_typescript-0.23.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:c7cc1b0ff5d91bac863b0e38b1578d5505e718156c9db577c8baea2557f66de8", size = 302008, upload-time = "2024-11-11T02:36:00.733Z" }, + { url = "https://files.pythonhosted.org/packages/96/2d/975c2dad292aa9994f982eb0b69cc6fda0223e4b6c4ea714550477d8ec3a/tree_sitter_typescript-0.23.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b1eed5b0b3a8134e86126b00b743d667ec27c63fc9de1b7bb23168803879e31", size = 351987, upload-time = "2024-11-11T02:36:02.669Z" }, + { url = "https://files.pythonhosted.org/packages/49/d1/a71c36da6e2b8a4ed5e2970819b86ef13ba77ac40d9e333cb17df6a2c5db/tree_sitter_typescript-0.23.2-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e96d36b85bcacdeb8ff5c2618d75593ef12ebaf1b4eace3477e2bdb2abb1752c", size = 344960, upload-time = "2024-11-11T02:36:04.443Z" }, + { url = "https://files.pythonhosted.org/packages/7f/cb/f57b149d7beed1a85b8266d0c60ebe4c46e79c9ba56bc17b898e17daf88e/tree_sitter_typescript-0.23.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8d4f0f9bcb61ad7b7509d49a1565ff2cc363863644a234e1e0fe10960e55aea0", size = 340245, upload-time = "2024-11-11T02:36:06.473Z" }, + { url = "https://files.pythonhosted.org/packages/8b/ab/dd84f0e2337296a5f09749f7b5483215d75c8fa9e33738522e5ed81f7254/tree_sitter_typescript-0.23.2-cp39-abi3-win_amd64.whl", hash = "sha256:3f730b66396bc3e11811e4465c41ee45d9e9edd6de355a58bbbc49fa770da8f9", size = 278015, upload-time = "2024-11-11T02:36:07.631Z" }, + { url = "https://files.pythonhosted.org/packages/9f/e4/81f9a935789233cf412a0ed5fe04c883841d2c8fb0b7e075958a35c65032/tree_sitter_typescript-0.23.2-cp39-abi3-win_arm64.whl", hash = "sha256:05db58f70b95ef0ea126db5560f3775692f609589ed6f8dd0af84b7f19f1cbb7", size = 274052, upload-time = "2024-11-11T02:36:09.514Z" }, +] + [[package]] name = "triton" version = "3.4.0" @@ -9051,12 +9139,19 @@ resolution-markers = [ "(python_full_version == '3.10.*' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version == '3.10.*' and platform_machine != 'x86_64' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform != 'darwin' and sys_platform != 'linux')", ] wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/22/507b6f58a35e05e84381630b2dc2a3cee1a7a2a7eaf4cba857c638a18a24/triton-3.5.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6f90de6a6566bb619b4c0adc9855729e1b1b5e26533fca1bf6206e96b6d277a3", size = 159827599, upload-time = "2025-10-15T19:15:43.87Z" }, { url = "https://files.pythonhosted.org/packages/0b/eb/09e31d107a5d00eb281aa7e6635ca463e9bca86515944e399480eadb71f8/triton-3.5.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5d3b3d480debf24eaa739623c9a42446b0b77f95593d30eb1f64cd2278cc1f0", size = 170333110, upload-time = "2025-10-13T16:37:49.588Z" }, + { url = "https://files.pythonhosted.org/packages/79/f9/b6f60f978397c616fd8dacca2305759fe4f80d397b20ef72534803244bd5/triton-3.5.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8457b22148defefdcb7fa8144b05ce211b9faefad650a1ce85b23df488d5549c", size = 159926731, upload-time = "2025-10-15T19:15:49.682Z" }, { url = "https://files.pythonhosted.org/packages/3d/78/949a04391c21956c816523678f0e5fa308eb5b1e7622d88c4e4ef5fceca0/triton-3.5.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f34bfa21c5b3a203c0f0eab28dcc1e49bd1f67d22724e77fb6665a659200a4ec", size = 170433488, upload-time = "2025-10-13T16:37:57.132Z" }, + { url = "https://files.pythonhosted.org/packages/87/9b/30988039e1e84df7554fba24e6a734d2d0e847af33cabdf9b532b3c51456/triton-3.5.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7da21fccceafc163e3a5e857abe34351ef76345af06cabf9637a914742671f0b", size = 159946647, upload-time = "2025-10-15T19:15:56.325Z" }, { url = "https://files.pythonhosted.org/packages/f5/3a/e991574f3102147b642e49637e0281e9bb7c4ba254edb2bab78247c85e01/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833", size = 170476535, upload-time = "2025-10-13T16:38:05.18Z" }, + { url = "https://files.pythonhosted.org/packages/cd/85/e37f1197acb04c8f3d83851d23d5d6ed5060ef74580668b112e23fdfa203/triton-3.5.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:188da5b81fa2f8322c27fec1627703eac24cb9bb7ab0dfbe9925973bc1b070d3", size = 159958970, upload-time = "2025-10-15T19:16:01.717Z" }, { url = "https://files.pythonhosted.org/packages/6c/29/10728de8a6e932e517c10773486b8e99f85d1b1d9dd87d9a9616e1fef4a1/triton-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e6bb9aa5519c084a333acdba443789e50012a4b851cd486c54f0b8dc2a8d3a12", size = 170487289, upload-time = "2025-10-13T16:38:11.662Z" }, + { url = "https://files.pythonhosted.org/packages/b8/1d/38258f05010ac17a7b058c022911c9cae6526e149b7397134a048cf5a6c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03127d9b33aaf979c856676b394bc059ec1d68cb6da68ae03f62dd8ad77a04ae", size = 160073012, upload-time = "2025-10-15T19:16:07.477Z" }, { url = "https://files.pythonhosted.org/packages/5c/38/db80e48b9220c9bce872b0f616ad0446cdf554a40b85c7865cbca99ab3c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c83f2343e1a220a716c7b3ab9fccfcbe3ad4020d189549200e2d2e8d5868bed9", size = 170577179, upload-time = "2025-10-13T16:38:17.865Z" }, + { url = "https://files.pythonhosted.org/packages/91/fe/8f5771d00227f4eb1ee034f218ed427102b989366d2275fe3b3c105a3921/triton-3.5.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:468936651d383f4a6d10068d34a627505e13af55be5d002b9f27b987e7a5f0ac", size = 159957460, upload-time = "2025-10-15T19:16:12.626Z" }, { url = "https://files.pythonhosted.org/packages/ff/60/1810655d1d856c9a4fcc90ee8966d85f552d98c53a6589f95ab2cbe27bb8/triton-3.5.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da0fa67ccd76c3dcfb0bffe1b1c57c685136a6bd33d141c24d9655d4185b1289", size = 170487949, upload-time = "2025-10-13T16:38:24.881Z" }, + { url = "https://files.pythonhosted.org/packages/78/59/99edd103958fe6e42b50b9ad8ce4f223ddf4ccf475259cf7d2b53381dc6c/triton-3.5.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7ceef21410229ac23173a28eee5cfc0e37c1dfdb8b4bc11ecda2e3ecec7c686", size = 160075629, upload-time = "2025-10-15T19:16:18.746Z" }, { url = "https://files.pythonhosted.org/packages/fb/b7/1dec8433ac604c061173d0589d99217fe7bf90a70bdc375e745d044b8aad/triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:317fe477ea8fd4524a6a8c499fb0a36984a56d0b75bf9c9cb6133a1c56d5a6e7", size = 170580176, upload-time = "2025-10-13T16:38:31.14Z" }, ]