mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
Merge remote-tracking branch 'origin/main' into feat-add-table-exports
This commit is contained in:
commit
132daedefd
@ -1,3 +1,9 @@
|
|||||||
|
## [v1.12.2](https://github.com/DS4SD/docling/releases/tag/v1.12.2) - 2024-09-17
|
||||||
|
|
||||||
|
### Fix
|
||||||
|
|
||||||
|
* **tests:** Adjust the test data to match the new version of LayoutPredictor ([#82](https://github.com/DS4SD/docling/issues/82)) ([`fa9699f`](https://github.com/DS4SD/docling/commit/fa9699fa3cd2d367382d7b952d0365983a870848))
|
||||||
|
|
||||||
## [v1.12.1](https://github.com/DS4SD/docling/releases/tag/v1.12.1) - 2024-09-16
|
## [v1.12.1](https://github.com/DS4SD/docling/releases/tag/v1.12.1) - 2024-09-16
|
||||||
|
|
||||||
### Fix
|
### Fix
|
||||||
|
737
poetry.lock
generated
737
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "docling"
|
name = "docling"
|
||||||
version = "1.12.1" # DO NOT EDIT, updated automatically
|
version = "1.12.2" # DO NOT EDIT, updated automatically
|
||||||
description = "Docling PDF conversion package"
|
description = "Docling PDF conversion package"
|
||||||
authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
|
authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
@ -23,9 +23,9 @@ packages = [{include = "docling"}]
|
|||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.10"
|
python = "^3.10"
|
||||||
pydantic = "^2.0.0"
|
pydantic = "^2.0.0"
|
||||||
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "feat-table-exports"}
|
docling-core = "^1.3.0"
|
||||||
docling-ibm-models = "^1.1.7"
|
docling-ibm-models = "^1.2.0"
|
||||||
deepsearch-glm = "^0.21.0"
|
deepsearch-glm = "^0.21.1"
|
||||||
filetype = "^1.2.0"
|
filetype = "^1.2.0"
|
||||||
pypdfium2 = "^4.30.0"
|
pypdfium2 = "^4.30.0"
|
||||||
pydantic-settings = "^2.3.0"
|
pydantic-settings = "^2.3.0"
|
||||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1776,6 +1776,10 @@ An important design and implementation consideration is the fact that RCAC colum
|
|||||||
|
|
||||||
An example of this situation is shown in Figure 6-1. However, note that aggregate functions (a form of grouping) are based on masked values.
|
An example of this situation is shown in Figure 6-1. However, note that aggregate functions (a form of grouping) are based on masked values.
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
|
||||||
|
FROM GROUP BY ORDER BY
|
||||||
|
|
||||||
## Without RCAC Masking
|
## Without RCAC Masking
|
||||||
|
|
||||||
## With RCAC Masking
|
## With RCAC Masking
|
||||||
@ -1808,6 +1812,12 @@ Figure 6-1 Timing of column masking
|
|||||||
| **** **** **** 1234 | 750.33 |
|
| **** **** **** 1234 | 750.33 |
|
||||||
| **** **** **** 0001 | 10.00 |
|
| **** **** **** 0001 | 10.00 |
|
||||||
|
|
||||||
|
CREDIT_CARD_NUMBER, SUM(AMOUNT) AS TOTAL TRANSACTIONS
|
||||||
|
|
||||||
|
CREDIT_CARD_NUMBER
|
||||||
|
|
||||||
|
CREDIT_CARD_NUMBER;
|
||||||
|
|
||||||
Conversely, field procedure masking causes the column values to be changed (that is, masked) and stored in the row. When the table is queried and the masked columns are referenced, the masked data is used for any local selection, joining, grouping, or ordering operations. This situation can have a profound effect on the query's final result set and not just on the column values that are returned. Field procedure masking occurs when the column values are read from disk before any query processing. RCAC masking occurs when the column values are returned to the application after query processing. This difference in behavior is shown in Figure 6-2.
|
Conversely, field procedure masking causes the column values to be changed (that is, masked) and stored in the row. When the table is queried and the masked columns are referenced, the masked data is used for any local selection, joining, grouping, or ordering operations. This situation can have a profound effect on the query's final result set and not just on the column values that are returned. Field procedure masking occurs when the column values are read from disk before any query processing. RCAC masking occurs when the column values are returned to the application after query processing. This difference in behavior is shown in Figure 6-2.
|
||||||
|
|
||||||
Note: Column masks can influence an SQL INSERT or UPDATE . For example, you cannot insert or update a table with column access control activated with masked data generated from an expression within the same statement that is based on a column with a column mask.
|
Note: Column masks can influence an SQL INSERT or UPDATE . For example, you cannot insert or update a table with column access control activated with masked data generated from an expression within the same statement that is based on a column with a column mask.
|
||||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -96,10 +96,17 @@ def verify_tables(doc_pred: DsDocument, doc_true: DsDocument):
|
|||||||
for i, row in enumerate(true_item.data):
|
for i, row in enumerate(true_item.data):
|
||||||
for j, col in enumerate(true_item.data[i]):
|
for j, col in enumerate(true_item.data[i]):
|
||||||
|
|
||||||
|
# print("true: ", true_item.data[i][j])
|
||||||
|
# print("pred: ", pred_item.data[i][j])
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
true_item.data[i][j].text == pred_item.data[i][j].text
|
true_item.data[i][j].text == pred_item.data[i][j].text
|
||||||
), "table-cell does not have the same text"
|
), "table-cell does not have the same text"
|
||||||
|
|
||||||
|
assert (
|
||||||
|
true_item.data[i][j].obj_type == pred_item.data[i][j].obj_type
|
||||||
|
), "table-cell does not have the same type"
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
@ -156,9 +163,13 @@ def verify_conversion_result(
|
|||||||
), f"Mismatch in PDF cell prediction for {input_path}"
|
), f"Mismatch in PDF cell prediction for {input_path}"
|
||||||
|
|
||||||
# assert verify_output(
|
# assert verify_output(
|
||||||
# doc_pred, doc_true
|
# doc_pred, doc_true
|
||||||
# ), f"Mismatch in JSON prediction for {input_path}"
|
# ), f"Mismatch in JSON prediction for {input_path}"
|
||||||
|
|
||||||
|
assert verify_tables(
|
||||||
|
doc_pred, doc_true
|
||||||
|
), f"verify_tables(doc_pred, doc_true) mismatch for {input_path}"
|
||||||
|
|
||||||
assert verify_md(
|
assert verify_md(
|
||||||
doc_pred_md, doc_true_md
|
doc_pred_md, doc_true_md
|
||||||
), f"Mismatch in Markdown prediction for {input_path}"
|
), f"Mismatch in Markdown prediction for {input_path}"
|
||||||
|
Loading…
Reference in New Issue
Block a user