mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat: support xlsm files (#1520)
* code for xlsm support * updated support for xlsm * updated code for xlsm support * Update docling_parse_v4_backend.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Update docling_parse_v4_backend.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Update test_backend_msexcel_xlsm.py updated the tests/test_backend_msexcel_xlsm.py: have a function starting with test removed all print statements ** To add an explicit assert {test}=={pred} Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Update base_models.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Update test_backend_msexcel.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Update test_backend_msexcel_xlsm.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Update document_converter.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Delete tests/test_backend_msexcel_xlsm.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * xlsm file Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * run tests * ran tests * Fix tests, upgrade XSLM example to a valid file Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -1,2 +1,2 @@
|
||||
<doctag><text><loc_60><loc_46><loc_424><loc_91>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text>
|
||||
<doctag><text><loc_59><loc_46><loc_424><loc_90>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text>
|
||||
</doctag>
|
||||
@@ -42,10 +42,10 @@
|
||||
{
|
||||
"page_no": 1,
|
||||
"bbox": {
|
||||
"l": 238.19302423176944,
|
||||
"l": 234.08627147881114,
|
||||
"t": 2570.0959833241664,
|
||||
"r": 1696.0985546594009,
|
||||
"b": 2315.204273887442,
|
||||
"r": 1696.0985042090742,
|
||||
"b": 2319.1220927976665,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"charspan": [
|
||||
|
||||
@@ -40,14 +40,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 238.19302423176944,
|
||||
"r_y0": 415.36904822716525,
|
||||
"r_x1": 1696.0985546594009,
|
||||
"r_y1": 415.36904822716525,
|
||||
"r_x2": 1696.0985546594009,
|
||||
"r_y2": 345.20535775097477,
|
||||
"r_x3": 238.19302423176944,
|
||||
"r_y3": 345.20535775097477,
|
||||
"r_x0": 234.08627147881114,
|
||||
"r_y0": 419.5788697734327,
|
||||
"r_x1": 1696.0985042090742,
|
||||
"r_y1": 419.5788697734327,
|
||||
"r_x2": 1696.0985042090742,
|
||||
"r_y2": 349.4151792972422,
|
||||
"r_x3": 234.08627147881114,
|
||||
"r_y3": 349.4151792972422,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@@ -65,14 +65,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 245.43122061153045,
|
||||
"r_y0": 513.795726112558,
|
||||
"r_x1": 514.3223724413002,
|
||||
"r_y1": 513.795726112558,
|
||||
"r_x2": 514.3223724413002,
|
||||
"r_y2": 436.0574704074058,
|
||||
"r_x3": 245.43122061153045,
|
||||
"r_y3": 436.0574704074058,
|
||||
"r_x0": 242.29979922858777,
|
||||
"r_y0": 509.8779072023336,
|
||||
"r_x1": 513.3470125989277,
|
||||
"r_y1": 509.8779072023336,
|
||||
"r_x2": 513.3470125989277,
|
||||
"r_y2": 439.9752910477536,
|
||||
"r_x3": 242.29979922858777,
|
||||
"r_y3": 439.9752910477536,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
@@ -90,13 +90,13 @@
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 238.19302423176944,
|
||||
"l": 234.08627147881114,
|
||||
"t": 258.9040166758338,
|
||||
"r": 1696.0985546594009,
|
||||
"b": 513.795726112558,
|
||||
"r": 1696.0985042090742,
|
||||
"b": 509.8779072023336,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9721010327339172,
|
||||
"confidence": 0.9721011519432068,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@@ -132,14 +132,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 238.19302423176944,
|
||||
"r_y0": 415.36904822716525,
|
||||
"r_x1": 1696.0985546594009,
|
||||
"r_y1": 415.36904822716525,
|
||||
"r_x2": 1696.0985546594009,
|
||||
"r_y2": 345.20535775097477,
|
||||
"r_x3": 238.19302423176944,
|
||||
"r_y3": 345.20535775097477,
|
||||
"r_x0": 234.08627147881114,
|
||||
"r_y0": 419.5788697734327,
|
||||
"r_x1": 1696.0985042090742,
|
||||
"r_y1": 419.5788697734327,
|
||||
"r_x2": 1696.0985042090742,
|
||||
"r_y2": 349.4151792972422,
|
||||
"r_x3": 234.08627147881114,
|
||||
"r_y3": 349.4151792972422,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@@ -157,14 +157,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 245.43122061153045,
|
||||
"r_y0": 513.795726112558,
|
||||
"r_x1": 514.3223724413002,
|
||||
"r_y1": 513.795726112558,
|
||||
"r_x2": 514.3223724413002,
|
||||
"r_y2": 436.0574704074058,
|
||||
"r_x3": 245.43122061153045,
|
||||
"r_y3": 436.0574704074058,
|
||||
"r_x0": 242.29979922858777,
|
||||
"r_y0": 509.8779072023336,
|
||||
"r_x1": 513.3470125989277,
|
||||
"r_y1": 509.8779072023336,
|
||||
"r_x2": 513.3470125989277,
|
||||
"r_y2": 439.9752910477536,
|
||||
"r_x3": 242.29979922858777,
|
||||
"r_y3": 439.9752910477536,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
@@ -195,13 +195,13 @@
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 238.19302423176944,
|
||||
"l": 234.08627147881114,
|
||||
"t": 258.9040166758338,
|
||||
"r": 1696.0985546594009,
|
||||
"b": 513.795726112558,
|
||||
"r": 1696.0985042090742,
|
||||
"b": 509.8779072023336,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9721010327339172,
|
||||
"confidence": 0.9721011519432068,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@@ -237,14 +237,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 238.19302423176944,
|
||||
"r_y0": 415.36904822716525,
|
||||
"r_x1": 1696.0985546594009,
|
||||
"r_y1": 415.36904822716525,
|
||||
"r_x2": 1696.0985546594009,
|
||||
"r_y2": 345.20535775097477,
|
||||
"r_x3": 238.19302423176944,
|
||||
"r_y3": 345.20535775097477,
|
||||
"r_x0": 234.08627147881114,
|
||||
"r_y0": 419.5788697734327,
|
||||
"r_x1": 1696.0985042090742,
|
||||
"r_y1": 419.5788697734327,
|
||||
"r_x2": 1696.0985042090742,
|
||||
"r_y2": 349.4151792972422,
|
||||
"r_x3": 234.08627147881114,
|
||||
"r_y3": 349.4151792972422,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@@ -262,14 +262,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 245.43122061153045,
|
||||
"r_y0": 513.795726112558,
|
||||
"r_x1": 514.3223724413002,
|
||||
"r_y1": 513.795726112558,
|
||||
"r_x2": 514.3223724413002,
|
||||
"r_y2": 436.0574704074058,
|
||||
"r_x3": 245.43122061153045,
|
||||
"r_y3": 436.0574704074058,
|
||||
"r_x0": 242.29979922858777,
|
||||
"r_y0": 509.8779072023336,
|
||||
"r_x1": 513.3470125989277,
|
||||
"r_y1": 509.8779072023336,
|
||||
"r_x2": 513.3470125989277,
|
||||
"r_y2": 439.9752910477536,
|
||||
"r_x3": 242.29979922858777,
|
||||
"r_y3": 439.9752910477536,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
@@ -293,13 +293,13 @@
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 238.19302423176944,
|
||||
"l": 234.08627147881114,
|
||||
"t": 258.9040166758338,
|
||||
"r": 1696.0985546594009,
|
||||
"b": 513.795726112558,
|
||||
"r": 1696.0985042090742,
|
||||
"b": 509.8779072023336,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9721010327339172,
|
||||
"confidence": 0.9721011519432068,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@@ -335,14 +335,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 238.19302423176944,
|
||||
"r_y0": 415.36904822716525,
|
||||
"r_x1": 1696.0985546594009,
|
||||
"r_y1": 415.36904822716525,
|
||||
"r_x2": 1696.0985546594009,
|
||||
"r_y2": 345.20535775097477,
|
||||
"r_x3": 238.19302423176944,
|
||||
"r_y3": 345.20535775097477,
|
||||
"r_x0": 234.08627147881114,
|
||||
"r_y0": 419.5788697734327,
|
||||
"r_x1": 1696.0985042090742,
|
||||
"r_y1": 419.5788697734327,
|
||||
"r_x2": 1696.0985042090742,
|
||||
"r_y2": 349.4151792972422,
|
||||
"r_x3": 234.08627147881114,
|
||||
"r_y3": 349.4151792972422,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@@ -360,14 +360,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 245.43122061153045,
|
||||
"r_y0": 513.795726112558,
|
||||
"r_x1": 514.3223724413002,
|
||||
"r_y1": 513.795726112558,
|
||||
"r_x2": 514.3223724413002,
|
||||
"r_y2": 436.0574704074058,
|
||||
"r_x3": 245.43122061153045,
|
||||
"r_y3": 436.0574704074058,
|
||||
"r_x0": 242.29979922858777,
|
||||
"r_y0": 509.8779072023336,
|
||||
"r_x1": 513.3470125989277,
|
||||
"r_y1": 509.8779072023336,
|
||||
"r_x2": 513.3470125989277,
|
||||
"r_y2": 439.9752910477536,
|
||||
"r_x3": 242.29979922858777,
|
||||
"r_y3": 439.9752910477536,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
|
||||
Reference in New Issue
Block a user