Fix tests, upgrade XSLM example to a valid file

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-06-10 13:14:32 +02:00
parent 000e7aa1ca
commit 3c922b4105
13 changed files with 616 additions and 646 deletions

View File

@ -213,10 +213,10 @@
"prov": [ "prov": [
{ {
"bbox": [ "bbox": [
139.6674041748047, 139.66741943359375,
322.5054626464844, 322.5054626464844,
475.00927734375, 475.00927734375,
454.4546203613281 454.45458984375
], ],
"page": 1, "page": 1,
"span": [ "span": [

View File

@ -2646,7 +2646,7 @@
"b": 102.78223000000003, "b": 102.78223000000003,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9373533129692078, "confidence": 0.9373534917831421,
"cells": [ "cells": [
{ {
"index": 0, "index": 0,
@ -2686,7 +2686,7 @@
"b": 102.78223000000003, "b": 102.78223000000003,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.8858679533004761, "confidence": 0.8858680725097656,
"cells": [ "cells": [
{ {
"index": 1, "index": 1,
@ -2726,7 +2726,7 @@
"b": 152.90697999999998, "b": 152.90697999999998,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9806435108184814, "confidence": 0.9806433916091919,
"cells": [ "cells": [
{ {
"index": 2, "index": 2,
@ -2881,7 +2881,7 @@
"b": 255.42400999999995, "b": 255.42400999999995,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9850425124168396, "confidence": 0.98504239320755,
"cells": [ "cells": [
{ {
"index": 7, "index": 7,
@ -3096,7 +3096,7 @@
"b": 327.98218, "b": 327.98218,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9591907262802124, "confidence": 0.9591909050941467,
"cells": [ "cells": [
{ {
"index": 15, "index": 15,
@ -3280,8 +3280,8 @@
"id": 0, "id": 0,
"label": "table", "label": "table",
"bbox": { "bbox": {
"l": 139.6674041748047, "l": 139.66741943359375,
"t": 337.5453796386719, "t": 337.54541015625,
"r": 475.00927734375, "r": 475.00927734375,
"b": 469.4945373535156, "b": 469.4945373535156,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
@ -7787,7 +7787,7 @@
"b": 518.17419, "b": 518.17419,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9589295387268066, "confidence": 0.9589294195175171,
"cells": [ "cells": [
{ {
"index": 91, "index": 91,
@ -7852,7 +7852,7 @@
"b": 618.3, "b": 618.3,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9849976301193237, "confidence": 0.9849975109100342,
"cells": [ "cells": [
{ {
"index": 93, "index": 93,
@ -8184,8 +8184,8 @@
"id": 0, "id": 0,
"label": "table", "label": "table",
"bbox": { "bbox": {
"l": 139.6674041748047, "l": 139.66741943359375,
"t": 337.5453796386719, "t": 337.54541015625,
"r": 475.00927734375, "r": 475.00927734375,
"b": 469.4945373535156, "b": 469.4945373535156,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
@ -13582,7 +13582,7 @@
"b": 102.78223000000003, "b": 102.78223000000003,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9373533129692078, "confidence": 0.9373534917831421,
"cells": [ "cells": [
{ {
"index": 0, "index": 0,
@ -13628,7 +13628,7 @@
"b": 102.78223000000003, "b": 102.78223000000003,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.8858679533004761, "confidence": 0.8858680725097656,
"cells": [ "cells": [
{ {
"index": 1, "index": 1,
@ -13674,7 +13674,7 @@
"b": 152.90697999999998, "b": 152.90697999999998,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9806435108184814, "confidence": 0.9806433916091919,
"cells": [ "cells": [
{ {
"index": 2, "index": 2,
@ -13841,7 +13841,7 @@
"b": 255.42400999999995, "b": 255.42400999999995,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9850425124168396, "confidence": 0.98504239320755,
"cells": [ "cells": [
{ {
"index": 7, "index": 7,
@ -14062,7 +14062,7 @@
"b": 327.98218, "b": 327.98218,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9591907262802124, "confidence": 0.9591909050941467,
"cells": [ "cells": [
{ {
"index": 15, "index": 15,
@ -14252,8 +14252,8 @@
"id": 0, "id": 0,
"label": "table", "label": "table",
"bbox": { "bbox": {
"l": 139.6674041748047, "l": 139.66741943359375,
"t": 337.5453796386719, "t": 337.54541015625,
"r": 475.00927734375, "r": 475.00927734375,
"b": 469.4945373535156, "b": 469.4945373535156,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
@ -19642,7 +19642,7 @@
"b": 518.17419, "b": 518.17419,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9589295387268066, "confidence": 0.9589294195175171,
"cells": [ "cells": [
{ {
"index": 91, "index": 91,
@ -19713,7 +19713,7 @@
"b": 618.3, "b": 618.3,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9849976301193237, "confidence": 0.9849975109100342,
"cells": [ "cells": [
{ {
"index": 93, "index": 93,
@ -20057,7 +20057,7 @@
"b": 152.90697999999998, "b": 152.90697999999998,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9806435108184814, "confidence": 0.9806433916091919,
"cells": [ "cells": [
{ {
"index": 2, "index": 2,
@ -20224,7 +20224,7 @@
"b": 255.42400999999995, "b": 255.42400999999995,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9850425124168396, "confidence": 0.98504239320755,
"cells": [ "cells": [
{ {
"index": 7, "index": 7,
@ -20445,7 +20445,7 @@
"b": 327.98218, "b": 327.98218,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9591907262802124, "confidence": 0.9591909050941467,
"cells": [ "cells": [
{ {
"index": 15, "index": 15,
@ -20635,8 +20635,8 @@
"id": 0, "id": 0,
"label": "table", "label": "table",
"bbox": { "bbox": {
"l": 139.6674041748047, "l": 139.66741943359375,
"t": 337.5453796386719, "t": 337.54541015625,
"r": 475.00927734375, "r": 475.00927734375,
"b": 469.4945373535156, "b": 469.4945373535156,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
@ -26025,7 +26025,7 @@
"b": 518.17419, "b": 518.17419,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9589295387268066, "confidence": 0.9589294195175171,
"cells": [ "cells": [
{ {
"index": 91, "index": 91,
@ -26096,7 +26096,7 @@
"b": 618.3, "b": 618.3,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9849976301193237, "confidence": 0.9849975109100342,
"cells": [ "cells": [
{ {
"index": 93, "index": 93,
@ -26440,7 +26440,7 @@
"b": 102.78223000000003, "b": 102.78223000000003,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9373533129692078, "confidence": 0.9373534917831421,
"cells": [ "cells": [
{ {
"index": 0, "index": 0,
@ -26486,7 +26486,7 @@
"b": 102.78223000000003, "b": 102.78223000000003,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.8858679533004761, "confidence": 0.8858680725097656,
"cells": [ "cells": [
{ {
"index": 1, "index": 1,

View File

@ -336,8 +336,8 @@
{ {
"page_no": 1, "page_no": 1,
"bbox": { "bbox": {
"l": 139.6674041748047, "l": 139.66741943359375,
"t": 454.4546203613281, "t": 454.45458984375,
"r": 475.00927734375, "r": 475.00927734375,
"b": 322.5054626464844, "b": 322.5054626464844,
"coord_origin": "BOTTOMLEFT" "coord_origin": "BOTTOMLEFT"

View File

@ -2646,7 +2646,7 @@
"b": 102.78223000000003, "b": 102.78223000000003,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9373533129692078, "confidence": 0.9373534917831421,
"cells": [ "cells": [
{ {
"index": 0, "index": 0,
@ -2686,7 +2686,7 @@
"b": 102.78223000000003, "b": 102.78223000000003,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.8858679533004761, "confidence": 0.8858680725097656,
"cells": [ "cells": [
{ {
"index": 1, "index": 1,
@ -2726,7 +2726,7 @@
"b": 152.90697999999998, "b": 152.90697999999998,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9806435108184814, "confidence": 0.9806433916091919,
"cells": [ "cells": [
{ {
"index": 2, "index": 2,
@ -2881,7 +2881,7 @@
"b": 255.42400999999995, "b": 255.42400999999995,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9850425124168396, "confidence": 0.98504239320755,
"cells": [ "cells": [
{ {
"index": 7, "index": 7,
@ -3096,7 +3096,7 @@
"b": 327.98218, "b": 327.98218,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9591907262802124, "confidence": 0.9591909050941467,
"cells": [ "cells": [
{ {
"index": 15, "index": 15,
@ -3280,8 +3280,8 @@
"id": 0, "id": 0,
"label": "table", "label": "table",
"bbox": { "bbox": {
"l": 139.6674041748047, "l": 139.66741943359375,
"t": 337.5453796386719, "t": 337.54541015625,
"r": 475.00927734375, "r": 475.00927734375,
"b": 469.4945373535156, "b": 469.4945373535156,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
@ -7787,7 +7787,7 @@
"b": 518.17419, "b": 518.17419,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9589295387268066, "confidence": 0.9589294195175171,
"cells": [ "cells": [
{ {
"index": 91, "index": 91,
@ -7852,7 +7852,7 @@
"b": 618.3, "b": 618.3,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9849976301193237, "confidence": 0.9849975109100342,
"cells": [ "cells": [
{ {
"index": 93, "index": 93,
@ -8184,8 +8184,8 @@
"id": 0, "id": 0,
"label": "table", "label": "table",
"bbox": { "bbox": {
"l": 139.6674041748047, "l": 139.66741943359375,
"t": 337.5453796386719, "t": 337.54541015625,
"r": 475.00927734375, "r": 475.00927734375,
"b": 469.4945373535156, "b": 469.4945373535156,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
@ -13582,7 +13582,7 @@
"b": 102.78223000000003, "b": 102.78223000000003,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9373533129692078, "confidence": 0.9373534917831421,
"cells": [ "cells": [
{ {
"index": 0, "index": 0,
@ -13628,7 +13628,7 @@
"b": 102.78223000000003, "b": 102.78223000000003,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.8858679533004761, "confidence": 0.8858680725097656,
"cells": [ "cells": [
{ {
"index": 1, "index": 1,
@ -13674,7 +13674,7 @@
"b": 152.90697999999998, "b": 152.90697999999998,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9806435108184814, "confidence": 0.9806433916091919,
"cells": [ "cells": [
{ {
"index": 2, "index": 2,
@ -13841,7 +13841,7 @@
"b": 255.42400999999995, "b": 255.42400999999995,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9850425124168396, "confidence": 0.98504239320755,
"cells": [ "cells": [
{ {
"index": 7, "index": 7,
@ -14062,7 +14062,7 @@
"b": 327.98218, "b": 327.98218,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9591907262802124, "confidence": 0.9591909050941467,
"cells": [ "cells": [
{ {
"index": 15, "index": 15,
@ -14252,8 +14252,8 @@
"id": 0, "id": 0,
"label": "table", "label": "table",
"bbox": { "bbox": {
"l": 139.6674041748047, "l": 139.66741943359375,
"t": 337.5453796386719, "t": 337.54541015625,
"r": 475.00927734375, "r": 475.00927734375,
"b": 469.4945373535156, "b": 469.4945373535156,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
@ -19642,7 +19642,7 @@
"b": 518.17419, "b": 518.17419,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9589295387268066, "confidence": 0.9589294195175171,
"cells": [ "cells": [
{ {
"index": 91, "index": 91,
@ -19713,7 +19713,7 @@
"b": 618.3, "b": 618.3,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9849976301193237, "confidence": 0.9849975109100342,
"cells": [ "cells": [
{ {
"index": 93, "index": 93,
@ -20057,7 +20057,7 @@
"b": 152.90697999999998, "b": 152.90697999999998,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9806435108184814, "confidence": 0.9806433916091919,
"cells": [ "cells": [
{ {
"index": 2, "index": 2,
@ -20224,7 +20224,7 @@
"b": 255.42400999999995, "b": 255.42400999999995,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9850425124168396, "confidence": 0.98504239320755,
"cells": [ "cells": [
{ {
"index": 7, "index": 7,
@ -20445,7 +20445,7 @@
"b": 327.98218, "b": 327.98218,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9591907262802124, "confidence": 0.9591909050941467,
"cells": [ "cells": [
{ {
"index": 15, "index": 15,
@ -20635,8 +20635,8 @@
"id": 0, "id": 0,
"label": "table", "label": "table",
"bbox": { "bbox": {
"l": 139.6674041748047, "l": 139.66741943359375,
"t": 337.5453796386719, "t": 337.54541015625,
"r": 475.00927734375, "r": 475.00927734375,
"b": 469.4945373535156, "b": 469.4945373535156,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
@ -26025,7 +26025,7 @@
"b": 518.17419, "b": 518.17419,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9589295387268066, "confidence": 0.9589294195175171,
"cells": [ "cells": [
{ {
"index": 91, "index": 91,
@ -26096,7 +26096,7 @@
"b": 618.3, "b": 618.3,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9849976301193237, "confidence": 0.9849975109100342,
"cells": [ "cells": [
{ {
"index": 93, "index": 93,
@ -26440,7 +26440,7 @@
"b": 102.78223000000003, "b": 102.78223000000003,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9373533129692078, "confidence": 0.9373534917831421,
"cells": [ "cells": [
{ {
"index": 0, "index": 0,
@ -26486,7 +26486,7 @@
"b": 102.78223000000003, "b": 102.78223000000003,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.8858679533004761, "confidence": 0.8858680725097656,
"cells": [ "cells": [
{ {
"index": 1, "index": 1,

View File

@ -4,7 +4,7 @@
"name": "sample_sales_data", "name": "sample_sales_data",
"origin": { "origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"binary_hash": 4984052357623711224, "binary_hash": 14806485565397602516,
"filename": "sample_sales_data.xlsm" "filename": "sample_sales_data.xlsm"
}, },
"furniture": { "furniture": {

View File

@ -5,92 +5,89 @@ item-0 at level 0: unspecified: group _root_
item-4 at level 1: section: group textbox item-4 at level 1: section: group textbox
item-5 at level 2: paragraph: Student falls ill item-5 at level 2: paragraph: Student falls ill
item-6 at level 2: paragraph: item-6 at level 2: paragraph:
item-7 at level 2: paragraph: item-7 at level 2: list: group list
item-8 at level 2: list: group list item-8 at level 3: list_item: Suggested Reportable Symptoms:
item-9 at level 3: list_item: Suggested Reportable Symptoms:
... sh ... sh
Blisters Blisters
Headache Headache
Sore throat Sore throat
item-10 at level 1: list_item: item-9 at level 1: list_item:
item-10 at level 1: paragraph:
item-11 at level 1: paragraph: item-11 at level 1: paragraph:
item-12 at level 1: paragraph: item-12 at level 1: section: group textbox
item-13 at level 1: section: group textbox item-13 at level 2: paragraph: If a caregiver suspects that wit ... the same suggested reportable symptoms
item-14 at level 2: paragraph: If a caregiver suspects that wit ... the same suggested reportable symptoms item-14 at level 1: paragraph:
item-15 at level 1: paragraph: item-15 at level 1: paragraph:
item-16 at level 1: paragraph: item-16 at level 1: paragraph:
item-17 at level 1: paragraph: item-17 at level 1: paragraph:
item-18 at level 1: paragraph: item-18 at level 1: section: group textbox
item-19 at level 1: section: group textbox item-19 at level 2: paragraph: Yes
item-20 at level 2: paragraph: Yes item-20 at level 1: paragraph:
item-21 at level 1: paragraph: item-21 at level 1: paragraph:
item-22 at level 1: paragraph: item-22 at level 1: section: group textbox
item-23 at level 1: section: group textbox item-23 at level 2: list: group list
item-24 at level 2: list: group list item-24 at level 3: list_item: A report must be submitted withi ... saster Prevention Information Network.
item-25 at level 3: list_item: A report must be submitted withi ... saster Prevention Information Network. item-25 at level 3: list_item: A report must also be submitted ... d Infectious Disease Reporting System.
item-26 at level 3: list_item: A report must also be submitted ... d Infectious Disease Reporting System. item-26 at level 2: paragraph:
item-27 at level 2: paragraph: item-27 at level 1: list: group list
item-28 at level 2: paragraph: item-28 at level 2: list_item:
item-29 at level 1: list: group list item-29 at level 1: paragraph:
item-30 at level 2: list_item: item-30 at level 1: paragraph:
item-31 at level 1: paragraph: item-31 at level 1: paragraph:
item-32 at level 1: paragraph: item-32 at level 1: paragraph:
item-33 at level 1: paragraph: item-33 at level 1: paragraph:
item-34 at level 1: paragraph: item-34 at level 1: section: group textbox
item-35 at level 1: paragraph: item-35 at level 2: paragraph: Health Bureau:
item-36 at level 1: section: group textbox item-36 at level 2: paragraph: Upon receiving a report from the ... rt to the Centers for Disease Control.
item-37 at level 2: paragraph: Health Bureau: item-37 at level 2: list: group list
item-38 at level 2: paragraph: Upon receiving a report from the ... rt to the Centers for Disease Control. item-38 at level 3: list_item: If necessary, provide health edu ... vidual to undergo specimen collection.
item-39 at level 2: list: group list item-39 at level 3: list_item: Implement appropriate epidemic p ... the Communicable Disease Control Act.
item-40 at level 3: list_item: If necessary, provide health edu ... vidual to undergo specimen collection. item-40 at level 2: paragraph:
item-41 at level 3: list_item: Implement appropriate epidemic p ... the Communicable Disease Control Act. item-41 at level 1: list: group list
item-42 at level 2: paragraph: item-42 at level 2: list_item:
item-43 at level 2: paragraph: item-43 at level 1: paragraph:
item-44 at level 1: list: group list item-44 at level 1: section: group textbox
item-45 at level 2: list_item: item-45 at level 2: paragraph: Department of Education:
item-46 at level 1: paragraph:
item-47 at level 1: section: group textbox
item-48 at level 2: paragraph: Department of Education:
Collabo ... vention measures at all school levels. Collabo ... vention measures at all school levels.
item-46 at level 1: paragraph:
item-47 at level 1: paragraph:
item-48 at level 1: paragraph:
item-49 at level 1: paragraph: item-49 at level 1: paragraph:
item-50 at level 1: paragraph: item-50 at level 1: paragraph:
item-51 at level 1: paragraph: item-51 at level 1: paragraph:
item-52 at level 1: paragraph: item-52 at level 1: paragraph:
item-53 at level 1: paragraph: item-53 at level 1: section: group textbox
item-54 at level 1: paragraph: item-54 at level 2: inline: group group
item-55 at level 1: paragraph: item-55 at level 3: paragraph: The Health Bureau will handle
item-56 at level 1: section: group textbox item-56 at level 3: paragraph: reporting and specimen collection
item-57 at level 2: inline: group group item-57 at level 3: paragraph: .
item-58 at level 3: paragraph: The Health Bureau will handle item-58 at level 2: paragraph:
item-59 at level 3: paragraph: reporting and specimen collection item-59 at level 1: paragraph:
item-60 at level 3: paragraph: . item-60 at level 1: paragraph:
item-61 at level 2: paragraph: item-61 at level 1: paragraph:
item-62 at level 2: paragraph: item-62 at level 1: section: group textbox
item-63 at level 1: paragraph: item-63 at level 2: paragraph: Whether the epidemic has eased.
item-64 at level 1: paragraph: item-64 at level 2: paragraph:
item-65 at level 1: paragraph: item-65 at level 1: paragraph:
item-66 at level 1: section: group textbox item-66 at level 1: section: group textbox
item-67 at level 2: paragraph: Whether the epidemic has eased. item-67 at level 2: paragraph: Whether the test results are pos ... legally designated infectious disease.
item-68 at level 2: paragraph: item-68 at level 2: paragraph: No
item-69 at level 2: paragraph: item-69 at level 1: paragraph:
item-70 at level 1: paragraph: item-70 at level 1: paragraph:
item-71 at level 1: section: group textbox item-71 at level 1: section: group textbox
item-72 at level 2: paragraph: Whether the test results are pos ... legally designated infectious disease. item-72 at level 2: paragraph: Yes
item-73 at level 2: paragraph: No item-73 at level 1: paragraph:
item-74 at level 1: paragraph: item-74 at level 1: section: group textbox
item-75 at level 1: paragraph: item-75 at level 2: paragraph: Yes
item-76 at level 1: section: group textbox item-76 at level 1: paragraph:
item-77 at level 1: paragraph: item-77 at level 1: paragraph:
item-78 at level 1: section: group textbox item-78 at level 1: section: group textbox
item-79 at level 1: paragraph: item-79 at level 2: paragraph: Case closed.
item-80 at level 1: paragraph: item-80 at level 2: paragraph:
item-81 at level 1: section: group textbox item-81 at level 2: paragraph: The Health Bureau will carry out ... ters for Disease Control if necessary.
item-82 at level 2: paragraph: Case closed. item-82 at level 1: paragraph:
item-83 at level 2: paragraph: item-83 at level 1: section: group textbox
item-84 at level 2: paragraph: item-84 at level 2: paragraph: No
item-85 at level 2: paragraph: The Health Bureau will carry out ... ters for Disease Control if necessary. item-85 at level 1: paragraph:
item-86 at level 1: paragraph: item-86 at level 1: paragraph:
item-87 at level 1: section: group textbox item-87 at level 1: paragraph:
item-88 at level 1: paragraph:
item-89 at level 1: paragraph:
item-90 at level 1: paragraph:

File diff suppressed because it is too large Load Diff

View File

@ -40,6 +40,12 @@ The Health Bureau will handle **reporting and specimen collection** .
No No
Yes
Yes
**Case closed.** **Case closed.**
The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary. The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.
No

View File

@ -1,2 +1,2 @@
<doctag><text><loc_60><loc_46><loc_424><loc_91>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text> <doctag><text><loc_59><loc_46><loc_424><loc_90>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text>
</doctag> </doctag>

View File

@ -42,10 +42,10 @@
{ {
"page_no": 1, "page_no": 1,
"bbox": { "bbox": {
"l": 238.19302423176944, "l": 234.08627147881114,
"t": 2570.0959833241664, "t": 2570.0959833241664,
"r": 1696.0985546594009, "r": 1696.0985042090742,
"b": 2315.204273887442, "b": 2319.1220927976665,
"coord_origin": "BOTTOMLEFT" "coord_origin": "BOTTOMLEFT"
}, },
"charspan": [ "charspan": [

View File

@ -40,14 +40,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 238.19302423176944, "r_x0": 234.08627147881114,
"r_y0": 415.36904822716525, "r_y0": 419.5788697734327,
"r_x1": 1696.0985546594009, "r_x1": 1696.0985042090742,
"r_y1": 415.36904822716525, "r_y1": 419.5788697734327,
"r_x2": 1696.0985546594009, "r_x2": 1696.0985042090742,
"r_y2": 345.20535775097477, "r_y2": 349.4151792972422,
"r_x3": 238.19302423176944, "r_x3": 234.08627147881114,
"r_y3": 345.20535775097477, "r_y3": 349.4151792972422,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -65,14 +65,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 245.43122061153045, "r_x0": 242.29979922858777,
"r_y0": 513.795726112558, "r_y0": 509.8779072023336,
"r_x1": 514.3223724413002, "r_x1": 513.3470125989277,
"r_y1": 513.795726112558, "r_y1": 509.8779072023336,
"r_x2": 514.3223724413002, "r_x2": 513.3470125989277,
"r_y2": 436.0574704074058, "r_y2": 439.9752910477536,
"r_x3": 245.43122061153045, "r_x3": 242.29979922858777,
"r_y3": 436.0574704074058, "r_y3": 439.9752910477536,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -90,13 +90,13 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 238.19302423176944, "l": 234.08627147881114,
"t": 258.9040166758338, "t": 258.9040166758338,
"r": 1696.0985546594009, "r": 1696.0985042090742,
"b": 513.795726112558, "b": 509.8779072023336,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9721010327339172, "confidence": 0.9721011519432068,
"cells": [ "cells": [
{ {
"index": 0, "index": 0,
@ -132,14 +132,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 238.19302423176944, "r_x0": 234.08627147881114,
"r_y0": 415.36904822716525, "r_y0": 419.5788697734327,
"r_x1": 1696.0985546594009, "r_x1": 1696.0985042090742,
"r_y1": 415.36904822716525, "r_y1": 419.5788697734327,
"r_x2": 1696.0985546594009, "r_x2": 1696.0985042090742,
"r_y2": 345.20535775097477, "r_y2": 349.4151792972422,
"r_x3": 238.19302423176944, "r_x3": 234.08627147881114,
"r_y3": 345.20535775097477, "r_y3": 349.4151792972422,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -157,14 +157,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 245.43122061153045, "r_x0": 242.29979922858777,
"r_y0": 513.795726112558, "r_y0": 509.8779072023336,
"r_x1": 514.3223724413002, "r_x1": 513.3470125989277,
"r_y1": 513.795726112558, "r_y1": 509.8779072023336,
"r_x2": 514.3223724413002, "r_x2": 513.3470125989277,
"r_y2": 436.0574704074058, "r_y2": 439.9752910477536,
"r_x3": 245.43122061153045, "r_x3": 242.29979922858777,
"r_y3": 436.0574704074058, "r_y3": 439.9752910477536,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -195,13 +195,13 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 238.19302423176944, "l": 234.08627147881114,
"t": 258.9040166758338, "t": 258.9040166758338,
"r": 1696.0985546594009, "r": 1696.0985042090742,
"b": 513.795726112558, "b": 509.8779072023336,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9721010327339172, "confidence": 0.9721011519432068,
"cells": [ "cells": [
{ {
"index": 0, "index": 0,
@ -237,14 +237,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 238.19302423176944, "r_x0": 234.08627147881114,
"r_y0": 415.36904822716525, "r_y0": 419.5788697734327,
"r_x1": 1696.0985546594009, "r_x1": 1696.0985042090742,
"r_y1": 415.36904822716525, "r_y1": 419.5788697734327,
"r_x2": 1696.0985546594009, "r_x2": 1696.0985042090742,
"r_y2": 345.20535775097477, "r_y2": 349.4151792972422,
"r_x3": 238.19302423176944, "r_x3": 234.08627147881114,
"r_y3": 345.20535775097477, "r_y3": 349.4151792972422,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -262,14 +262,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 245.43122061153045, "r_x0": 242.29979922858777,
"r_y0": 513.795726112558, "r_y0": 509.8779072023336,
"r_x1": 514.3223724413002, "r_x1": 513.3470125989277,
"r_y1": 513.795726112558, "r_y1": 509.8779072023336,
"r_x2": 514.3223724413002, "r_x2": 513.3470125989277,
"r_y2": 436.0574704074058, "r_y2": 439.9752910477536,
"r_x3": 245.43122061153045, "r_x3": 242.29979922858777,
"r_y3": 436.0574704074058, "r_y3": 439.9752910477536,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -293,13 +293,13 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 238.19302423176944, "l": 234.08627147881114,
"t": 258.9040166758338, "t": 258.9040166758338,
"r": 1696.0985546594009, "r": 1696.0985042090742,
"b": 513.795726112558, "b": 509.8779072023336,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9721010327339172, "confidence": 0.9721011519432068,
"cells": [ "cells": [
{ {
"index": 0, "index": 0,
@ -335,14 +335,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 238.19302423176944, "r_x0": 234.08627147881114,
"r_y0": 415.36904822716525, "r_y0": 419.5788697734327,
"r_x1": 1696.0985546594009, "r_x1": 1696.0985042090742,
"r_y1": 415.36904822716525, "r_y1": 419.5788697734327,
"r_x2": 1696.0985546594009, "r_x2": 1696.0985042090742,
"r_y2": 345.20535775097477, "r_y2": 349.4151792972422,
"r_x3": 238.19302423176944, "r_x3": 234.08627147881114,
"r_y3": 345.20535775097477, "r_y3": 349.4151792972422,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -360,14 +360,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 245.43122061153045, "r_x0": 242.29979922858777,
"r_y0": 513.795726112558, "r_y0": 509.8779072023336,
"r_x1": 514.3223724413002, "r_x1": 513.3470125989277,
"r_y1": 513.795726112558, "r_y1": 509.8779072023336,
"r_x2": 514.3223724413002, "r_x2": 513.3470125989277,
"r_y2": 436.0574704074058, "r_y2": 439.9752910477536,
"r_x3": 245.43122061153045, "r_x3": 242.29979922858777,
"r_y3": 436.0574704074058, "r_y3": 439.9752910477536,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",

Binary file not shown.

View File

@ -79,30 +79,21 @@ def test_pages(documents) -> None:
documents: The paths and converted documents. documents: The paths and converted documents.
""" """
# number of pages from the backend method # number of pages from the backend method
# Logic to handle multiple files path = next(item for item in get_excel_paths() if item.stem == "test-01")
file_stems = ["sample_sales_data","test-01"] in_doc = InputDocument(
for stem in file_stems: path_or_stream=path,
path = next(item for item in get_excel_paths() if item.stem == stem) format=InputFormat.XLSX,
in_doc = InputDocument( filename=path.stem,
path_or_stream=path, backend=MsExcelDocumentBackend,
format=InputFormat.XLSX, )
filename=path.stem, backend = MsExcelDocumentBackend(in_doc=in_doc, path_or_stream=path)
backend=MsExcelDocumentBackend, assert backend.page_count() == 3
)
backend = MsExcelDocumentBackend(in_doc=in_doc, path_or_stream=path)
# Update the expected page count based on actual content
expected_page_count = 3 # Adjust this value based on the actual number of worksheets this needs to be adjusted for each xlsm and xlsx files independently
assert backend.page_count() == expected_page_count
# number of pages from the converted document # number of pages from the converted document
doc = next(item for path, item in documents if path.stem == stem) doc = next(item for path, item in documents if path.stem == "test-01")
assert len(doc.pages) == 3 assert len(doc.pages) == 3
# page sizes as number of cells # page sizes as number of cells
assert doc.pages.get(1).size.as_tuple() == (3.0, 7.0)
# for xlsm file just adjust this wrt the xlsm files for test xlsm enable this: assert doc.pages.get(2).size.as_tuple() == (9.0, 18.0)
#assert doc.pages.get(1).size.as_tuple() == (4.0, 21.0) assert doc.pages.get(3).size.as_tuple() == (13.0, 36.0)
# for xlsx file:
assert doc.pages.get(1).size.as_tuple() == (3.0, 7.0)
assert doc.pages.get(2).size.as_tuple() == (9.0, 18.0)
assert doc.pages.get(3).size.as_tuple() == (13.0, 36.0)