From 0ee690b5af547836b4d743de59a641327a371658 Mon Sep 17 00:00:00 2001 From: Nikos Livathinos Date: Thu, 9 Jan 2025 14:59:39 +0100 Subject: [PATCH] fix: WIP to fix the glm_utils.to_docling_document() and add a unit test Signed-off-by: Nikos Livathinos --- docling/utils/glm_utils.py | 2 +- tests/data/utils/01030000000016.json | 1 + tests/data/utils/01030000000016.pdf | Bin 0 -> 40042 bytes tests/test_glm_utils.py | 86 +++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 tests/data/utils/01030000000016.json create mode 100644 tests/data/utils/01030000000016.pdf create mode 100644 tests/test_glm_utils.py diff --git a/docling/utils/glm_utils.py b/docling/utils/glm_utils.py index 1c3b3f67..3752dd4c 100644 --- a/docling/utils/glm_utils.py +++ b/docling/utils/glm_utils.py @@ -165,7 +165,7 @@ def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument: pic.captions.extend(caption_refs) _add_child_elements(pic, doc, obj, pelem) - elif ptype == "table": + elif ptype in ["table", "table-of-contents"]: current_list = None text = "" caption_refs = [] diff --git a/tests/data/utils/01030000000016.json b/tests/data/utils/01030000000016.json new file mode 100644 index 00000000..0a134fe9 --- /dev/null +++ b/tests/data/utils/01030000000016.json @@ -0,0 +1 @@ +{"_s3_data": {}, "applied_models": [], "body": [{"$ref": "#/texts/0"}, {"$ref": "#/texts/1"}], "conversion_settings": {}, "description": {"logs": []}, "dloc": "34c43ca2f99dc95e789311626e8e2b3a5afea659112a742b0ca9e0b80a824c63#", "figures": [], "file-info": {"#-pages": 1, "document-hash": "34c43ca2f99dc95e789311626e8e2b3a5afea659112a742b0ca9e0b80a824c63", "filename": "01030000000016.pdf", "page-hashes": [{"hash": "bd1d3243c5eb9572db6f790807ca26d917289b839e3b0f12bc4a8a7e99bbc895", "model": "default", "page": 1}]}, "footnotes": [], "meta": [{"$ref": "#/other/0"}, {"$ref": "#/page-footers/0"}], "model-application": {"message": "success", "success": true}, "other": [{"dloc": "34c43ca2f99dc95e789311626e8e2b3a5afea659112a742b0ca9e0b80a824c63#/meta/2", "orig": "Table of contents", "prov": [{"$ref": "#/page-elements/2"}], "sref": "#/other/0", "subj_hash": 3945645157480977532, "text": "Table of contents", "text_hash": 10753538639672363281, "type": "table-of-contents"}], "page-dimensions": [{"height": 663.3070068359375, "page": 1, "width": 442.20501708984375}], "page-elements": [{"bbox": [152.0500030517578, 547.81103515625, 290.02099609375, 567.8159790039062], "iref": "#/texts/0", "name": "paragraph", "orig-order": 1, "page": 1, "span": [0, 17], "sref": "#/page-elements/0", "text-order": 0, "type": "paragraph"}, {"bbox": [56.69300079345703, 511.9410095214844, 117.2550048828125, 523.9440307617188], "iref": "#/texts/1", "name": "Text", "orig-order": 0, "page": 1, "span": [0, 12], "sref": "#/page-elements/1", "text-order": 1, "type": "paragraph"}, {"bbox": [56.37486267089844, 82.59698486328125, 386.371337890625, 521.0339965820312], "iref": "#/other/0", "name": "Document Index", "orig-order": 2, "page": 1, "span": [0, 0], "sref": "#/page-elements/2", "text-order": 2, "type": "table-of-contents"}, {"bbox": [381.4300231933594, 28.700000762939453, 385.4750061035156, 33.72200012207031], "iref": "#/page-footers/0", "name": "Page-footer", "orig-order": 3, "page": 1, "span": [0, 1], "sref": "#/page-elements/3", "text-order": 3, "type": "page-footer"}], "page-footers": [{"dloc": "34c43ca2f99dc95e789311626e8e2b3a5afea659112a742b0ca9e0b80a824c63#/page-footers/0", "orig": "5", "prov": [{"$ref": "#/page-elements/3"}], "sref": "#/page-footers/0", "subj_hash": 10924513919643460592, "text": "5", "text_hash": 17767354399704235157, "type": "page-footer"}], "page-headers": [], "payload": null, "sref": "#", "subj_hash": 18446744073709551615, "tables": [], "texts": [{"dloc": "34c43ca2f99dc95e789311626e8e2b3a5afea659112a742b0ca9e0b80a824c63#/texts/0", "orig": "Table of contents", "prov": [{"$ref": "#/page-elements/0"}], "sref": "#/texts/0", "subj_hash": 3945645157480977532, "text": "Table of contents", "text_hash": 10753538639672363281, "type": "paragraph"}, {"dloc": "34c43ca2f99dc95e789311626e8e2b3a5afea659112a742b0ca9e0b80a824c63#/texts/1", "orig": "Introduction", "prov": [{"$ref": "#/page-elements/1"}], "sref": "#/texts/1", "subj_hash": 16581708288081805343, "text": "Introduction", "text_hash": 14273225408978377972, "type": "paragraph"}]} \ No newline at end of file diff --git a/tests/data/utils/01030000000016.pdf b/tests/data/utils/01030000000016.pdf new file mode 100644 index 0000000000000000000000000000000000000000..42ef3bb8f7452a74f24702cc81ee93c23caced7a GIT binary patch literal 40042 zcmcG$1ymeM8|MuKm*5bb1PKI!4DJwIg1g(`?(R;o;1)EvTX1)WKp;SHcMFhU-^}DD z_uhTq-TijY*)wOJ>8`GN>Z$6UsqX);YN+Hz#9lKqa3E8yZ%lkfW+rDMw>7jt=H*3Z zRCc!mk~7L1m;xP<8HH_~ZJfxNkr}1UjUCCg$XP)d70Gpw`T3E7Hpbv0fPWN~cd#{f zHUc`3)5yDnDge;_BOl8@^8Knp*ucra%GMP5S6$@b7!?0e09@7Yn1mf{?cUhBfdXIx zy;xZR3;-rJa&~qW1{Nj`P&lACZJdBMPLAYk5HF01woV34KyoHz#(2<<|SK+^V034eG$nSpy2d?IKaAM|GPM|h3idliG6agC98Uv9TWq>xOPG;oH zES#+1HaR*u01d2>&6dm5q-{6YG1_m{c9kau$4>7D%g^|$L|r)qm##1_v+K9ibu>^Z z3BKOlK9!Vgpr<;65q+A|H{f+K#?iAjJHV61J5!rJd$=DcV^*^;UeuQ)`2pkRpkEmv zLx~w4)5~J!kQYE(c z=|wu@s`lnKto41Ye(;wHtD2hPoyRhtEQN^j1lC-$0Yw8~>RU?OdE^p4#LHrvD^j#7vM>Z=^=V9yZaqSM`%T32DUtc`23Q|#?5zaRHzdWzVK zj7>TuC!xFlSoLKYow81ZNpeqnXe;hZoDgw2lxEb0B0YxoaqEksiMV8gwb*=qJ) z%Jn+Mh}fQIE}F8LQCq9PbWQEzDFbkEpF5cb$5-PDc&-zKFT_rLnd0)^Cl+-l7($?jTdE$8oAbVt$2b9n75FAb9~OHN29zx#U{hG~Xy`ro%3b zs90KNtMV9qzg}WAq+6!*R1@3Undh?&PUX&i4=)B>0T0(o(+k__6g5-~c7}lKH?|+N zzw@Dp&`n+sjPfb(Z7@s(2z=O%4PI^gFxYLLC{5FqhBoBUjUnQ1%1wG#QoNf%7WapOGzFm^ zN_~D$uCdx4p*cj3=)C3qL^^tbLdXwTVFI(&(S9y>=W#(zi6o>fCgT%NPv^0D-Stk| z0BJ^2-V!6tMr@1}q7N{f8Z}P019e_LADw8f4EJ`QASs3+XkHS?YO&g^J&Jo$#JO|Y zCU`KE?H!hmd=B>iiCc$&y@z$tm##aWIv2huJOrD1qrYX2xyQ;#Ei&(6{sxNl<17^V zG&XSM89yrB(easlY_pecQz6EnypiU2dnlLW6LisqUaNxkH`{Od?PH|BiYr|(BXc3Y zJm~qM<)qVDuEG(?hIoUqk4#zU@0m;Eax8B;S1$SGJsuyDt~7`zT=ljFE3>xYj(!+#^!z)m1RRiUmA_$8uhVQ z^Ykfk%eFG{p~iT8Lrrj$SLBP+PN;6D`{-i)N(P;8T3jh~e`@yYmAnYg;YaoK_7f5{ zBLSYvzDxM%YSiTE34FsBVMo@HkwCHf&a}M-`@L5-O$<-J?_Q;=q6QbJ*K`T_>6bl| z8gwuCYE+Qf7OZ=&CqcGO``B}q`BCdpi;vl#P^GwMgx&A5Rb6q&#}PF^eo@yV=Rf3!;t zv9v4ewjm$w&5ER|EhoA}>o{fmygqhS`S*3gHZ*vsKcB+emT&OzURIx|)jq;LgYqBx zA_g9u{`$%NYnTH6wEzCW7Phsrbx^W1FanZ;2Rva%c5)8LFF)vqno$fil0cHhn8~?* zjR#@?a%S)_rwBUALeBix5F^G$&dm8&Dmyvg&+x?v8U>A%fKH&niV-vjk~1m;-JHN< zl&G7NxDsSo0@o_8#7qu=45#4n;qUPj90@p3P-#hF;Wq}3Kx1-9H6Ry%wFSKUyDk59 z@R22FL1q*Jk4iS+QIJst=wfaJR1|;nXIuiXadG@>T*^ptRaTzE3!P|l^~f1xnkMjw zXJGQXWFRMjRd{9s|7;kl2lcI0SdpLzCML9KY_=bCUXDarnBjApB7JlT40%JQh@v86 zu`g0y?(QdBTh{|^D+{J4j@Ms3v+5b3DCNi)7~zs&$WvdgB6x6qRtpxNtiFeWck_cT zfMU|vD{dD$?h>SA+ehx(`>TTI^*5_`v?jDYhsA&BziiL$Gs*yM;t}(mgz?b_D&HIdx;>@KZ zIrMlq9Ecnb>tz2;omP_e4L=`W;?Vje+9Pl8Bj;)5g*xG2_|_>GuYyRC*>Gbkr$URn zRfS)Acb=pBV{!N+)1jPZhGiVh_be#3VMK{<*)W?s{C>F@&{*#BOW#*+w*%4P*3M$HiFdG`$(SPjAc(YFw?vxsgQA4S9ejeD4ejLroau>bCmtsZ zM-|MCzdpv(FEDsv2t$7803n8XRAfQSdAO?pDfuV+LKJ4uF9i`e(HQ(jR=FFIj{Oky zVc!Mr=)<={Utqu*JO_yQZ$4L%KmquTlEWLLHwp2}ljA%U6}HDf$9mH=iC2X9D)?27 zW)bF3JZ?B0zlt|cCi%>8(gJ*fmwOOp0*b2uWoSe_sPjlyA%SabcElO*JDopHWgB5C z0%N+4PT_92y%0$KvwCU~1;Y^06ogSR;nxFho?}YTcZ;)=2TO1%hSrhmNH98}5C|2- zxzeCU1mxzSDm2i@DI-P%O9@*G;^w_l5LY-VBAt0XLt_E|9qFwYT@KnLyBXud4b$&Fo(XQ&||Q(fu?yv%DUu%p~E%-c`}xQ?VOegr+EhSlosWzb)q z9)to7;pSlueGZuqxem!)h;Q-4dyCgc?T#A}iP4io=?2)=#@FHPu^Zi6adc5^1Fw5( z&%WMZdGWQQ_&yPc&Uz_IttCJcg-;IdO$&b37EVi$rH^ zi|%I#Gzh)|YjSEdYE)`Wjm6Cn-==SiO6Mj{%1(xuog~vR+G}=e)EG&QyzioNbck?> zb|7wg)1=zO<8ZRIcW!eYe%`Xhvq?sqjdO_$g9E^Q`bt|dMK%A`!mAn_6kIu)Wwn6B z%S4#OFSI18`$}mlstQ-~vMS4!UJCA-!3FC@@3R#P98`f)iMbY)!z!u zeK|$9%Xdk@M#Uz_mZMp&*|E^t9PJV2@xi0?Ck0vnyfB4oo=l#Wk(iRoXEheJT8)c# zJl)6~+{${nQZ-9iEpZ+h&vcj2tNiGU=w#Vw+1hbootlb3n>3ruV{5*5q{55}Cw|Ls zz13IV26>@5aq2Tw;k3G-BpA&BoVoO)vm0Dp zU>|g5v!S~oM~z1IHYy~#%lsc zjy2`!yge$Ei8QX$op=7;L4uF4wH_} z-Z4+OK)t~JRhD5+V_ZYG!)sLbTeP{FC5NHhr#BQK^E%)7#@oJn_xE%gWe&K@ z)fIl)v6(Pn1TrS6<;{&Em*O6%UAo1$2DZ|*W!>!Fo_aTX8LZ&8dwP*o%xDg2eO*Ji zYrVU=;X9u@le>82hr(VZlgB+}on&R=sNx92!oi)vxul-wYNWgo3bP4g%I8rAumb9s zoft;h%2=(86VFP%msW(*1v&QcP*h>&V4`8xmVQ$`(cvRC$JD^!suMGP`z5W^688&r zIUhC^LEk%Wf~e@Hkv0(zotH*mm`#mZ4NHtn)EUaW%XX~9T?jlQj0R}8WO^6+lKN%) zzxEx4r$@MkF~($VH`~!QF=F##TVQQt-DAa2GEfFn)l+Z0&x@BkGj|`#xH&434cLcnw>8_dc%)8Qkt*5G6 z8&f6MXq(?%nCsZF{h)Izpxm#vT zreV%}E|ILCc%1XDj=!$`+OmHx3Jn=e6pbwAH2NJe8t=kqv)TFi{2AqGR1TE%!e~*_ zmLE|E9GR~}Ue}k`eaYQeZ9KiHwax1w;a>C3y`|hR9&y!NQTe2>fV`MG?eV6s7sWe;jJ-O~4x;yc?CfXz z7Wtt0?vq;?b(#)+ei&9Noix$8sLATpH+k$6ZfFiTj~pC%<9S;PsNYR~vHiN@)3YzO z^Y$QJ=gA1d_)~6F-Nr4CgLCW3#krHRRu(24fRihygWD#;#)C^q;vO0*G30nTZ5?zb5rdvh2%)zRrh~*OnCSldUjX1S-6E=f*loa5?@%j zRA|Gmb#Ha&zJKv!FCgoqZ-r0J?RtwIvF~2G1{=%6`$vbhxplSk%IH?zt7gwHzE@}I zn;aL8!M0l~q}~H=bGQ75_q|@*2#28&F&<T&0v54M9m!b$$e@P?Mh| z?cbLJGNU9&`!hEZvN5%SsC3{<0{qNGqs_#|p$s-=U;_jjd9bkp8w0R00vlVfF##KQ zuyF(%8?Z438xgQE2OCqcaRM7*uyF<(2e5eyHkM!`12(Q;BLp_qV50&yE@0ybbOAyf z+uA^aadL%J?P%@>KI&xV0E8ra03CjXWbOjlw=w^f-2|j8f-^$;~n?dvd42QM*`f6N$DL0(`Rm6uPy1 z(9hWyb+IZpp%A4?ZKt8O(_&&}a|HEDh~IVI18mx)qN{L_Moodij?Z(F#l z_@gjO&S1{`(&h48p1qm*gOl;AfXA*!-R}9Dd4Gffeopo)&kdujXJnpER);JbMtY<~ zw-K&~^X=96yhba8M7J38-Jl(gfd_Nmi%u5w$&>R#o*KCe?Td%rTj>?o?17xZoJI8Z zo7K@k-^b%C&xcb!fyJzq$E2+Dj_;FQq5RjA@h9INb=N!R&=F4#_z&H0ibD&@Tu9xG zIW`{CyP~Dxwrfyk08+2l=+4}D8v+=WFzQ=VGR3#?5*7dr+FI2cnT>&!O6eH0JB(I6 z_**FTZ|fS&D|tu;G(O!@D<9~|H^fD1X_Ul#A97v_=VA8z*Z@15lo0HG6CTc%GvFfM zAp8k2-E7t5P3WaaE0$<3v%jQ168y_>9DgNAcKB*$W~`7NMUluH&AOyIMLZiNYk0#2 zlAK7V%!uXwg=U>vt5SoyI);4bS8Z(bzJuO|hNpWuJI>cl-@+!cTKMh1?i75B|A0ai zRW5CpY>nw$CrdfQ^v3j@i!iV6-5^pc&9>O{uv{8M-YhtT5y0iyNW6;hXIvFkV3T58 zCkpXa1f?2%&leInMPl*&7aWzbYiL#!+F5*X;(eTp%~2{PpE>A%2|Eu+K>GbbFD=@`xuc_DDd-KScuV3n{>Rcn=m zHWXW;sKUNk?@_98B4{FhRNj+Z>{A!jkn1?B7K{C(k`@w60BDY z8;KI`O_$}<`JRhMq(tq!cM&I2$D!R(yo0)u$4Q}B~gFouH$5kK_C z+rLQj-yK9I6TwSL)O?0AXZW@?)OCoz_6hUi`s_*fam%NNqnEBzQJLlSnefI(+H*F0 znnJ=eZ_4#?vkSdNBWTYxDh6MOG{?SV2FUHCF46OmX?z%pr5^)wrWLwJybP|B2n&kE z*=VpBl+s|JBhOA+C}X9PuvnlX+H;0&h^kjlFTr;1mMF28h*Eh$cdFLpmuY#5H>_n9 z#hmi$V;q385Q#R}&g%JueySJs#HNs@<5%7=ZT5jEcp4ZnW7witE`;I74D!yAEnKRI zVFf+~X#%%Qiz78qD0O$Q|u%(&=3wt9Y-C6~6NeY#_3!WM6= zsGXL_$t~?jF6gd+nkgAyY`c^;Q&Q3}Cmx}^cbOSE{_$$7Ke}ndi)4ai!I_E!8_7rzR~QOHpQtexWw8Q%33W+hczVKE9eDv2w~kx+bUAmC zmGXf}Pn`vu@Z;iQyk5)OJ%Y^7Pir()u~~)2%-zw3OX0rxkrbD+DWE|irr0)ISuAVB zv&Ys)5AvkM*K0&pd=}G`7P2U*Nehyr9+w@<6W43JHBKy_hIgNoQdI~e+5!3z|? zTIF=9(-?hBW8RsMp^hQB&PbD%$qL!|UKN<<<{94>p}jbrrc-7_WQo6ex*8{#Y`&f? ztVZ!_UOR}&&MHwK8Y9Gk}e#n0jC z)YU^nH+Hy>CVD_Ulav>scB0J^I5~6PVnTE@a1!vhq7CeneO-hO!j{jX(o{ZKwG|M& zLTq}K?oh6nO!!{KT#_W?;}3;6V@EI27vrOLwGy1v=92U|YxQ-B4ZC=A{+!~6)}8pT z(tpC2*rxEcSbthjo$z~i+`J_IurJp$z-W{l&5`AW^WgAu5`XRoL)sC*dZB=5Pg5$C z)vNx!psIZw1ycY83#(+jOoU!=&5$5ef+(|64gf>9d_w0lWAx+JlpzZ%oHsS}+_*VbOg3Yt zjMl1zc}{0p`&gV2b1rAb`B<(UeMQ-pwIVOdGBcB(v!O-Jy2$=}hzoX;PZUL^0kth+ zbM^&WMV$ZL@pM~EqX0_Adcky|!n%_ebzLJb2U#k0&gJE&+6juD$u{&By5)2Z^>sp* z#QEs&f%p2R%iK>iOzV_ZLIjjh4~-cWwP@|KsYKr#Lmds~416 z4IKYOjM#^XsPVjByPjm1A44x&dn(aXMPCeuW&qioUGN?*LuQnLzqpC$nAA;KRKJG zhDLw@9juK!ejZ=K6m2jRh79Gff$=WERV&xPIFUpb(>TH4ct}2%x|%KfD1)&};`i6{ zC++-hJXN<>Yh8;yi5&26W1qQNoT6}Ga`sJ3KTmjC9-_8BxraSLnI#gUg3jKS%U*!R&e;U*&pAI^NYgIbdu@uD_^CeRD+P(*8}tg`yC* zdhEC)V5!pe_U+8E-jwb0$|I&BHmld>A=|5K1(hO1+TuYWq}NlOr;&v>GSOof2p4x; z(ZOd@&BNKL7CL4xN<1p^4JlsCQNEo9LPGp$#o1D+iQ) z;TY+_78G29N%vK@AxwAsj>c1K@l+N#vYRKivR|Ls@Yym0!OqD?dRZ((_Njg|?H4g` z%4ZpUtv^R8ZLmB%L=xS&!ZR|S<8`s&7k?gDOCBwb0y?>GNd3fHXO#wI^Ec&ijL}WJ z{fTgreiO$?5c#p2RU}mH1_KLu!~-=_B_3}|s=kP!1)7=2@bQKEn5YqU`BnM%hR+Tj zSMcg%{RIw~dkqeTPikQ!OHIkIx8+teuK13ZQCX42ghWv+bTL(m#2Mz=qguUO>Pl8$ zxbLUim1V@LDR32IsVKg*OyyqfZBDF>OKbYnE2YEsozDZ#Q1){v^=3@7Pcmw&yr;7@ zl8$yuf)AScS6~ygS+@1%;e%s*u}m6Ot#J~TSsYd|FfO+H5lb~-}ANWrVSlF44D=vB~Keq3yfuy^}Tp9kJ7>G zVutTy2qoatbN#tsC)!q;yRtlIL37M}9<4>^1O{bFr}8q+gd(fd$axCO>gjoD;0j46 zG*ABPu`Y$@<1nw|vh82I#VB=xDN1wGeiyOMg^BsYFmB-dhvN_JiWK2>w=`pdurVla z)C(%1@jQ18#+0yIxRdYv=@tg%38WwVd%hx)2p2z#jd0_|rA~%kr)NaE=p0Jeg2hX= z51vHI1f&r5rsSG;vkgsKD_21G&#I8;S@D!$Am_b#_iC5g>Q$0?UconW+X|tTdDP`Q z>Bg7NtO&CUY>PEz9-r-?G`PMEH<#exz`hI6lt16sHRBQvT0UJ?;T`abIg*8AkN;k~ zdy2B`L{DO>_-30N9fusH(5WU-C{;CPMa0iMu506cI$@{FHzp_S=`pFcrvhAVJ)hM2>wBCSg}Mh2@NW8|A0sbJ z-}m>PYJ6JdKlpHVSL%8*wx)3%6Y3u0Yj*DuvT{G+DK1V_jjZBche_~^A~>0=laN@+ zB-SlMdZ3C1Gh%dtYCT?G_;J8EZ9JSpTnZ=|Vmy6WWN0sMoX6{T@8!jac5W*vd^>0E zb}@VyhkPZVBp&ZOGP7cQ_(lI*EhaBJJD5-Q!B_pOtqn&u6cxj%v=he1KoLp;x&FLj zT&QBYu0uc#j`BNPdSekH$(MT=e1uSU?gkeyttwwx$MgE#OXpx>7p$#j$c^q>Yr-!l zOwF^^i)>18ZTrf@FoWJ@x7Hz&eb|kVZHRKP?k{*<@_}8EF#qGJ6qaNKvK4aASJtk6Ej(Zxv0lDmQ@gd&*@8ewq!MR1c!(@-83&Uq#T&fioQW2ck2b>;=` z>TXS+=yj5_IN%|TW&XIMwVEfO+OC?YLt9NR38^see1sZi5C*EeHV^wYO1bzY5iC{m97>jyG|+p;e7ibK7uCFL3Ui_Tyc4w&`T)@ zc5z%wqELew3RzX>#`!$3Sh5vv%x+WyZ6FzD7RO6f2-Y zeqW?Wh~VzMe19ATC22Z8wStQ}5neX9%_*qVx#%FlnX}l*Kd)Vh-ty@rK8VNNlymP` zfPG1z>}P}|y4t4bGktI7n1XsM;m=aIB(wQxCIsm(p>hOx&luY&82NZ^an74q<{mK} zeSJ)C3K%J=lnFno#M?d>E2p1hR;qaNh?1*{fWVZ>zIo+~jM4v?TdLrnx4edr!1AF* z_qy}AMar#ujh}TMf&b?>Msz9VRf3a~7M`!9WL&IEXT%pz1zdcOZ%#?bB`qJYNMTn?oG~^+|Kem*l-Jv)J=FTp~grpVQ|P0`I;|R%T7x-Yp$@K8zob zS@8k}VIQ>oprM|Z3xE7iVHGmn;J>RwzeK%%Dyv`(^q1>J+RsdR$?KlCs_IXpS&UYL1jQWAnGbu{{(G=v_UqI7Rk!N zMb5?n_@&)~^<1#-$_APy0n$?;nkq=|1?#+=pqklPK(&GQL5l4!Z>-E9h_z2h& z2iGqZ7OV(^_1E7m0w4RO*MfZkK)Jx$Eeojkz%2o%fwf~uYr*>$8_TOr_5?D!w=+koGEr=`)(ZwMW_TQ4YBKRmo`GzRk5d9pY zd_#0`h^!4!%^?ytMEQnj>h|Db5J}t!G(igD-45trZVL$yBA=VN+nN1R!|lL%|B}Q- z!Q0|sqXssTU}I!!Z4GIR9eDn%70?9IC`0feNQwiP4}kaVoq>){=D%wDOIrt;IvD&? z(VdOV9gLiBP=N+U&Q6dX69X4j1sjM`FALt12AelvBLOylq+?vp4xm zqBpPxwa(GN2GTqWNCu#_`Jbl$R`7o-`+q=y!M}t0t?+ArJxGC#vyCyxv5~C5r%eBw$Ng`Z0y}_-{a@3;THVZ*wbYU-?7S+C2UIgSIpQiZJnga6GJ3b%WY#Hh z<<1b@^iTbuS%~}v{RI7jf?(4y-p9gXVIfj>j*88joWk~FpjVUd>hrjS)NJf4C$^N( zjmu{`?&V#7;dxQ)aP4V1>tXr5oX-?}<>AWpN&(Sw8&y?80ETUa7dpx_A;en!aTGfOr?-459)A9=AE3=A7Vug(eWTeZC@IfZ&)FJ;n8asbLmD7Kh^pr)ATL_UpqNN0bkAZpU4BDi=!I0zN)H z7ihg+xna9r{@=Tk@Co(2uKJ8xxmqojUoVsW7`p3tU%aW)#ol!WN4JJ^jfTM|ieZbe zKC%9EkZPaKlJ#wZWM1hs%JjLTQbGZ;=X0G%bmuy>N$pcJiw%^Vd%L|2SF(cdb4A_9 z+(*2fO)R_s$>neE7Jch)F*}xL8*XP@N~&PBI+QZnqqN=KUm`ZSR#+Pb?Z?d=G;fv< ztA1Z|h+bEZTuaD?3r&U=8$^Ef9g9o+)vIY^6!t@&+pn>vMWyi#Uk};EqvV!o_~Rz2 z_OYF}9gtJYMJ(*!sN$(;YSND|(yE*Vz0AiC9g;&B&mcP*j>zq#QkR&i=D}Vkaz(n@ z+%!hyiR3Zk(ACW=wWRc3E?A~`Lbzp)3|!{?&L5{j1}%)dalw8*0+=Bnd|v1j>9MEj zP;X@YnSU7DmDec0Yid?laIIit;#%VPRqV9TW%wcs-~y#as+4X?YB;h5Hzj}sL9d(plt;g! z#nW$7EYL(=#;o1X-S8IS8F#}Y_uAAP4Q~RD+|a1Zd$r`b5i`bR-+cU8Mx0F!e z)w@UMdu*zHAbX%H%;YU&cVW+aVDVSweV@IB#7$K-8B2I7tMAF$STh@Q(Vp-;yLlU9 z=swam(-`u0tL1MK@K*FENbcfbrQv;V`=XZla&SHcBDv=*A(;MTV`I=S^WM#vneDtM z8e8vJAVKEh)yfH(_?RRaH#bYT&cIqbM$LpDbV+hVxb5jl>?6R1Is2H78ZO%$sCZbj z&$MC}|2=HgN=<0Lr0Sy`kMNeSXkLtP}ehA7*M`oRvmtH)5uTA|!31+<0d$cl_ z{?w3s`gxZhHe+DwJX-5hV?%*8JK75z1(*TmXCaQMqNC6-SL?{?Pj4)%szepDw`IPauL_r&CGr`tdny5AG)W@1~4<3NK zmrMs2L;@K}A0&C-s>~!cZK2TywlXp_?Kej|cpX(N+c>zBXUr3}aJ;ZO`EfAk*?+84 zz%7DJbSPy?-Jxl&N;Bj^iHTrLON&qp2+mcej(0jmUaY!dPJQHbx3^oJ?iJj3i5O`` zb3lsq>3l@as3$eFBg*hLygqsA`gQ+KIrK?jWu#s-B5nfRTN8rv;Q=S{rNl9q&gLoS zgBSNhnPR8J7_kRdA|*DaecVznONw4$mW^`4RoU{Y-O5fH-4wQqNK6+!ch!B)8?rDh zCKdA182Bgz7Nr=wngw__Fa^IaY5f{SD zDDDI-{U!Lz;rN$6GQGkgXL}8Ip`96+kH2)+7p-cU#vZS3fs|JL^_^;0@dq1Yq(h)b z$4i^)HO(ijKm{w^xiqDuxn$!O2+Fx9LdfT?uQt|@POi@T$}`K5Mc9m$*= zD{A6{KTf}?sVf*Q&wEIS{{D&xrDljEoM-jjN9AP}7>IG`GaO(EJ{epwc4NREYGk-n6t{~R80M%u{A zD@@fF4TbIBEOh`S7Z}6LaKlCcKh+B->Fb4}va75fZ^7wub-y7l-tpq(gy;qr&eC3d ziYgA6M`NNJh&pQ+B8Y>8co3=B>nedABJ^)sGz9_ z7u|){hFhA`Hi{pPM@4*jYOU7q)RUhn9T}DPDO1QPl=dyl)zl zj3j_%jfHZFsR@<7@f2wNjS0y9b?^3U6p!7q6sY;7fjg64OZ*Z4+1c! zjyt;_jvs#73w`>WZBU}9dToy0`?Sfc)wt>GiL^Yv#vxpyzDw&N{JkT_Xz$Cuh-hVcs%k1!AWagT=?y+$ps><=ey z_BMv!FnZHhyZ1=Fza_zb`UOK{c*a!)xluRIiO6OqU`3sN)x9TfHU-@^z*r`t&^m~5 z|NE?L)dYLAe_~j0Cfx$A3)2KQMoHHyh7DcwPmcUN2L;U3kDS-k7hMT{e)M%Y*l^yu zR$9-IM2c`c3#1joOT7)srR27D5Kny%Z86X1?p0g5k+&&i*qJ;;+@FQ=Y{Gvwx$&$X~C1zf4<1xrHq$H*vY*gMHAJ9cHp44+G@B7W~Nm-Ro-vyw$Fcvo!wmHxw zA~uz|dh3a#ex(#97fC&_XxkIfZ=pE$g z(EB4z#?n|P`zITW#=#bLQ|y@jZ6Vp3T!8Ek_PH1@|-Kig&|Do@UG)fxtU#>9TS z91WQK7eOv{`wQYJ-}FU0eN8Zkjh(&-?yKfty(OfX;_6aPq?w(dtaxA38JYqg>QjId z7-^q(E?Y*kN<+Iqft2=2c*xdVr8!gP)ploBqTmT_7H%2WXzQ94DjpIg^Apr8sB{ir zq~!7m`H%gITp+=dJ~Y0jSbyk%Qrpv&KpoZv2Jffs$JnCI07OoNajEbNmn}uwmzxxN z@#NN67=DiH#tJS(TXMybT5lboV}?u>!z(`vAlc?y)QTp-Let+*U>s#I$nr(l>a)-K_D9Tzr-YKujjOZPi}<4T{V@g8Af@|AW+ zceGSYK4zdwOD(6puDmjq7hP^0_-gaImrWt%a>~y}=JaH)!b{pQb(}WMFH-s0VVCgH zEapNmU&Thu{nR%KYhWnJw1__EQ^X&2D#n!n_BRW+pZUxcBG zOuo5-LE80_8re? zK74uTbBbijln69Qbu^9ag-z0PxN=)jvgUk!XyS&g5`B|mdPOWJ{Wl>E4W*dh5A_YK zIp*O*ZoQseUz_csV)R8l;|=XFD1AHEt?`ZF@kk7xQ`vf|6ZM89U*7{It+S(t(X3Tp zCke23k3}_8jOr3Q5hP=za|v%xC66<{?K4E*A}3cUY}jjE;j4gbc@dc zX~d^VmycWG*T!?^3)W;wT4VQy{4mBpCj5v^rb{YQHHp=JSiICG|9L5lmd~scTujWz zrYlUPZJRsi$?@=LaKY8RO|Shd3O8%^RyGd%o3Dj{RO;(-cr}X9bSRq@3e*NQD)o<= zIvNa|@~c^Os$6eLf1cT?T}&xuz*n}_X5Mj@Qt)z92&lfH3!c4|q;-AG6B$k1nnaN|u`pFUHsz`EWyhT_iN8uN>v06BF^^(1E ze@Sg#sC)bBs+pa|AfDXbSIL#3yxz#>YL%}Hbwc_T1AYFiMY*C-$hSyuuOlxdJu>IR zl-h}Ne1ygk7Buy=Z=bkIDG=d^(a!0^wj|G`HbhQDhqvum8(WE4>%A`>zY})pRgpTz zy;Q=;r1WuRDX-4SDZ`|+%&=>4GApJzH9d6HKB#m^)zrz3M%Z39o6}F?jW0<^s0N-w zZ}&~StIKDbGCI#&CSlc z^pJmLX!73fne|VP;r!3(ui*B$QV~>4qEuuTPyM5hKX+x3cTXCr@Ac!{9K#xjFayj! z>u+b2C=#R&e#xD}ZU@dA(=g(kC)Z0^Nm;5s(Q7A(&p4UPz{)tVB{j>{8lUmDMquO; z2}YyWP1cA{}8m9y`tu9}KS|SKT_Fluc+a(ctt_hOaCJrRehq zeor-yA_!;;?}Cf^_VbBqP$=1GSnOvtj8}coPmwizTlXz+H4x4N*p&3QW#Zoyuvyd_ z(-I^3kd`!GUhHsjEPPC%DlCx+k$M%nqoDUN_dcL7<@8o0L5znaaz%T62;)$4-1{-F zqv@$Mt~K4ymIKd5pKOb&2q);HI!oz@;#|c+M$x3Uo8UF0JT}(%KfkWxKub8@O)dsd z@mw?DdPIaRB5kyIEbl^p%#CNjvvi?4D8ZfS zuJLp~Nzg4UmQS?T?K$W&ud*Yyd{@dKE0|%R8*&>y*kf^RAZEs;_f5kWMF*q5NI%^d zhX_IVLtRtbRXFN8P`-GsH5u3GLnvbg@(&FtBOmhxIikv+jIikqQ&DLyM9+kCGZ(92 z$0z5@9z*=(=F=0Sr;8^`Q9r6_^QIR-tvwPfLp?y5v@qMSat={qe__qhpj0R5FdfNh zCiX@4c7F0DP9<4QVi@(~E3S`Ik0lzWVaJ;z9QqMwENRePQBy2)p)5a<7k7(FQ}TuP z_@vl1()QDe(<8QrvHOURUrkO}k&}mzYDKP0^vQ6kAX6f!ARFP;)6*pTc1Etvr6gr) zC@nMdPCD#i0jnN0ORCM%--*9Hy5p7)gC-V#QFjE5sln`Bs4ACsU@7+FB#c>FGO@yM z)Pybvi638Qb25fKKp=bCS`H_KZg)gN%veI8?$B7xd}Yw?;0S5BV2Tl4@!)0+$u?GV zf`uha*F{MsusV6T^D}^w$Be`iiln#1;`|+#GO*6;t!h5&+X@MKd=EEVzW#-$HGP{b zWCetRRMh<(D18RNOZC?#17o?vVaPY^CNs+BY@0lxoKhr(78c-?GZu!Du)mG!el)lGj?tMU9+;*Ps&t0|)9m*Zz3fLvnWy%Q8K*s&| zLiCJkaat1F?8@5to%Bl;!Ihoe*Sg=sn6kSU-!t2K>ZHf;(mNI?omwyHLyrjksN)S~ zeiS<7Yv3qHG-<}+pW%OGVIh-%qdenQe}dlmnt@II_UO9waX@>(-{H-&AJzH~u4Xq| zB=)ddO=gL#+grm7xezp1{KwnRbDS4Xa`*y8*X-~{o#?7mZ`K9U`X3e_xTik5olR6V z&#v~^`l?5@!KmFS={#&eE1~0I+K~?SK3z>S#+jEr%*ykdlDT8eTK_=#FkugS&4M2w z!*vrxJ68V8Z3D@B-J7aoGL)GYy&&w7q7%xMfk*Mb;Ou|?UCO_~#DC)Kzy5^f&qAU9 z#MxkQ3x>&HL<|AQV6Y4VyDR_@zy!U2L1YNn2JJyW^fp3Cat`*`QB$X4YSenLxlB zygUdDzB$3T9rOv#0|w&$ss<#y$Fg34AlQeu>POG_x~;h9M9jczZYKpI!k{a`QLAW z`}|j~|6B(4zb^gvgaJrP|C&Ak?!jLG{J)n5_bjO2AQrL(>cRhn-IBjxH?t8JfQgme z2y`+3S;X~!1-t+GFBShY*!`b%{tdgqKp!*}FXG|=q3=Jen*&7j|Ib-B1a3o+Cj<&Z*fIodLg+Jubwh|Ugi=E& zF@$nLU?_yoLU=WVW=615p~(=!4gu~En*J}04Z+<1#@G-%4q@OB0u8~*5GoHL!lGby`x_|#0m*;B^dH3h zYf;&6qWl*Q{~NmhCf9#)cOzRn_dgsRGMm8hH$eUazahjQ0?+@2yCH%{z_(vTSe5U&0|5NXGMl5z<7{wK%(o73C=i3kGg|0esOwT*vcaM3?p zA40kf9Sn>tflhxW4gA{u(^5y!-X(scKoohGw}e4~dqkRblzjCEChJ#^bfv0f)8e_;u!qugSP8=URxX z`_;}h?*tIX=+LacNFgumZTcHE8+9JjEz-08qF_m(NW9j1%Arp}?!^kr$ywJEK}lT!V%kZoJnrm#}5z&HvK-!>XsimbuYXdHpgnYQdIw);k*84 z8)D2K{y)y%0l2buU9^s^j_po5w%xI9+Z}am+qP}9W81cEb$HYH_Wt%h|Jk?he@>+; zNhNvbTs7CK)LLtdcRZPcS*HCQF#S+q`Y3_XgF1P1=wM*E;a!J%v@YmjT~lEcF#Q+k zVIO~C3?oE19ZHu#m-=Ohm7d&uwtR{yw=z7Let(T_nl4{|RUaNMF^z?D_Ll#} zE8%N}#9;cR$@WX|9a4uU-9=x=uei6EgM4|KHTR*jfYmZLir9hZ=|UuBq=x=5A~UN1nCc^p3BKQw!lHyKZ&Q70&a$(r2nn zuV$XKo%iA!qCt3?kSuUABs#A~6QXZ(nag0M2$ucAlR3TdeHW5u*4HOz`JM&6@7CY( z8*mmAD7`ror_K7j055~iO!#1&e{A^REi!Y?V+nSWjh^5$tyd$9(XpmGBju5>1G#S; z&_Zw1(Z*uH7wSbBkx$_<+F>By4V?qyfECd3XVppn-hFl}sE7BxNha-7fRWJcwtri3 zOVrZE<%jlIAJd_;5m+KK$=h?N=HFCEA!cq51%+|S>`c~9)J0kWxuPcLi^rYcAwSZ$ND_9CH>5ukS3@=>pWB{<+EpKKgx@Yn4{~hbjYGpqFOlTU(c|z0u4 z?~W}H2uuRQ`aN_H9lh1%o&FIQBZU9^)sJa7wFFYcD?4#9rsW*H#O66qVevBJzyz0A8&`p!>65xMjbj(Vq z)DGYxu!%X`Zp`~?Y*?7bZ0v|@VtMvG00Mkj7w*fJ*X>3L%{SRy(rj}bqf6yUyeII9 zK!&X{X*2-O&G(~39n@c=coEBAaFOd?>Cr*aNvStpU$5$|kF#tXOTaI`F%& zU!rGTC&gN~bm`s#-2xwB5y!hWkRkVnP3g`L)+z{oboIEr^f*M;=XRM^FQ`zJt!F&N z;zeaz@?6a;dDtkimP66S>u4Teg{{qYdT41#ZQ&Z?Dwise_r?_AK(?K=eGhN@CT%Ki z?907hoI<}wP?6~v9P?=KpgLGHGxz8Zyx3eZ9-BDWnhI`oJ~#zMXu4!p3TBQ~#DbkS zjyo8nnuuLX7n_4wS0zbfCb>Bv#yE)^*8spuxKRSRA_Ph_QchVoS;Go|O(}%~+2S z46J(`3sn~9M{+n_ZqAwKWwM}}KyKna!eXbVYL}zjT>RoHt|QdeD$DEMZU;&uLZK?a zS`~W8@9ytzkdSgLG3E!+OW20}YVgCtKnvnA0P-SSgCj8*c^UG15b$utDv8>usOiQ` z+=)(ObG57SawJR)e1ev<;Zy3!ft6#SR#1WTz_J z)GrjxM`-z>1@a>O3Rz2ZSd%4WD80<@MBi+M^ zk;9){2#7?rUZhmFh4wluX+Shx@qzQ4xZN*Cz;)uc$j*MSbya*i8l{o5rNeVzN}`Iz zTGDDIr$VGS>CvQv_zapgJ!((Uh#YyAVv$6#P=aK|x23hT)hg=WzSe3?-=pda_6Yxs zA)7cSEC*}t`_AL!b$73}A{}kLFLJ*@;R;-G8s0t(cMi`21P_R~sc}g@8jy>6z3v{k zo}TuFCRkr|8=A$EcxxN$V}mQYsP;Y+8jr%E<|$RZbH889+T9Uuc3}OH#y&7@0EoUi zUfOFAx8r`KCVUYdULPgbuw`&#CcmmXr7;;G#k&RQZnQKNo8LSZsjFDrA6aI4m1X^-YEx#X2p%TmaM%&qI4gb24Mh40&1Z zA&Q#|oR5)UUjNen@!QiRt9qvw*{VgK?r8+BgtCU?4(AZXXkRmN0gP84-Nc1PP!K_OBE^5iFDo)_ z1u3Q_6^D%~ms6%jsMNot$KyG9vLaxUYD`9jZYa%$EJOuAmNe};TQ8q%vLOH|c|=?~ zU9%J-ZiV{Rjg$b|RlioAG5g5_UKo^zZ?>Ziso#RJmAX#t-Tj(B)R`KAEN3W`QS#|nW; zXQh_T-?7Y3-kuv(u7HmM`-5{fTYxbt8W74Y4 z?0P-^cYDN_fNm1^KFfMzlpfSF7_Frf)YMML-eDr!w7()GZ%6LE+=eh3|gIx70&c_)}iGmDoHUb+@Of zIB-ZZ+u9fI1Tf*6rEh@IP`UdWgGeu=iC%a$6ufOVf;6-I@f>;N0rW(eq)NEoMHZ}s zT6cYT3ijXo-^( zbCZx;>h<2T_$|1sE-W-z6QGpP5)O237cx4)yid3n0F5us>X8@_wI-Z!E^<02mUSmN z!V6?4f{es(%&o50B3$!dCo+!oC)92f9K{li>2OzH-v-EQ3H`90apC1ed^3q-!85If zO>hZ+A-*&;)Hg|v9*Wq-mC{y* z9~WTfF5>ZZfW+k>(Pa9XlCq1jZPawlJWnFQeXoA?*?Rpgi9xn39oU!cpqO2}xjG&? z(ls>LvWBY_(Zo{gO6yCXeEfoeH6Q%jmR4kipe{`V>RgeEq?@@d(9yzaH~@4oQHT~F zQh8nQ8C!%YKN~e4gZQbeJ#(zF3w*};1xo*EVY#z|H`ntKOemAT(V?Tf^=SxGrO?te z9Elx=lVuK%g;Dz24l+p;mcBkq;nx-p`omLZ!Pv`~rXWjyKJ*K%#wTE?s&nmU=!FZ8 zi!5d7JOZ`mP>}p6em*goE!Ez=F4cPf#jzQ1%yD^sBC^p-i?>wns^(W1>0f00k9+5< zQMrTdkUh1Ty$!?Q09mi36TtMzQAJ6-@`2JdrB`rCj=%VDBtQoONIrF7PMRYZB zqTKRg`_`2YC&*4V4>{+C8R4znO}1{6)SqwK7$wQnPt@+WFM8R7nA)8djQBHX&0})6 z?x#=ux;b~;c3xesXWwaYwR{SVOLTecGu)NiI&lVdGep+Qa%r4l>Yk)2+X$eXBK75sp_4C zRuCoqM1mO!aV{k{4RXKN?(Wu?3`eqmaUl`C z%phzP6A%kEWDEXj(urM#J%cd}12%2RM2kwv zINA3gLi-&Xm+0K&Iu3Z3mLqkYRekhej6FqiCbHNPxq4}Yxu;&lAl=MmB9y3*?-?fG zI6%HrmkJT_=;D$jHSPVg`NEXBU!5xRI0yKp8zAFm^m(2rXWL@Fcg`Lh z6?T9{*RRdH_(tn9&ZVT5QFfGSv8Z^rBt1Nyhg78jGaW18lGiiYuk1Ym9Nc}`<;ZBTRS*RgDi69sXZ~?& zdtFoF#R2ycNngxm`gmY>0LDyKir%$>V69EfdL&A)#JTB~6HG*#)$! z^RPX%kwX&{h-e`tXsGneU#etXapR}UUNAnZ%~h#2^R9;a?aW&CcqH5-@thhYeEXP^ zNe8dbN)yUq94f1J#NIX<%Ka(EF4XE5n)1=C&Aeu7N;e#-=7!CyVBc{f(&p+W!Qf(C zgqXOO7zs1oT~dJe`|WgTp2Ou|87GM+=?`oK#l5x*!F6P_ZABvhJIp@RZ6*0_Q-bSD z^Bn+Enw(hFey8Tp9*^5womZGH7&1s=Y+$*`EXVM8ocuUvQqr3_xC5!IFQ`+9ClG3A zRH9PZJKM;R%gI}i7GFIG;C%fbq4K|pNgnvv!nZ38rz@L|khPFF5hRenRs5M=t9P^1$*y`F{s2YC0Nbr;i<(SG%eW)05UIisoYLFoah)?Q4CPo~&3bONy~hYde7L09(Dv7*eb>n3Yod3El8 zktDY08QEsCuK3>6aHHe1`@|Ijf zf2Z^f(B3yu9aE~o4$es!m52FoCaNA{g3m zCMFD177q9OVtoPW8(WU49J^^Fq@4=~W4wxiuJ#y707jm7To46lMu<_(R3?fdw`eA?9L^6< zK&eKMm^FLpgpYCG=fejtc6 z>19cU!pV%<6y7}!;>03BD`FIklTx!B-&88uMM0bu8uQQH1Y-UD)P0eumksum==bef znOyeN0~!t~hS6CRwcg~_vNz7=ZQl4mU0%sn_6YO7f*?(-)IweBt!<7`=dG;EO0%Qw z&@=~=^D*^_ljXQXX)BP+zJg?UE=!i#1{QU6-NeE=)h3Ma&Tww+%*@P;d{kM;obwS4nS0>% z)0(-ZPzmYF`o&BB2+IwBzvC0S{_+YB%%v zwtLRz$`4Ni=3qpo@3+WtRJ`RBFEEE=qd=^r;kCIL%Bovq#THoega)7){vL-z(O?Ri zQoFl+O(>!KZO2p&nk}(+`T&wKZ^wB{A_c$r6}46Pah85d+444tsV);5UbLyOt$_ zLk}5#%9`Er=21&Q2vo$B1rs}L0F=IebIhKX)v!(ExF0^XHc_I%%#iFAe^Q>JK1&R0 zX?1a$L#GJax`u}4HY$2Z$(+kwqc!_8tjGFLV3T%L_b>AaM;*hB^?gjS;u~4 zwd$Lw80IeurDouWuQSGdZlNbzDIxe6DN$^CR-EJk-P&)`vpGN2tBtFeO>fFEP?v4F zhN9m|z_jP6k{}bKE`P%-amG!#qIh4-fX7D!sxaMKkLbTPqcLsCv`;%dPmj)81>}%PnC;*A7t_W(t`e!9zN8DzqO$M+7JGh zTJlHM_)~xQXV2n8;`mT&{{OUv|E&7=k3V%`CRT>O`@(YOE z!+#W?zv{z(e)=K$uzrY5e+xbzNB_`!{=Gl^SxWwM#!uztLmK*T>WZY;A9Y1RfsLNk zfPqQ>j~wzZb>)B4AO3Uy{EfcR{kySnm`SNNB@!t|f|!ygy?Dc;BD=7Z|@F$oVKO)s1 zLv;S0vzF!z9kp_woN16sj6I~V>xTt(3D{!(D&i(}`WUk~PMpzHQFyT2+-sa!-YZuZ z%mbS?KEJ{HWr8meWb2js?XoNN$xUEcAzWxN2~giH9!|`V^pxbVG#n1QTV#*h=uHboTV{0jr-9n3>dRA;pY*I8 z>~=pn(YZ0XI2xYoYjM((IZ<$7cQW4ssZUorCTguVBp1#&UZUMStxYz5Rk*{St~?;r zUC=GG^ZfGIU|2YNM!fXgfPHR$eLrhCsc3zF0$%7@=y371?t!kDXc<`BxO%x3y6xW% zpYYrr0)B>EbwaDG+y2|Ep18=?g zcOl-0`%5j}yL5Yg^v1Km3sNGF%~<5`i(AJR9-eDXH`E_Q6ob!=tB&Hg3H}?;!W^m> z_4*xGhC49PxvDAIwJ^vT={fz;%|%5d)dF1W5!5@xLsi&4bj}4)SvwZQ@HWvRbUq+H zOl)EPdrb-S)^|O(8mWQdpOK-M8XUy#9kP`bJTA%l zmPA+@F#o)?^M=#Sex9040>R!RE6iGwB-%s3w%k3HH)Vv1AWE%_EK%{TGuA~C1w|J1 zB&330i^D4YCBrHpT_RIxrVz=vTYr+r2HIA!ECi)mt#sDmVY7`;vFjCOh>gbg#ITAc zGzubu{wWW%v=x|(@a#2bTO&}QnUYBSOhc%LQ7!~rw-RT)3in!dLEZTDgll|6c;mWn z(IAC)24uWHN=5F6k2PlVZ|c?feOO>i%o|&@qZEtf&J@42u&#AH*h-Z&D?%30@vPmS z^vJ1kcI{0`y^4-wQ~BHsq?-@1_aaM)Y0Y$%_%F3ek}R+?yz&p2Z^Yts4gS> zDR4*T2l-S6H*OIc1M`C@9|2{AWcVY7C~hiA$N+XNpq3aYluxfW2_ciBpw@hIyeW6UHwhogRuZ-4E@L_ozjyPi6Kgoob0fz;JRR}QmMGT zsv@!?7-spqc)!#9;X3RxV!WXrs4>s=Upa^AC*I!kiJ81(#QcXVYh;iLd7qOJ|EZ+k-l7zfTu* zFXFRx1>KNVkx_Vy0qD>o`}B47Pz5+?$0P}nEBt|~&i35QCGf0rEjdfZ3E2u1a_CYffS8G)@@^_PyGUx|oj@z2P-;?& zjY?I$QB}OZU<8*Vu^ZAN=%a%o(cPBh>BEZ3D@x80Yg+Iz<(f6=e<#NHFj+ypq_yXL$*sW+6u5l`R&np~f;3XF@S%h<9EBnMVwN|3US5 zwq6 z&+FMNax2{2A(j?4P9m)!#WFBdG&hfGfQ&1dMhgk5=_%^iSIBx@tNAWO^J)L6uXZS1 zSM}rP@2E^SH{wN%5vB7HxY7vd9h9Js_hL{K%aIA579skDd07dk;ftd0Jmg(whI8Yd z?o56M6M_egm&wk>#ry?ZoUfQ+f?nFF)(|s$M1V~zz+-32zMObOtvHyoLn{`o3bc8} z!cjp|$xu;FuwJQ>%}@ax%p`W~07@g8Vn5V5!;TquyOVl=e`%=rg%L4ib`~8~K9*e< z(Ws#3mcS^xiqnh5{A3wm4os+H=IrNlxi^?8h55F#fnEAnz%MbcPR11s#uDZ`GVT(b z_Sgt^V*`z$B$YQV)v{%13OZtZ`^ZxHyu1*gF~JH{{s>1viCu#l3E5;RdNa;I+j5*4 zUWI#X4BjN8phjLn!_6Tf#VGZ1;6sFGs)|5g(V_?k$v}}!4&Q5;)p&mCUZDDP+Kp4i zT(zo8SchKmSP?i_b@8#ij#>ZQH9di*b{&6>gN5^G?9*!PF>LO<^5xLDAK{;R$Qo;* z@T<}DRHPFwJ?lS&3y$J=$}+WS-Y^#fg{eUCLt69?<>uq*p0 z&ySs$G+2_PvB3uHViSBz%`c}goU*C&MpI8^U(Sh_uej+3zc0n#y^u_wD4Kbd`L%OS zx^!Gj01X|@E@(`3DMoDcDMR8@vN$);MsT`+Hw)9%53fy6-V3gu?AtZ`W|Q1}4fRs< zlG7aC(D1B&Vt!(t3XSGH+4LCmf@AOuBy6RFbxU527}z#HS|3=J`MbR@gIFrJw|KGl z6z29&nTNx2BWO_&L2>={iNeAKYeh;MqEecA;^zcrVfMbM^DVK(hTOH~-gvC}w`MSb z&|sGBV{93HJBDr$+voAz4u|*0=Nt72-3d#3tykr$hiw|+iZ!~nv!Y*mo?Rh$^*i@H z^Y^lmj!P*RT6as@4bvS~g=gOG43Be~n@{!p?s&3;$hB?_gI?Y{pq+!9RXMpT3|Z5B z$VK*pu>;6EtPZQJwX|GGn0_>yD+ujl8XlnSB^u1yyklp3E;gm37}Y;a}pSP?E^y#}u2P4BT$~+z>HRcWVGGNH`KB zzUo7AiuvP{4}unKiQ1@T3S!k7IT5UfQhB$4%bH`Yf(ls0QSdq|rOY)zs79)>tZ+px zsNuBJ>sc;eET@iJclDuuYoa>Yhd&4KxqX}vPBV8sm^YYCB<1VMh0f|lOY)iV@d~#C z@yl(tb)|P#E&<+)*H^)8Ie}GmBp-}Uk+Z8=8Zy@zS9Rw*UwD$StZr$6gx{!w^Sl&< zN2rAlUnDr8N_?FUe0%X9V7GkrA8DDhBt7-iufA`kwz^7e^*Gb0chPYPz-zrPj@F&- zVaUe@Pi&I&Kue*#@M20OJ>ziH={U1pIlR;d?F&BtLOS}@*-_rhv<>bo&yvOIFdg?~ zEB%OEb4;;`2&Fd?r>84&iah!k&sVN>vWc!>xtOKsim0^2#u3@`I8S)6I^=fTQ(sgK@c~Ui; z+odTjUAj5JNu~Qh(oV4~GG2Jt1c(vPPV^MAK$=^EGnn&}Y9DKGx^sLZ(Z+Zi@~+@Da$xT|WcdHt${rGb@x?6-^MtpvD@wl#pEA9V~H=9a{CXIi`>IJG&j;Lz^}C1hro1DbSMWceMWfjw{7Q+-&#M zQNs1_>W^i%V9g6~&EYX4*5Z1i2!3{oyTb+a@G2v-U=uxF*?<}6b!rs@r}O9h{OA5s z!Qk%TR+lb(G$y~<3vIJVjk3l>ZO_4iU2E5^UZM{KR!INlY1Z#BGjIEx| zCSV4E#m6Zy)2io1&0ts(L&bp70>tbM zmJ^9stfnS56vi6@c{f2dSh=SlMfT0P86V|qRdAaMawwveb-7z91WK5ZsIZbtj*%Xf zB;9gV#=;`|aFHoh#oCj*XJ-&PBv*D}YMj)O1;d8lwIuE#HSnM^FbWK$NteqAZA$=& z?|Ls?7MIsk5hI06^AdSu^pC&;Z&vaKXdEeOAReb^$9uR$?=k==z7hWQ(D?~~fpd*| zD_pJmj;lB8^)2N{#Rt$w8M*+}+}{G!pvU?tK(DBx`9&h+0%QC5%+?LoP#Eb5dHBo} z{&iKs=6MNo#b7g|$<0dbnx}kyKTJ&l;H_%mE>`z`?^0|$R?=9Ry>c&Kuu!+bz+V7` z*`y9UN8E+xedrZq+Rr&7-`D2TIi$S?jY<_nZ6Oc*pft#o$rX`zpLlS&w`yMSXWrkI zVKJ;n&gS%)YUTd9PeSAazjQKOW73}s`rHVoP#|BPZ-i`N`E7j zi1F)#N5s=fsW0_wx@bwUDFBInc1(~S?ez^)f~{D}bu7^r+`@vq4T6|E6P@>&&NT-~ z^nnS@V(oH8$RO?g{BE$cBzhbAIM++yaq;y&-sk~Y4Yb)n zW#kIIDTI7lc7tj$R*WoClG({*oao^~gnVv@RsFlHEFpO_dr(nEC_0J&4^YrPyo#b< zS{$a?bt;|0?m~yZ8dUX8@j}d`|D>1_W6Uu_kn^eUB(}iYcUxJy14%k@s)jgxfcfPjw#Tz!zh?+N*Hb?Rv_{wiTc46W9eYB7f;%l)1^L=V103ZYUT#!Re zAc^Kg>lxf#Fme*1y?k)WI|9ti`{i7-*G`hfc>^iSMM@zKe}sS)pvY?QCaxBfgi z74rD--9m38Ti3JTY|lq^?Y4{T2Y&GOt+R;sL$J?rHx<^F@^`p7&YaR$_l|IsuvJGsRBidQj9blUnGUbg->oRLjdZzMW$n5R?gDum4@J5b_@cnUTPk`$>a zT4l0%r#Q4-J4`+Iu2&YC2zNtfv|#~SdY=IzClL4;d3;uRq~CM~4i)4;yFrl2GV*2x zU55zeXkn-p^P?`%<&FC*Bfo;a57t3VnQ6^z1!R$7mY4|T8Y}R%FO>KYUG$d0sQtzy z-R)}w0bQV1Pu#p{&|1{sUl`Y>(-ICNnP90Bni_Sqn}lEMK-7G2ve+2!X?OU}=DXb3 z@_RcY{t*#ApQsrbG!`mJFpe4RdPEA&SA(`MA!i{wqj+qhM_SeMLR|+CeG=81!iG42 zfecZ6r&$0bLL`{PyC8*RMUcH<)l5&4|D4h*VD;%N=q|)Ke{r8tVaFs|fc{7WY~WIY zk`9en&chWasL{)Q@-5!jxoYw3L`Gj%6%(Il?QQ;7(kre^|8^=-58X-Uc#?3t!z9|+ zSB9*uQ3N(@QN$QPOda3Ui!MzYEpS-Po~VN_Gdhs8F5hkEtw_rqg-+F}<_&A_0WV1o z_~y&!Z4yCS&uEtA=Mycu_UgkIgwkmE#9Ycd+8uU1A5s9d3iT8$;Ph3D9G%mK^W>v` zfdRaF;$T_7ZYI8OO?up}j%J!Hnk#qIJ}qAa{C-C*e(#xiTR*5b5H*A$3k(84?%IZR0mOGCLj?|@!~bIKuV-WBuZJOXMh#e2xnE>xk$hap zUM*F|R0xVsg)il8dpY$bi=P!~PohEeq)uv%!K+3@4*!b}WtEQ9qJ=|Z4fgt;^}1?N z1+l>Dmge`biZxQea0XZaFF_U1H4#h(l8W|l!ergWk0bi>RX1m;*>^*Ta46=U|C%p%fq2tAk*26~6{h zJ-0)~XDpHQx<<9~<8zkxWx{Vb>nXEJUFuAn6yz)ckDf8^*?`fzH|8v+|5{blDVCPv zh^&+z7-IG-&*#O~ZQii_Eqmf7Vj`XTwEm+0&Ht`JI=cuHGz0-fQm>kg9YBj7lGW_A z!^y1Cfu|8F%6c&y}Pz?z!8=vutGp^MJehaP_EK^6zrH-CWIRm?x8#xu)=8&K^aK)VLlB; z4G6+1Yeg76)y~ZbM4fw}Zvek33FLA{j(ubN`D<>&SD9~YzDfO7t&4EJ3xP*4oPoJSR0~TCI z=m0dVgo(QXq1Hh~EqHXh+z8?wTHUn1(<3(l#U-((rhjk8RCxz9vgEmLI0#IE7U4(? zf8AeSIi1O9rl+R;rilVgPB?_Z*N2=xw>@$Jy^}&#L#5G%}XA@_K>x;~Kb@jp_}X=!}{-@so5-LznIrgDLJ42IjLG z;nmFQ^T#;%z!C1VW4je&etdTHBaw}9MxO5rnV zT;G)ThYsRJ-~A0bW^v;z>GonXa<8W^2OVfvjs+qmd7}03yf6@<}D^7H}}5$ zvsU3vo_d+{Zy#IU@Kg&m=Y?0-%_BYClI~vN0l@%f0hZ_J8yJCuIE?HSp)_pQ^y;D1Z;F`lrnBr|R%mg8vV;{WCz| z56u13X<%mjYdGy+O@Pn*{YOdQ&-y=&0~Wf!M%4Z})_+5-QsRH1Rz_wqh9I$^!ooT67X?}sQ!TG|32xJk?rG_ z|7+6g$GX2c>!08J&l3TEbI57P&kVzHmN^b@px z0@?o$f&C|?{0ANVC#?LxvC)4p(Z6Z!C#d}!&wg^=PbmAzV?R->!=GF835uQm3)uWG zQw;tmV3UD~jsCxYO+RJzsFiu=b$da?cE1P|J8?h^UikZ;^d)QTHEZNQEJ@?zs|i^g zC?j{Bh@Gj|@6BG71>Ry{PW91I-VzrTZp;ZLP;WoyI-e-=BFTq|aK-&0JjAFSkPn+J7Hxu-%S9 z55Onm!E?hZy?%l1%KLnrsJ+Z%zi@l|FwW($ z`(Y}&=`v$#Yk+EJ*Vi9j;K7itt7wI3rnOpM!oF-)*UvefZmT_R-jrT7A0At{*;v{; z{N8*~yVZHwUe$WY!_m-kBVxzu!+Y%zPfE#nnx35nevoj7PGR3lc3EP%Lr}S#Z7&X; z5iZEgd~0m2*swovM16pL2>;b_;ySB*^9z2<(ew2j{`_tCeB#{sAtQ6+M*2Z&?WBXn z<|6+2@Y&-ozxIvi%5!1k*3+ru;cei;(|O^xwZZNuDevw~GqT#?ek~|>=Ge{lLH|Ch z_m^+vsGJ9inu=|mjt$;V`U_{(FUv-g!?8z$*!8Lz5<^6XJVGc1Wdav8WeM433}eX z6($MHMUCorxC$pnsp^+ZX2>U5XN!Hqs=IUQ>v)P4EP?JOrP^3$ov#+5CD}}f3rG+H zXwaPfzzC9?J&+a&W7F7U8Ll)c^TRrJoyd}=YB;rp{Cir`M}#cY^3~k)k!snhkIj8E z6M>Uz6@&@Udv0u%p{WKGilKbs5xZtox_CHoV^$XJ+-JpLIzA*Ep^-|40#<;=R?YH$lrr zb3^PC`<{@<>NkiCO)P|Z2$ZUwg_cWk12Z(m*)sz~B?FW^667bcE2o!JtghCa7eCyY z)vzw74wYAzVvDqNy4M6Zzn`LzUWBA1Ce7=j5gxF0@SB%m1HnR}s+rup8f6%tNn`{; z40Eel&$}-PNL~ke)sG8m2{LlmbA*Ue=S$I`1;C@#uI;55nMOpm+Ngxm460TkCU!5g znjV@dGs;@O#V@UDGo`Vr5vrn9xsM@Up3}`X98q5%86$64TgUSj88f%YGRCJlX#r?L z@{?hbJ&uefiw^s>aN|+xS&p@i+UJ_*7mLqWwGNCX-7j+KI1lb3Cs?(id;LICzb`3( zX_GE6*pPJdyBR2Id{>%B<_hS$H2|&LVk#MGAZVa+c)sL4Q*J3TQar8~8mmyEx;bd{ z8L*Ni(ZB8o93RAFBPu)y_W?w?%`xrL%-64z0loxB*EYZ4Yv+&&7|bGt(kx50v}M*T zEDwtgxUFl+@R=Z{8Kuq{(0bc#hEmR^y7o#h$zuYV4w`ns_9($N*~cskuW@KWxyX3L zciOHySnbtR)z%tZ$q+QAtfHh&nCfvSokH zlo0RamuM0kl?DGX$oN}AXcX9es0DnyEt!zOX&H<9 z@>}Z&R1)-|eSaWrWd2#~dFIq~qgH(@ zwWzs`X`#W57gpOR%=`JHdArE2jbl)u?Sx6;TK5KimLs8p6lMZoZROZOcIs;op-W=^ zSjM3OfBSE#!-a^K8ix5;{Q7GIzkK7uVV8O(iV4+2%4g%rRetbaDB#OaGuiXcZHne| za(fW-LJ;?4!SL_VL-3WLfa#Mrr@$`EndSQ^ku!}69V>bl>!1kb(qEp>wohoP1R#=7 zr~H0ze!SV~VIWNh#&w*34sq?g30q)eD&TDFTA<#uVbDPlUX2>~O$4xqmY&QjCSeR2 zN2(G6q)yDL;RV7OFO7e`WHZZl{91{Z`98zMogk_!KrYzVYpEM+_BBhXJB+eR`UsH1 zDj0>)*a)@)CYiW6Ca86T>*s)ZG8VhsJTozhJc_7NRZQMsik?|Og=&;hd>LljIVH%` z5}08Q35IfFR1fu2q}IsdH(+7YZ3qqIW{{K;UWYjoOR0ND+n0EN@|%(@2q^7slmdZ7 zkt|FRF_P$dlL%=Y$RFt>oq}5$K--9Vlt7imx(op+)oL}tELY5H;A|_d@~eF0`vC?z zEd(R%_iVZ+UxCYNpKYzfBd#I8AR=A4VYBg6suEaCshRh;RG&Wqh``zsYdD(|GMN$8 zv75m?t$Cm+T#*}CUc^-rtO7@URHTb=z282x)j28>5Y=u1e%H}X z1pcGr@#TW2IrY;{?Er5^9!Bc>6)`_ z`?|53E##i@c6FySu{XGnAD?{x>_=zg1>NO-mV2IGcTn76^WmZG?hpCyHXn~ZI9$zG zY<8^P+PLlj`y${JvVdcb+V>d7JKQ^)9{ij4oB!bZs=Et52QQpu%>ApeDb9e~G^1QT z;%NPkIrath!H=HKQIQi^y5`Ixp@5UZwKf;eoD)8m-?aZ$Z`I#(%8!=_6jYyEzDIG2 z>)VDEqFa(AZ~gsLoSd!dWCAKIwbK`V{MTQ0{otpj$MuXiqEF~CiTbX4 zFtI{uTEyY$86AAFBF`ppbI;n^qFc+DTjS8HrX?kFmoMOA^@V%Cm~LH@+p=Ey`nt52 zr*D|q`h0Kvkk^*29Zk%r`E}{n93UjPu-%-*bcK zCBIvFGQ_{4{uiGtd+^aW7c49KBK=%1gs9AkJY(tj-RDE%^XZL>wgICjHZyIr@!sEbMt9{isbgAe4y~Ny^z!AYsr&huau2>v&~r9%?YJrP zr0v9p#ZeAvp(VR{Z|~HKcl^nfxo&p z7x}3e#&~mW_pXSOH*KEO-&@ICb<_5bP6cn(ufrb`Cphevn0;>Lf$T#z(vC9?=Jian znEW7VfrM*vl1%4O5#2V|tu1}g6Q%AYvMU@dzFx;+ZN2^O?0tvrEy|zA->-bm_v+4` zx9@)ZnPR+v`T5>=tN}mmK3ehboL}V?87c5!5)02%N5{Yb_A)!xy>;njdk!{Uj4z%h zUpn2EJFYy@$L9{;+h2Ls?)?qT|Eu3uuX`?g{PE8d)kkVy3V+}6`NIwNtX1`Yrq1Ln z-sSjx$MX%>EnkV;SuS(wv-kSX4ZpW+zF&E(`1otNQqSksc?X$)Y1u>=iFtSJdwZeEb#gh0|q0uhkLM$(I8*`@1ObISgqS(dIcQ>rzyTHvF*9J#7)_lqXc;i7n4vjvUbYEDVg% z{b6WfVT|q;LrZf@bTK1yBQxN7dKB}F%t1$tqKR1;gAS-fQDpF literal 0 HcmV?d00001 diff --git a/tests/test_glm_utils.py b/tests/test_glm_utils.py new file mode 100644 index 00000000..b3d047b8 --- /dev/null +++ b/tests/test_glm_utils.py @@ -0,0 +1,86 @@ +import json +from pathlib import Path +from typing import List + +import pytest +from deepsearch_glm.andromeda_nlp import nlp_model # type: ignore +from docling_core.types.doc import DocItemLabel +from docling_core.utils.legacy import ( + doc_item_label_to_legacy_name, + docling_document_to_legacy, +) + +from docling.datamodel.base_models import InputFormat +from docling.datamodel.pipeline_options import PdfPipelineOptions +from docling.document_converter import DocumentConverter, PdfFormatOption +from docling.utils.glm_utils import to_docling_document + + +@pytest.fixture +def test_glm_paths(): + return [ + Path("tests/data/utils/01030000000016.json"), + ] + + +def generate_glm_docs(test_glm_paths: List[Path]): + r""" + Call this method only to generate the test dataset. + No need to call this method during the regular testing. + + Run NLP model and convert PDF into GLM documents + """ + # Initialize the NLP model + model = nlp_model(loglevel="error", text_ordering=True) + + # Create the document converter + pipeline_options = PdfPipelineOptions() + pipeline_options.do_ocr = False + + converter = DocumentConverter( + format_options={ + InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options) + } + ) + pdf_paths = [p.with_suffix(".pdf") for p in test_glm_paths] + res = converter.convert_all(pdf_paths, raises_on_error=True) + + # convert pdf -> DoclingDocument -> legacy -> glm_doc + for glm_path, conv_res in zip(test_glm_paths, res): + doc = conv_res.document + legacy_doc = docling_document_to_legacy(doc) + legacy_doc_dict = legacy_doc.model_dump(by_alias=True, exclude_none=True) + glm_doc = model.apply_on_doc(legacy_doc_dict) + + # Save the glm doc + with open(glm_path, "w") as fd: + json.dump(glm_doc, fd) + + +def test_convert_glm_to_docling(test_glm_paths): + name_mapping = {doc_item_label_to_legacy_name(v): v.value for v in DocItemLabel} + + for glm_path in test_glm_paths: + with open(glm_path, "r") as fd: + glm_doc = json.load(fd) + + # Map the page_element.name of GLM into the label of docling + for page_element in glm_doc["page-elements"]: + pname = page_element["name"] + if pname in name_mapping: + page_element["name"] = name_mapping[pname] + + doc = to_docling_document(glm_doc) + print(doc) + + +if __name__ == "__main__": + # generate_glm_docs([ + # Path("tests/data/utils/01030000000016.json"), + # ]) + + test_convert_glm_to_docling( + [ + Path("tests/data/utils/01030000000016.json"), + ] + )