From 50d2ef1ad60c92cb8aba6c90acde1f79e7cb889c Mon Sep 17 00:00:00 2001 From: Yusik Kim Date: Thu, 20 Mar 2025 09:14:49 +0100 Subject: [PATCH] fix: add pages to DoclingDoc Signed-off-by: Yusik Kim --- docling/utils/doctags_utils.py | 39 +++++++++++++++++++++++++++++--- tests/data_scanned/ocr_test.png | Bin 0 -> 58035 bytes tests/test_doctags_utils.py | 11 +++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 tests/data_scanned/ocr_test.png create mode 100644 tests/test_doctags_utils.py diff --git a/docling/utils/doctags_utils.py b/docling/utils/doctags_utils.py index 59f6ba03..e3836c4a 100644 --- a/docling/utils/doctags_utils.py +++ b/docling/utils/doctags_utils.py @@ -1,16 +1,49 @@ +import base64 +import io + +from PIL import Image as PILImage from docling_core.experimental.serializer.doctags import ( DocTagsDocSerializer, DocTagsParams, ) -from docling_core.types.doc import DoclingDocument -from docling_core.types.doc.document import DocTagsDocument -from PIL import Image as PILImage +from docling_core.types.doc import DoclingDocument, Size +from docling_core.types.doc.document import DocTagsDocument, ImageRef, PageItem +from pydantic import AnyUrl def remove_doctags_content(doctags: str, image: PILImage.Image) -> str: + def from_pil_to_base64(img: PILImage.Image) -> str: + # Convert the image to a base64 str + buffered = io.BytesIO() + img.save(buffered, format="PNG") # Specify the format (e.g., JPEG, PNG, etc.) + image_bytes = buffered.getvalue() + + # Encode the bytes to a Base64 string + image_base64 = base64.b64encode(image_bytes).decode("utf-8") + return image_base64 + + def from_pil_to_base64uri(img: PILImage.Image) -> AnyUrl: + image_base64 = from_pil_to_base64(img) + uri = AnyUrl(f"data:image/png;base64,{image_base64}") + + return uri + doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image]) doc = DoclingDocument(name="dummy") doc.load_from_doctags(doctags_doc) + image_ref = ImageRef( + mimetype="image/png", + dpi=72, + size=Size(width=float(image.width), height=float(image.height)), + uri=from_pil_to_base64uri(image), + ) + page_item = PageItem( + page_no=1, + size=Size(width=float(image.width), height=float(image.height)), + image=image_ref, + ) + + doc.pages[1] = page_item dt_params = DocTagsParams(add_content=False) ser = DocTagsDocSerializer(params=dt_params, doc=doc) pages = [ser.serialize(item=item) for item, _ in doc.iterate_items()] diff --git a/tests/data_scanned/ocr_test.png b/tests/data_scanned/ocr_test.png new file mode 100644 index 0000000000000000000000000000000000000000..5c6333e71f3bc2331e77150271fcb5626dbd1ad7 GIT binary patch literal 58035 zcmeEuWmJ{z7A+=qO7JMR5^f8B8yXL$I4z4!Y*?|RmnbFR6b*Rs;0he*zl5D^g_61#I#j)-WlH4)MN zi~DxsCm9JtK14)+?Zs{iDcFWibla$tDz~jnYVf`y*{QwbX-eB!t{?8ddJ1R)9%*y4 z-i@NkUeFeMM|1ZfOB(mYp|h{v+%MHUD)eR{yD*~6F~V`k=2XCQ-N3)gYj4SG{lWy6 zLoKLu*RB&jdq7z{#)p`Ti0GMvM4UqKOzvJHBCopHZ~y#X(7+<$ZwOy|&Qjx#2wyi? zT-*Nrj_B)A?Op$VyCD4P(7)f+ANcPd{&#u)S%Los#ecWLf49Paw*pq+zg6MCMf3j^ zRCrc$g^6i$b$)nucGm9e1w_{kQ~S*|D_dJz&6GVvulA*VKfSd!&T_zxk%{T;Nrs`Z zv5sgl-{-rCnN>4KR%ZM6@8ADs`QH8eA1zO0J^Fp2N^EO&IKt<$L;!(uK zD_7nux0SdZh&lVX!soKx*V}F_A>3xsA^DO)Y`i3`DOy^pc%w4BjiqH~_1ownUX?ua zL1vBo>JaW}yl!-KbS`jkaPYE)P)ke8WpTeRaxvna*(PNklm+I4m1nt4b&OQj82Kge zox(u>MGB((`-Fsq4xQnkQ;b)@M}}~j*l%xb7HVE3S`pOqDc&J+H%=igE{^TM=jiAk zzkU_j&X&~Gnc@9N+)6q+I;yJ1OOFUnbme8p#VHi(H#9cJsIgQr*=?>ZdJ3%F**p^( z9GtILb4FRYHP2$0zDL=>z@XuM0KPsa%%7u7$bVRXn{Ftv-)lVbL+o>*|#qxCBs>2l59%Jzo z6O{amOL*?t2Pn|{U!bDuBYLJD7#`k}W7b<>GaXOt=;*k@qoy{ zMO<5i(^}l^_h&@h58Bm@pW8|FZUS);yb|0|6T&SbA~J9Kp6JiE(5pjx#BSVp78uz2 z=etW(RMfn^#$3wr%i=mbdvEkvjJG5OFsX&}T2=0=5|@$b&9^d|?8s71y!XKM=MJKe zvUG3ASWFEK=_wzYnwlCJh1eMFCfYu7=FJ7O>JO|UPou?S#P8gB_3D-F(nNdMb(;$p zFS3VP<5@d7IlVkg87~*}rmr>g&uD8(FJkP4i%YZ1sZ*!e^=qrg{daotm%cobqMV}X zzAY{xap1_2BL@%iJzX+ZR}Wp#N=r-Qw_nJ8+$XHpXqcds)Im(;@R47DlarH+i;IzQ z;&+s&&2*2Hv~&xzub0Q*eQT)z|W6VORIMm@nPeR%+b-t&n3>>W<3-qPGlIhGV=ue*=;q}v^o@G zmVW)@vk6|CsV>9rVRR|n;_>5qsrSDxO?Dz0 zgd7ghB^nzW3#<WWAWrQ5s}WFI0cW>tlItfC71E}qP^#fseicd{8m#lIr2LSpL&X3m(OLTBzXOqLx#g`ve*ALHZj5SZ&c7m?CTfY zT##S;gt!0p^zG5JBt9OL*9}`A2J=777LTcn;(QS~(Vp=cvG?)_mEQ19^Mh0Z>ZdLG zO7^%g&$engPxgE*u&l=VId9C0x-gR>dza^oOcCv=F898;jF;L>Yu0?|%rTqpEiTrt z3vUloymRLcVhR5`I20lShJ|UY&JE&mnV6V_y=B$V@Il}lx6iG6-qh6eba{&3 zZtfBd&9Ca}M1?pxgyt2c#19uzF4NKyG9EG#D=X_*(Jyrk4eVk=Q`6+_P!|`M2tjAW zkD9-@u91;itYna3b9{SS8{J&Q++bB8z2aDNf^sk`HaO?Vaewi_jw~airZ2}%o{S;S z)BgFUJwt!EPRdWlVTwT3WIza?$E}<|uostCbHk6Y`&&Ubx25(XMcG`KK?f z%sGC0(qqRWd*<*5xEwM$l7Tk^13HWE8-D)$`TO@>o4zk!z97h7y1R=&u;$NPeArC%IEB3Yg4-3FtHtettljy)kHR}1l;!+)RK4A)Zj z1y7+`I?rLfq$32r;tt8fkLl^@O-)TPC?(1kD-5^A$$!4>c}^o4*dkIKEM+FV~|x%sH32L+#P zDxQAB=`X>rUyGqgahr7b1qa77udXc`8SEe?CZ^0tx*O%5nd%Y6A%Z816k^(2^tIkr zy{57<9=m>fV_xR);oqsLsZ>rYktoaLOXUWJh8d=TMw;~&k)N7hSnxYFjtwYaRHRdJ{6Wq3>M#}MpoPt7Udav7-1E3<FF3pS0|dIkYR#mYo%ZLC@-E)A~vmpxWq+}>K3;nk;J>KhnP5UFa4 zlUK~UMCK_H6(+T}gA57Mw{{7SrFLriq|!u)S<&yiD?nzroSLgmF$(lpx9iSZ8^|OT z>X|csE2xe}TPMlLavWCYPBE!IDybyoZ@u~1<>d%IyY!{`=-{LzljW&y!fNBo+Q4xlcD=pJY%P z_>+iqle8^4I znv%t-#H<~K+}`2FjT`erH5nUBKqPf7zUs2FUL0LPL9tRHJq0#(AE>htAi7IKG>@&U zy0eS|tJ`CvqZu^|@2yJ-h@kKuIK_BJT3Tr^ln`Z{NP%@aJ*b$hGXi?i-^E zZTFuxGuF1D4#apVXiFWft?Q35J_hL%CvGH3$2q@UH!3y(jwrvR<6R}CVUr2=%T2{$o#0nOJv+$=WPwWL*ge+_yUBz zbaSf+%J7be*#pG(W}@oIGUG?t3qRanoYC&gH1NGz${3-$xw%>8&k)#sT^1EJ>eHuz zlr~(Mp(?Vt2WvYqg80@JxCuD8!H{c#iXXDSN~Bc zzRRmGstr&(($;!-wRjJbxlapB5N3KWJ z#KpxAuN@0#(-jZsoBQ$OCQ{@?ZF^~{i1hB3Yt-3$51qL)eD(e}qW;}^_KRbe!y@0m zm(teO-a3*-zP7IwMevsO^Z|sF;Kqzdwvz|-lz`nyZF#MBr>#}})#5uR4!!pDq`GdM zSRE%RCFOaJKRD-~hcSQYhu*9H`bYY{&zF8SewJ}NFY)+s>V_0PLvb_{WvVK*&3St> zu=~^+AU(EY%v8()nb(9=Wv@csGS_lA4ZB-HPeD~APC(;3_o6qJSL*h#U(zVQ6Rw03Q*dg?}@vQ4+3OPuTwbv7g6GyB9E+Ov@pXUo%Xv z{P0Li`<7P^}5@E4Ck3c;#tYk#^bcIQqx z$miDjv|zVIIG^1d%5(HX)UbxnG7&)JZ5h0IluuDw!}|-H(^9^PHlhDxRTy zP?(pOmz8A&Xs41-Ho0%j3CzJ>N zhnQHRth`zwrWnK-&`*^8G!+$ip;ctFq0__6iyLKB&Y_$}0aw!jA&`BNu<6(M96?M| zQc@PueFFlxEQYE_MjlOO6r}bzM&qI2foyi$^zQFRCHE881&vy|MCH7Z_W85Exw$#g z`sT)lj`W9sfF-0|tOIuyMWTDb-08`9)7|a@hu>Rp4XzbEe)43rEv+9roAV);Sr6mx z*Oo^G=shWTCETzq_eGZ?{O8oxfZF>sefN=erdJ95MmVc6N;9upmn;25Y5Vh1K~ zAiX0a6M;K_`?d=3;A9@~X$2TQ)~LjmjErpYim|r#adL8UQc}zD7N*fF8%UCeDS2(5 zKCO!T^~hD<;NsOemA$0Wc9cVUbx`8^o)jvMm|=uA~m@l zasaR;ZK6qv=GS!f2<(sj@+$Z4Nk#~0tg8)z6x@Dynwm5_Bcp0U?Gvj}8UaMt)=IOt z;hq3gL6;?bDf>audL5U;tiR3T&3Ie%*Ow+6N>CO6N?M}18&DcF^DW~zuDiRr-GcHW zE?DAXjM~h(*jgFDRDcp2$z-Um-i2os8D^#nVz`=gO6{xtB8PKGaPZP}uk&ziSa5~B zoScKBBLb9u!wVZ}EcqS)@!;RT7bF#N^#r8wCCm~ZEhD(8aKbJT+gr#eElE93?p^}q zpLn&I{Zl<(6S~=SQFwxCR#8cXlYZ?#L4!C|Ef_#XDSJnLv6`o#rsm+?x079Y79|ud z$r?tX_BvZzTP2@B$-*Lx(#VJRDRaLk^I*`|A1>ZnlPGXh!li3DZz|?n8azniGVS{C z(k7xX{-;uosYXpNNcG*}k#`;*N@O1I-gTwxN?;qiQL#Pwdvny~d01FCwntqkPgF(X znPbOhfn^Mbi3wR(Q^W3{y&^#;(cK?u-s`&m1hY||^L7#a=YuvBXC{r=x{BIRxick^ z$#hMd!I%x0uAVq?Vv@t~`}glac>cA;sIs?=3BmfIPfy9_ zf^+Y=_JgKrIaxMRSnsmn+Q>x%F$sw(K*{kB7D!f0#GwF#qaQBO(~FNh{{o_gHE(9E zXh~9YQ_H0(y>-sLjRPCYs*A?a!NHYlCrChGb|G~5JlbvlS*vs zN?m~9lu6I1_-y}-jnOlinV2N??>R3cYTuD#f5$d1ts>N{a=)DMXcD;Rr zq8=1+7x&L1w%LvM`xHD0|^wqRic_@IBH5ldge?Aa{lNg zRE<_jo2w(;v?;hKjRNZ$KQ|FQJj97ZjSDam}d3*r+L z;`balb*G9&HS>=D(}VW^iG@3olB9CaMtiQE!3}szH|JUCPqKjn1q2|OJv25>^svJg z6uWc3`027~kj(2~iLZHitA1DHQI$SD6cR4eCPd2JYg}AP*czIFp@0CQM8K|P)OQ5V83quSQhwb2kGp~CzZp#7Ms1bIgpWh~b788*>%AGtSXoKkY& zTcDOx&=C>i6Eu?b-TRg0P98pR;6#U{ovp1-UeZ)oUO)J2{n1X`cl02w zbm$i!p+^Sk#_eo8Pa+uWdI689hdvFn$AnXJO{#JwspW9#c}dt7oa43pQXK;oXts2s zaaQ=l(Cg1Agn1IAP2;@l6Nr3o`cf)suZ z#42$>AzndDzE_@@I`6p$aTWG2a#e(`BST-TQnGs9geFHx(JqP_R}R`uIE@nwSDH2)FvV9I#EO00kiO{x_={cu`!<9q z_UeaW>>WpHKv_`h&0|jn@6ON7)mf?xRHn{`JXo&c>P015w%i6hb4#_Mx>{1NF%q15 zf8T`q!8Gg@f@~00NGGa zNn;i1NTDvW>%B@+N>XLJe)42MI6Jm+&20Rc;e9gF(x+ae3T~}LRdbT|I6>@TH_kA2 zEs#M131C$58N2`q+bzOOTRR=xfRR>GT>N*d}DFH++MC z{(y+@7(IJS514{>vVfkSf3w1umOQ{uz>XBxARWf5Qmg*T=Fa?|u{!7!D1yR^nbi(! zqn{hT57$K;I96BKoT4QddFqB9$^tc|1NOY$WjB@_fP&^)JHJR^yEC86NI1>R%nUmI z9J_dX-=sQQ|KGn20NXWxzS?BI-T&Gc0qHXs>PB-_Sj%@!UjJsTEYNX=eqG_$ulhB? zifP(Ef$@btED`z@^^q0YIuZgZe;)xb3g0KfshSs7308PT;&E?CN_j9KU z+bw&a0Zjo0%3##f*1nK5msq5I1Pzm_{WUc;z^HVTzU`Y;^~&e^)te91)R#p*sihH_ zeha=K7GWYREbKbsna%%n>CXDdX^ux;;OLn*P&XD{5c)$I`Z7EWiA7R(h}jcQ#3uo7 z5*1!E9M9%^e_%77JJkXkqe*5~E*vA%AoCM3q?>(k{^Q{BA#q@E!r2rY*( zm5%zkF~(gamxYb#zPNVm9=q;)bqG(Sp-ccG6AKH=>f575T493Dnj>fLNR`jk({SmK77(RBzk?}d?roRyVM<}B_*VJ`V(zwa|K>0 zTE#^(>~6Qo?IeOp)w!u(d9~Myi;1Bx=I!A)lJg+U;l=>d8yURAp*k53@~cxfjiy z?)zWBn*4#*zQw<8cnH9h;>6%_Or*J%qdpv_%}Hw9lluK#`~r~r?j@;m@?55*q>Onf zWNbX~f%ReL(*{x?TM4s7;ktu7=PCFQjsAj3Q05lO;uYc{R}432WJV4)V;6~UE5D|s zyXWKMLkknVqx| zb4T#k5OW{niH7kpn#?;jsR1{Ubz?@W!v&ptQXiD=pWYWnW<>f_)VpqBveO1&Zml># zC-Cu21FiS$56#S2Zi)?f$-lf{=J~j9$~P&7j`R)6CYbv@c3V)jgKxj)*hEw(8&_0R zqB>%>IEngNvc`&8C6JDDKW=-2zoJ{ zTAG_T9PUEXPm-6;a`n?G}%2lVZ1WjIW`R?t))D+anpAA&$3x_v|W zpDOXvU8-SaQ&v_U{a7tYvB!_&#LN8FtG5Kshw6HJd-Hr*z7)n+I*T6j!oot6w5mgY z81SZ=np%!=dpaaif_mEfth_!kDXBBdh}(AN&yHPt2NZW-<>%M9Wkz~w^yd~v<@>DM3S|0n=SVI8WzFO@*r?HyB51>es=Siw5N|?(A89#X znmdWslzE{x8aJrhb0Ml#ePd(!=-!0$KP74BK@<6Kag?b%B*;#bCt!amy?*i4cQdDC zuaTx_94xmWgdKzeeLMD}Y-g_V^6J@KDY?`Uk&;CfY?10$k_6)2a8B46T2wc6Hd+q| zYQw##AZQ__lwU`ivO#DRt$b12U&o!EYF2bNT?)$DhsNGDG!5U8vxDA+Jn#>2b#*;4 z?XWr+h>ipmnyx_1hjLznJq->G1++UOe8WOR>6Me!(ZJ!`-n)e*c#tskM=JIzWnYJ= zaqW~qL)PGw{Jj*uJz@XZoQ*|}>m(=XAB_c53OHP2W0PAKww&(a*V59exlib-N$Q4d zANdW|t`63?F!CGiV)9e__U~_B^N5QxK=}bXZ3TT&!fnW$4N3bSm1Rw!8Z)QeN;o>H_OYRcJHy=$oL`L3lul zq#(Eta}R2nKeG7~Ls#e;AXQ>k$B-8id{b1BMNO>}w=WaU2Rno_fxmf}vSQ0!f4VdQ zHdzzSzn)PiaNJOs9^TBGBYpIrka#Z_N5N^> z^kUC}CKseHh-&py`FBS~&Sz|B?fH8&N&a&!v;=E>0>nQcIvpkWHEPzwP%5{VI?Xy% zRLsBaGuz=NLVdmJ8J{Sc_zmD>ZsCQ{m*X`gR}Ry#w-$MbWeO%5c)}c!x44;9PvFf} z0Zj7p^35*LH=yc6MSTJ>HRk8W=H_R~pbL}f8DN#biHZYP`c9X#d8<>sE6bN>k6Sv4nfoIW|Y@T|nGJB4y zufr@J{5NRvV<7pI!1S>H=WnbFPacJ4RD3`SuQgJHSjsFC@aO5;f&qF!9D5ub9HJ>1 z4pH)QWEc~bSf9IYZ44$&x_S?an_kfg=!B?_C$Zc>C>nBIjxUHj#@tof4Ol`{MEYw-y%v1 z3W{hZRzN?C87AH`NGfBkDT2`Q5FcRjSW&6#JRf1DL0vyN9RnmmLjLjT;#iDrvFBr2 zTH3x{PSDqZIqq>|Wjg&of4utd%9Q_lM;^6$ouA(k5#6v&;+c!r)*1_x9Jz8sndt1< z5abRF%U=<3+cA3?_WKEFO)j5dW>**gjzwo)H> zjDG(v$fH112OKseo?~QWbUkNbp6Vyef{z0@f*h0=S^{^_30i4_+oqtP-~v^y&2+e> z?E00|tH<(X91)=IueaYU#*OUu;MRS9Kg6fqGaO)7O8k7{oW8~~fzC2mBb57=44f;NRf3u?nbm)MZYk^gL@O4}v2WjQjDA4YHy}#@w6>f9 z44)(G8y;>I6Sh@hqEMi3Ml}$-bs}Kz2bBG$LNPTC*r4 zTh1Ogr;oRs>{#>^-173svz`6)cM}`nOy5g_NdjgvlAr5NuH$yg$C$5k8nslXzRr)F z7I5oN4=_vNb94W9{eI~f81U$nAFa9n?fO{p5h?*jeg#W~2aWS93Us6o9z19e;jm#a z5TA}0UK%(E&6iOvI}y}-ZR3Iv98fVP#=skHdr!3U+p;+vprat^RoOd04d8O;@87>` zOOp(lOVI!Ft(}Jr@#nrp)UdCdS2L4`fD_;}O|Ow}38Y{oJw9tXxXLEJBQdE1t&YceG3%`8Ok>bp- zab;=tClWVpI1Wmv#=iJ+9RHkzWZTef4@ZGSFofvU{*EfT5 z5aNM9{XAB&y$4xl>jby`4&$a+N-c+k|3(4=jgR=NpP%HHFQ|2PSGiMKT6%KNo6Ph& z)MWGeW42SO1=dL$Cb_@~qi?mnFWh1?Dx%j3^V*KF{Qc?H@87@QIFvRv%Jb%;%e2}E zUxtQLpD|?{@R1kIEnbKX=}e<0OdA)1)&;J?$p$9r z`fL$mh-IdMeTv77=2v?$`OdCFJNaC@(WA$YUnoponC=zKh&jD5b@9>&3OSIs5Bof9 z6?p0lBmyp)cJHbm*B$>4st5w_%j~H~dl(rXO&11@&LaXVH*DTJAv+uBToMr20u}Pl z+lg}15E7P@l$1FczZGq?1=mE-^K+3=cAJRztX$BfD$gymuH3sF-|wQ$=4SI|>?s9)JHzqleA8)eZtm^nUS})iTjvCw3N0TaKMLzjRm1K0=O05I+KjVh zRQmVQQC?77y*+|>EMh~;UZkd8rM{Jam5LLcvr!Y4OP4MI1>SvpM^G59$$Q?dC8eb= zq4VhzySck>&6FN#V0Bzus9?NF*Yue0@t`yW_vP(5aQL=QI*VRufg)RJO1uPNs6(-2BIqOL~ z>8Yvv5>krh78Xe3`I?e7o&p%Xaj3f{YKsK`G@zlJ2lT6|UgyGt8y$ay{tMgNiL+}{ za6U3$Vn5y!#YT#L%iy4YQp=0g03onZrC3Sw)xa*}f2;WwGlq0XEU=+kAPM_$m7U`> zR0v=vM|aqA20E|z>Z@EB_zbG*p_bE|`!DX($4mlJyUym~Ok2}cmE%EMI{bFv*4#etjcB;!a)uXjF<`L@nTL!$?89k$R& zb3)+K=YEKPB42hSn;bz4cA-zStc~{GEQ4uByGJFWI%>goIb=)RoASeTf zT6hs^q4B+wkErVGd4zq5VXGTE>2>qu)IoM+_9GVCY z3R;|Km#6FR>!Wrw?a6%-6&gK6NPC)Mz#m%Y&lN4s!dGZkv>J=@41M=_PXu&CfKIjZ z<|(Hq_eJ277hb`+79DCF!l0Z?Iu^Mv#qL+(`XjXVHy4$=BjNzU&TyMD&RgZ*xZ34k zR_=Q9lYoFg-iLFw|Eg;j&$8*ff{>>780oH|=%$1}eKYV9qduD3RHKcfNYL(((WLky zBFoF~MBa7Aem$8-uqHKp2NA8RSX{zh9ldijU>7XXD+9hV(GT@%Lr2HQ6Q0ILM@N5m zA(miwy2isJ1$=Y&=!c-7{aHe~MV$W>^Rx8dW-=p0Q*?izRG_2)rEg;aW2M8!rlw3E zJn-&&Vx+A-hs;=|z6R{>x;GpaF@j%2Zx8_cN>hN1rKJMxi{=MtjF*T5z0TvB$WOt) z`P^&qMx<<_S`KqY&;c#BD!(fh5NjzJ5TsTHs~Ri9veLJ^Ug9kCq^e78 zgYNG7<7mE$s;UxbBGd|kdjQB<(LUt5jj8(qLqLs`6f<|<-NeK>hRx!1f{*IYQ%Z=T zadXBJH|MxMnS(GDt{^l^?xo%cHkaO4!{@xEK4hkn`#7@t{dtE{M)<@$N;;&5K}IKr zhXWtJ2LYPT94EVmC28TNx5Zi zqEuEIOAM`CVddjfzjolZ2L<`rvk4D#^{d4Bf8WCk=K1SLA6j%DVaT^mn**ZaZjJj$3_uDpX6WwMs!rd4!VJ zFX?1`GJ}quUJDE-Akz_GNZ7dG_OOerfCIXDq%|?=LVFAtya1l9y~@$M<<*>-tpMBU?2u#H<|Z@84TlSe&4VyBqZj2zF$8I>pIaMqHfa@Zl|#Qa72A zvDi3#ZtY`Tf}Be636^yNWCrAGc6=8PH-tL&8VC@Y=iaBRpFFW!UrLI<@7}i16tBoY zDQdiR@Siq(oB}os>_eodKtC2nKS!8$W)mD^(4kN}{~|S`$2T%ELg$x=9@5R5Hz5T! zG&C5tCMSLE#lCLDo`o}D35pwH?dO}5zFVyw9c$>-F>p|Gm1@|%qiBJ}&L`Cr>j5-7 z-jS8idYzyi4EhQEBf|bVk_HaSz!tgr=yNFuv1P zO$Uhc&{{O$_kqBO>@o(EHOJ2q+TyHs2&&c4X`=}$+fal()tgJ<-a5~I?2~8rsu^Kt#23T3F>eSccG)l|L zXd_<2uc!#~HDc)`4<|r@qV#@GZA`+{Dk(WFMvdVTGt_0F5Y`h(aa$?w^B2EdSU70+ z3g#X{kUa_yjX`9AXARIf1>*V9Zq*&$Zfa{(K4Ev2`%`t^NauDxiOWF1zEh7Pg9^ZwsBC0n;<&Y8=kG+K{VE9$6y|eZk9@vWt{-~nM8wDeISnNy$%B3jmN6f) zU*-Y1%RVCn6Zk*&$g^CnhGaiC(4@t%;XOcq<^DLFc*&ok)pO zSLAdMV8X4Rg05)fFM!H4X@y&;|Ezl~9>8cv2%a}*$=M*$K#2pmuCA_5_n&h{p9%e1 z-E5`m_V!g2*&r(=f0eNaF!0rtl}FAY{$stki_P^c78ZJcLO1|lK*Nc%j(Eh(%nZWL zaP;fD!A}IKUOjgPDTAQjE3wZkPwrDL3#kp~XQ(k^E_-$CA{M}PW5Ld98Pa9Vo0AMW znpYvCLwRYzYaMJ=8ejIIFRPGmIf|K(N3Hx7OndnczqjTh_eG(>=s;H4fzf9f(F*i| z#lA_>orvUo;7CXUjA~VtEvlP<0l>{IMN&vueDGM~nqlkf z3G8j2o?ryVl@dmB>fYaXz+PLLnW=Aj^r$7V9W4g9x56cm+Act{0mETqo5N?KZB{>} zWQ3tV-lea)(E@)Ux$LG`^HGO;7kV&uAjZ3XJm98d;^sP zU7Lh5o$;WyM{o?n&_I|t>SlF=UZsWg26(u;J-{f#{Qc9dx8Q)?=++su=y&5GLA1{% zH2>N=U-Oo=K!?I;msRxf#*v1YD)erKYQq>lld;1k1;5Jfy?cvjXE`BT!FFAZOo{{m z*(wMqlq2H4kWT1DbxH>(?H9!sGsw(y8 zm)jV^fHBAhc6lqe!413r1W6N239vCr!H_uA`f-A7hU^1p-U?4cbhVCo* z(nygUJP13YPQbD}lyAZ`0Tu#1arx6en5aQV7SYFr2IpkU_YT0|Lcmt^OvwLk$>Azd z5eZX+!e$a3up@nz3V{N}A6eS6Ga7qyY=D;Mi;vPuE!($pKl6N>9ShL+qZkXO@5
mA-_WTm$M4rU@RSV53@KwjwAvxTC$@AI%={Wpu)LJ6y^Mr56gI zW}f-wYu8qQOyIMC!;UbT5wB>3l#Y=C^Nq_q7cTq-&I7#w=7R`J$!mEu*ogua2(%~o zJFJ#E*28Z-JOW~OUO_7hFVac=g~|>AlhXwcwE3_kCPIF~AKke62RjQ@hasb*3U3Jr zLzl#z3E>zzFJ1Z-lrB2DJM6%{`Ubkca8KtT03pg@NPX++3D#_5ZJi7U819NdE~C4A zrg5Kmmznu_z!DXBk{|-QxQO66s%9Iv!$j~P8zQi;wZb+=#dMae={Ex4k=0p^{t^%x zT1XfT`UtwQF&2bg5&_&mWyMXQC5|!$UjYFhA=wdHOAL<@6;j-Z?W*GD=5q)}C~oA2y`J}GFw@FgW>8sQ5Xjy6?fIT z?k+=eKp}Yk{5etg{c7mL&P;A`(*!@Zap zx(gQ~&xyUecY~FSvfoS5ao9WMN3>SRMg1)NRQVE#f_V$dYTY zx#*h1O6tOaPhb}HwpR-KVe>8UYK`?kyFWh^cxAf1#a%{?rvyBdc^f8 zo`xySsEdeR2mCFJJzqMXceJ(nF{-44i=q^w9hs<_6)&{x_X}~1G=Ti{9t;Ck=+B=& z6NZvNmLY2zBeIcQEi)6YZe?VK;}U=tqwI`iJtw*ga;#hO3iAQPU(3Z^`my)hGkBn2 z3T$$Q1j~ciRFh$fa9j%F(3rx;A@nYuHSZ=2ky z|7>lF3yg6=-!v}=C$Zpb;Eb>_(Lrpsd%Rzq6LPw091jqFe8ZF)pC;?N`&YvcayCjq zCv{#r_KL%VUlHBD&CEsQe*DCV{*jRn7?3>&f&L4e<%n<{X(!wyN-NE!OJ9i5QN;Y$ z)So~1Qy2FTUU{Cj{3q_=%_$~=8KvevKo!6=GR2@|0VESlhDtd|MaoJdWv-n0=LOE6 zPMsy%m5*KFviDH5H(QJxcF<=8KZcyXprwWYg&*wS#w!8`GCPLYu#Ig9lg|PGv+#iW z`THwsZtNqxACcu37$w|Me>t#eI&}87dJHDn?7-Z#{bE z%o#GW&vJ2agn^3^5*c>7jmCT*KBnDI(!l0RE%3&gx+<6bZRDdwllbR{G>IoX^X&`L~ zh~eSMvUI9BrUW$H$?*2Y^UjwJeEW8s5%(8gbQXDbH&H!ru2Dv?`CtHx))p|7inPCv z&m4@_TaayqqtA-~s-o_PV3Z0(XHmTqUO0>rMF4f94f{NF4vm{Lyp|&{1z0>bdi~}N zJwzDGcWB{as5~Ph|!`2Znng?qVBzzZ{3a_XO~~<_{W@ z9XVzZFUNPi$G|I>Nk<_l7JnOhc&LHHYs9CC>`XB^*XuCXU^~ba>ZZ#%$ra5XJ%*P)N-aosT?oS7R7qJMQ0%q-W8;K z43Ac}3*(&#v(Knb1n04ygVbiWpnh#AJPw#zpk-#3j*P&TebWDHl;;HLO%U+azi(O0 zb(NqY9sO(c?Wr~{E|~0+n{XPKEWx#f3G{L;G+qdJ3I)xutaV19e`&{geS$@`4fbgK z991o_dq3(kOq+kFrp_Hd?$d36P8LCUMJPZh(6Lc<6F!E)7o;G>x>la~t;h%%%F$Zt zwXfa1=Qj|^qM#FoLt>gwKVPoIuQDih{rP$l7HsHJ_4W0In#TsNcL@*;9L#I;>IIv} z{@?{EKW(O~+noEPALNhc+BE?6Uq5~jhBSb40_?Jnnn35fB1iM@U)C2G=qJ zfe5;>Zaloa@Opy#qVEa=3t>*;`srnGAUrw3ZKQHo&_z9Xk;rzwmGYlzljR zLp}^_Sd_zT6T$y9AYptD>1lK;$i@VQwcm&k?dirZ(8-B;hy)GT6j#w8i+LhGRn7rc zQz{~J_oHX$vXCiRAO6}mn}rLhi4ZIXqXY&KUPa+0oDl#a3M3@Zqu6gE)RgI5?pqL7BTK<(eY>UJ#u)03#Sc>CEY&y8~np zOaQb^FJ6@6R0@zGI1XUHJOa4~qKx^|m}fitAxmO}S{rkP_KyKOuy|F36A)OUV`9)X z5#z;tA}gp;dTMHUfxrDUu;fQ9W$<@QDPd_vy^lOzC-P!#Ja z(j5k0U=HVn-=eUhtzFuJ6Keugj1R^Pin<+pnt+l>bIN5jEF<7S2OKFgLKmFauoe&65`r()l^UiJsB|qk7$&=OjN|r4|s- zb3TUh{RrkwbNDG}fRG_|U9ZYConJR_aq&?Wrj{|`bYE}H~#n0wu#WiYz-w_>y z_FxfkoJ@m84#E))m;81R-4L;*At-V^Ne75NdY%8_zA+BK2onVF?R~1c1}igtC8w$N zXs!Nydk#I~P8kC1A>NBe!dcJ20GeGbI_(mo$y&t%1j+PG3`NRGk~b&@N*Li_w*};b$*D4wpfEP{=Ry1C`67gmr>1epJsy5~2T}QssmtZX znGsePR_Mo!it}M;V2}$e;7!((L|=uS5eD@duq*L040|OZ>5**W6pa+DUPHVEw(&M@ zb-?*bR%&Wn>H`tHg0on-njFI|G^2lz8pz?bopFLEESVohSf8 z!oyAV^_6jD$n6S=_wIT>b>&P;PxrZy>OL4T*@-(RNC&ijSYxfu1I=+#TN6j zYnDzZsfYkYdVfru2C?ZHLL~Y}A74SdM7J;F-|!^;bBl@+jXOT65>!&HAdBVUftQp_ zuim`zaWeD|-wB|?!J$wP_k1bN`ynI+kc6eh#Y2Y=JKHb{GTp6V@58D*`2gripzEc_gN5&NX8 zz=j376%SAQuV25Ko3q4xX+T37X9=zRUAuO1x}dQ8ZE-`_D&0$kp)~J|S~PS}H2(gF zzze9OpAT>Nzv%WW-nyc9-=4vxW2aA#fS7`)pqyid2O~nn#4l0%Ftiel?Srqi>aIse zgl|NC+WF2D!b^ioDku{9DJ1)#$GH$inr7;b0dsPmcUVu z+*IJ+jfqr7i#^a-iAP=0y;U5HhZ*o4m7qY?@O?ZfG^%mtg=#GyXfH#UHJ)X#Gm^O( zM8cqE=FIeT=ty07$QkPl=x;+O^jD$4Y)~{p86zBHB_$$xVB6wm$f*If84wW1W}&0| zcu?_LJ_^WyKKa4Y=O4B1fQB`rdhXRDHnS*r(f?R8=2Jil_DhujiP$_ar=CsKYaK~_ zSaulfDTo$Z2d?2N=zDDIX?T_$MUDlr4%jQv+x$=dx`w|0C>Pg*p8-RGp^vtuVn0C( z0upUurHZsqM1iV4`PjyW>=aVE70(Hhm+cU3Ehn@_pPW5=wg87jP(F8o46?qm^2Pjh zDh(c9g|hNF9=zu|a9C@+qJ+0UxG(6t{E+eduqV#qz?j-BY{|1uI9zFLaL@{DCbY&G zZ7)3R+Vmp1Y_3>U{06dW`o=keDg!$v`kMN77jXmu=8kJxcqs_v`z7{=uF+iACeQ7j z-$<3d(nXl(nq-6I>Ukl=e9&M9ZAQR>%$%Iow2vqpbxYpw3(E((1LUKxx80WS~|cO#28xKQGo%-I=mU5w*d?*5(9suD+g(tLQn;2-c_}%i*SY$Q0qbc%bN|! zJ<~WOM+q2kW{%Ac6FWygIbX{{tEeU7y_4*cjX^?OhrX*uncH z$e|`P*A`+Jpa~?zXId4vwoih%8-zP&4HuT=Z z;}lnvDGmVfQwcNt`{Q-H>NctZIs1=Txj3jRmo}A=ysetZj#@9HU~p_#n!c*}Sa8qQ zoDw7a;xT9V?JeEeo7jI?V`7WCnjcR7>e}yM7`h2}31AZ^9$C!jtYRk)e_T|0ef%

3HjP?kTTAf|3-w81@59e%Rnvij~0fcdvg6B*)wnTIvZ4 zv31vd;QcY*Kn>?Hx~2qF%=j>Ib+QaS;#`oW_RSci82j13ZlSA~rObaRUfmRY|RU$%b^Ht-kBqjvIOwXqa9mKglAOs1( zqfle9-RdF)1)aADjbBILX{j`ne`*nToaItc$4{^^LSI4KUxocwM@K$h3u2R#mPY7z z!|CT$ML324-K;g>D66{Fg6SfH)OR&f$Y+5N41?!l?ZCpFbb3 zegX;wMAnp-f>b|)!y3YXysRtm8YY}v#C{JLt2PH$RJK+gN z7$?1PjaV%IshbLLXApeHda2k|`5B!*%F3MQYq$XuTi-#i{Ex5%2^|szt`gxuFU1cd z3Leo>Qc}N{`^yj6AnzdSBg{|cH^WMRA@6rM2>~GrPdkPe5tue8(0HpmBHQ5Z!9cZm zSQ@fU#(*_gOt;Dp(5xx0-oF7Oe`r`3nC7ELgysSIUzjT3)P3>dMZWbUr17YN*XkG& zL=wZ;!Gl-t?{ z{K2IGr!vf|+7FZ;Goa_j+y%wWO7rF6({`fz6~OSoFu~t(1{V&&sQHMaOkj$46H5;% zmb`fRaty*7n)<0<+{SJ7rKM3wW;kgE3=hGKQ-K0vvys4|wqgKj4MLbzwIC;F7KABK z&!a8bcrjs607q{8bCO6G!Fvv27=+B#b+=R+9>7yOJDkwSrsnMj;Bg3&Oc1reG?kQ1toS?A-n!n?6QsX!CTs{FpJbpUtYZUCV+# zo`4ZvPkUMOio*|EKY!=L>&F%WfNhp*DI=W+@n)GD^WWsyqE(sOEmq!n{@kpwu@==# zUccXjdC*-yQX-w=BPTGUIalAR=%bjDn#wl*3?b!j2wY80oR$;4-|m`2w?EUr+#5Y> z@w{VWuj1TMCuK}K5x}3BbbU$mldw%=En9_D+Q`mv)28@QeT0>hU&B_iT6%RR1W?7p zA9yY~J6a1RP?qMS#lJLwFS5K+*WCIwVm?G{)E~%|`W&A^r~lZ(i@JZEd$c{^k$Y0; zN+$~ZfpZmTIvp9`xL)rjhsol)=cLx<6-Q9% z*l?|lq4nV-UN~r&c1H=rhT~s)RB`^3GTVC%QkmYkB+_E|#UZk>U#~6X+-W6dA)}O5 zm3o2+z?!mYbP4_YtU>sB4f}-M{$wL0qob8CnT&VS3m;Sm*86w;*nPiBc1TK2=Om@2 zr*DT-f`Y=;v*6YbZ%EiI374v{86u)*wfKT#LjA=sq1?JK$e-e{p~aGNDcJMPt$xll zK}2wnrhVObHy_XDMdKaln~(&ST=+HTu;iX4V+ZiEkFOpha~Sr-X6Hjm@;rlP0HH*~ zp+Ry2Gi(Kh{S5vFJyiHB7JB-BIr%@6xUt_~!L%0x6nQ%rfHupJFitjOOi^uHH05+* z$S4P#CLflW*wJ;9(ft18^Sa;@C!WH?+E*YivHiXza?9I1nF#r8FmCdG!=GP?A5@D^ zCoQrs>*3C9!@zf|hu5T8TUkZOe#~`hcV|tITqHi9GiOeZvJt}mGita%gRVqn zirfybh^8MuYEO@bDDy0BVQcjG_mEVa1+A}4uZHvWhDaflrj7z6xcdErcFXTT88A3E z`{1~w%>}Bh!AZ+AbaHM;_7dXH$Uj1~HpKP*Z+4BGc*U%;hiuI-MJ#d>AYrhev^R{{s{G>WnwAEAHr z`0?ZQ!%x4FH18dE6lvLj@B7`)sOXhX6U>wHi#QA23XK`19mXkI9xGTA(u+Xa2^`toJ)ItLYo;W5Uzps(Z;qwokCUEoZ|s9Mm-S?iXo|x z8B~F?i|Qz53M=zvZvJk2&DSn3=!lJb1k8z%!WL)eB{c>G9+9gui5!>y&Xehp^Ud`_ zQLdie@{3{0CYkvM9`3F`cW7FmmjuHZa~6IuY`Zf+XH?*0>vP77lt+x1{-c}}C-*HT zbjrJ|9*a;?=NcYpa5<22u3_2U1sA)Sr8t>srxp0^*w5^wt!03}(R(QKlP)F~qN2JF zmlhq5%aZ1#Z&z>m#oORcB-(e#lI_5tbKk6}#Qxilog^B*y!`jfuDDb=$)>IDgb|;ofDX%n{ zduI#jCU_offew`*X)sY~+!XEZ1sj%Na1`%)g`QHO=k-}AhF6bp#&NAct+23Z4iVjO*ynX%x@kpD62_ zKkIe171d~O09z4aA@Ar>Kr z%MEH#triyinWeAR@SJ%pnlEPZQ3DrNT=5v-^{=Gqu;7g^uaA>G=5?U^-vx8-yY=?@bTHznPF06a}NSR7ez#+oF20Dr{c;Je%|qg?=SyY zIX#SAJWo4s-9Q@ssF(N5IQ5T$g~ueMU#$io?;TW%51yAn#|VkrzB?<%y)JD zN=+7N4Q^3z83$B**Sb7I-%;pG%+AJJWSlRvNGJieM17PJlombUq?hH$=&GC)DGUkx z>(agmArJb+yiqeQZol|ONmG61DzA0@+(W-!Ht)(1jQvn;BJ*6cus*&Yz`~Ky_o2YX zG&oq>+mG|npa{O{TxE3HX@13CCes~Hr~$4V$4AHx?Heb7zsF(sKhG+U9GO*GYOdoO ze0AHKmZ5Jxylb(BF)DQWihcW&2fI$lM4Z2VxUb~~mZb3RJxGXdZjW6tJl!p6U3l~O zIv%t}N!g4+)qz3#FSrK`H%Z8rG&PCnI=?w%dOTsatN!O0M6 z%a<=->ZDJ5fhXzLrDxTj`LEdZNk4zf*>v*XRo553ZJII0+}X#R#`4_ZCKL%1=3m@JP7!StpLjfTuX{Hy z$tYbe68y*C5LO??^iubFBCIKMA+C z)OxNv7$kJ^ma}gyR+hOm3$6Ku`AbC4% zBwt<&nq#kPC>sm+Nr^$P7?TJAqVf$Trbkq6OO~>_Ni**VYE1s$?~okq zRUY@;pid}A3O9YwpE&f+l>H6IS6Nm&-6*Xr$h|b5KFi~l4I4MEPPARn{5WRj?etQG zn8(|yFBzT8JE%<(bDQJxZOzJ+C2&{xRW&oA{q1A9<;ri9%|*1S+g}4E`Ka&OqbdC~ zs0yF|`8Y`LZZ}<>_QP$@oMyWfX?<22KG)CO(0$Viz--|yidIOJd-irAvd94?7pg6aD(lzR&D@?wl!NHn0edmqjI_usD! z{lLJKEOYi7uUEKfZQ4lAi=5nyyZv0=Lbd3^XMHsjePUWzg-MNGV{~-*@?!S%-*7wl zq~cAoaG|ELYVUEu%Drd?Daj%>aju`XYVZ^KTil_rIKS_N4XT)Eq9Q+{n6KPWH{n4Z zbuCPLKS{|mGE1YEjL|l;e$p#-roMipb;7qF3gz$^E)Qd7$I!Pclsxr4))utS36F>_ z@HDknSDr9X)^O#4fc&elUC1 zELwj5W@nF~x2T}dvoXZG&DwuhDx)VICay<&D#xfg4&3nWpjm{zKh${7oCkcEDUD&U zc~JS$f){77v44H>AfgY+hqCF(E}rE6y?gInIsJeV5zZr4<^%R{>Rx8z1XdoE-7vJI z^c>Qf2+*-S-QA4!m>-2nJ*E!9zBb{lno6>AyjH> zJ1~Na75ISs3DOy*&UU^>&f(X$XVJE-Dwmy8-#FrSeTBh+K}W341?p$+=Sd(}mcN`| z4Ht|Rj+~8^Ev?*9-gh-0%E?X1yuazMSUGhGFEFR^KY4i{cJ-O|aI^u=&DtLIp@+Q; zWIdL)*S&jpQQLOkfN`%YDk5qMH3iRvN!sEbrL0RS{N9r*k2W>{I5DO$}3AHhmN^M3sJG2DDeL+-@P1Mt`?3#xB; zAoq$e8P8w|Z~Q3|5dLSYy~yoC*AV$QfGe^mG4kS`1Bcq({=wH~<=su%UdiVg{61N` zxJMUH^_2_XuD)dFKt5p?lUm)5*M2He!is`F8H-25vZJ&jwg7l)1 z=cW=dcX@w4^2js-#THe&Qs%5DmwUK~%=_ z|B=`=ara@aFhO1f>vm#-#nyjz{K9&dfk3GB!{T>Nlqr1D)0iX`bar~*A5+EeR>U>l zHa2$u%ro?}%ct3mpn96!-CY&EyGJNmY0Kty_v>+mhDM2vUx-af>pmm%zqfjM8R|vM zYmiu-;l7~6(eH<1Hqs9qXpG@D=p z4Rv)_>`N(o;xIVDwA0zc8k?ejK`jP8QD48h`uYlLAD^{MI59Cw-x{ z*1Qu+HC0tn_w4lU-qY3DWZ(X5=&D+`hxLIkr>6$~x;0G5N&ulB-oG~wdhIx8^4|x( zkNEw1A0$%`$J00`;rxkATNdUqGXA7P$?!FTA>Z$KcB+=P}A%wFW9^v?4o+{VTI`{!{WYdOaIrGEQ1`ns`9#G$<}Y^@@^MGJb27O>-Q8ckMQsy%6P#Nh}qNH+S*sIM&!TT0LN#MGp4*> z!td$n>GjVO74Ev^UeXhGAgrLhuZ7ybcX0g(D`zC|+xPF&fL%ao|Ir5qbLhp$U}~xd z;7aXO1s|NdST4$N5XzVt3^J2SS!F-QVQ;u~sBGAxK|gi}DmI*SQ5<)zI7ijyeiA{0 zU?3U*R8?pnAoDfar{Z$M2bKEc;O2e@Wz^(v+rCkys2dkQ%x_2IFNmF%A~w~ZR!kjW z6Wcgd1Ia04E{+L3ZGvQk>mO$TW2jKuJn#a0GAgxl=(e_&G|4AiiC|J0151oLMO+sX zG6`kEY0V8Gf9;fob?&=!BHTJWICAjoEyMn8IAQ$r#tzLRE7F{!&Ij6p$DBXwdd+x| z%|UDD?7nt< zz`yT>r?p5ayBhty7d|sUOp~^)pA3Yl=Wydc26#=screszStUO&xy84JdHqv|Z)@Ck z{Zr^3dLxxbB4=@A$kTYsiK8X+%_hD{XRZe|OiJM-gbs~Fe;1z_wZ&5QpQ7k*)cPqk zYc`!-;5kUD-$JPYjK;LKo6?MP+4A5ISLs7CY68_A4fx#9Ale2={Tz2Ql_94-P^4-T zOV-BTy?TVV+&!h-( zX8qkyUorE;13l1^l2YDgS=Guqw*=Ch|HPZ;%bp);F&U)#--8AO50l)vO6Gjg!?s9y z*E$kxMfHB8kN!4si=6FlM~I!+V=vB&4i6*IuZK!kv;og7~2Zj zU>fp0`^ghmLSRXBV?!F%_1vzwrJ3v<-dG-MF-2Gp{dSmEDz2P!J#<{KQi-seU)DX) z`4VHPg||?1+IaNVmp`8u;xJHg`TUjUuqBDL^|$E#ltd8 z#VQNl-tYXe5czf)R5z<^!-ma`y+Y6vJ?A5qz5x*)u5G{|F3TAydoFCoqCH&YGq!u( z=&wEe^_wo9C_6arSZYejiE>@zw$H0A6#UiZMi^sTqUKS(DXFX6E1&6md$D=(cf|1Zo&ykZHYc8% z22herv`9D6IZt+=lTJA8mn1DXV!g}fXYQV2XKg*|l2^*$ zqCfxqb9|3@{VV%Lc;NsV-lb63PCcO`Z%?*z73$hAmIeL#^>h8Ycq{~>6$euOVoe={rCCDo1mGUKkt-xu|{NIljN88V-|q@xV*Lz;wKgS zf2E$&Oa+NX8jpp69&s>@u)i`w>vrqVnIG5JdDn@H4yTNad{M^OKugFMVYD^F-emU7 zcjw>oDW2DETypLBptJU~Q@Ra5UG-tIPu$Vw${Az2e%1cFt+mmz#;}sw!L$SaNDeum z%p6pcXP}j=4C=ay)d@iSq27y|QFW`ARY!N%&Q&L$e4|r4?=H0_R)-cBcgruuf{Ts8 zhKw8~t+e7yyMb+48@}>#y7mE%?KX2gs}8s82(u#&>-aCdDuSI^em1AGyf-yl-1VGF zbN@Bcw78rw>hbI@+_sp8S8DL|Y-zfxo}9T+Tma~kv zlkM$S*WgW@B%gr@qh-9FXmcI$SXNLs&czxJh&eiA^ADUqMO(<#*rUJ)Cz&I2s*5(v zjw&{1+I3aU=trCN?mpCAnK69Fl)Ta37-pQSHFw>*O6~|4heIR!^*Rl@S@)BW6EuvHQde5h=?&bAYH%=%`<%H+uTW+S*bh6;_+->SiFpe|LJ;%Hj`DQCRkzlMnb%Ss8imNHn?w@Oq@;IG^`q zYrC5c%S^5<`UkykuKgwMlN95(@9%|#Z^99(XWAV`Q~GNctq|d0DQvZSn)8g@r}U7% z^{?$}^A?a(SnI|_T>3VZiIj-v+t<5Yj-y7K8~eS_5KSf1!T${YFL1@G)~r{tnh!_I zmN^;FOu}vK%GGcet+MsT&n5CHJH}PhZ{=3oYGdX6+3y%fVFc5I1>_7n` zzriyCFs4&YIhFW+ou{WKkhJHd z(oj*^%#4TG(+6lJCq@Pj#m!*cszSBvXZRWVjYF*6wQPmV_1oWK%HQm~O0!hz zvnYH?bPK~b9qqOZrN_*>8uT1qDW)x-#~$87k&?COk3n;cjUPsM2unlK1wA{vXLCf+ zTQun?TgN8I=^{T7en^kvoz7O0_x$;JoDWdNya*S^WRVY;ib{J+S(5W&tNO?P=f4*bagt;n2lQy3E3NA3h8nG-#~dIeT9}m@-ovOZpGC;bSQLVhd*?|5mBy zO^7snI}4p(pso8@vxG?3EDoz(%7BQNH_Y$Z#GU>mlL#8s>GAXD&z{G(K#2=$Wa0U= zj{A>b4Kysx{IAPFr;WeS-K`HpBr*3lWW6`AH7}Z+v0}aQYF^r@bzp>Ea)-GRitn=w z85Cxr+a?4QP6%d{wroA~#SJv8^>fHC#4MxY8Iwn|su9*zjM7e6C4nfHS&hN6vX?EI zP)Qe-C^``PohR6gKR!OO&mi!^!>JW4nBR*E2j*Ai@!Xu#etfWGwzeh-<{l_;+Hq~D z;-V9NC%LHSDA8!t?Le{-apH-|JMPhw3|ZAZ^`3W=brD&9#87*2i3mY_x4-3$tx;u0 z!Z#-O$|oEQ`~n4DgEgJ7F%Ju%x%&xH6U|eU!1=ecki-$$r5!@+$}&tJEWfXGWEGuh zleD{M^zLrfUAbWWUNK(83`hey6zWwRQ$gwIicHe%c;8v~aV#ZLul$)-Ph1l*^WnJb zOwu#)5zy?oDG#y6u*sXbrEkGld<3=%LEW&6BBs1ZFY4jf<4vu~!QNQ`Dw8G&u8^`s z2FH@0naE9#cV{klrg?%wJ4Q_^{Y;(&de-5XvF^0%|lB3)b-dm;&ljkkIq zwNq)2*tGgeQ>RYV%2@VmB)PpnDRZ0#7|7$~NqUvy(q8B-faNGzUkV5Rj~6YQu_-~$ zfl$tG?Gg-C1h*0TeE~}f3N>Nd`^U{4HELA5r-9FYF=`2c&G-;hnXC71G5ST(m#jNi z<04z66$vU{+-Wb*oxlf#O&t~f7-msiaVVNTFoWoB;gez8dgDFG%>UQi28ae8k#I_# zeRDBq7G++{p^?d)44Q}{&oXTlvnnm&KjK`}$CjCK&3h3=QW2%2-5lE^IxA^^QTz&8 zPldZn;jTd~Na}!TQAv@Mgsk0TkYP=YHv^x5@M~#lg78DMiCqyOjae=S#$eCq(Y?WO zr>Ze%@%j0|)OupT^=KDeeERm2U5FHLL(9(1=|!HTJfY`Evc-6}+fmcYD+QyYddh*x zB7>jjZs1dAJY$8pH;(35OmU~VwrrX$B_E5 z#BM*o_4PIk@j8b%ygdsi*}>iJm@s9Ec>Jhp_%uSn&z)dvt*t-8It!N<%|tT6jry0` z3>dgW)CrZdn%G#txZjSTasB|bj` zlIEM3980Qqw=K?LxZafUWDKO(G{R$o=@K@Vda@EMRT|Z_tXID;nPi^sGESloMvF+y z%~P`Q=81L4XE-9F>*TKUmby{-_Je;9wv70FfHN0)&5Sr+6(*Jn_2Ut>SQv-yZub%e z1x@vVUYt@)(4gV|1is-O=nMN%XYdXUb46lLC-o~zU zsvW#nKWji49cGa?AdQ0$^fXh}eDh)LIleVxduzI}TsGHz$!bf{dm3dv?x0W*g@!5( zo$WmBCB_X)N^FhJQ=Mnh7j4^pU!`U~vAlA=;=yT-g1UK-^knU0yRc>Bi(sZzFU<}L zW(;LDH!Mai=^lKlhmo@8x1tpushPo_KYhA)WqHUQNK8MawzQj*U`WP3IdMD(hk?Xe zg%Z3UA~*CY;*FxjTb6eJzgTIJc%F?Rv#|d4>$R19np*30>zy)7#vdolKQOyIGZ=CA zX-34W@(wQFk1JIxddl=-v!9cuWfbSIyQp{NA*-eruU%^TDNiLhWFvNY0+}Qlb!bKf zVeibn*Aj!(kuA@Y(X4*5978$6x473?Mssk{Wf%(G@*LGoNB(*AXban(LRU)ld~PM| z^qV&g3=GY6xi%~(})uk>f^LQk@E%P$&N z&=P?UJldWWZ!(_Cz0R?iMrsZLxzOejkb8f`3Px6l7C8@(+FH6{)DW##O!9q7RLqHfB!{vji@ECZ++bj%QAYANztk z0#Sqr`yL_y;%mF05M?z^B22Ci$q;Us2G!xJBuvSKY#gqdn=VK&&etE%#Q)0pAy^D# z&8U7v#udyLOyge?@0iK0s;djDmU_gqCu$y6ym6pA4m0gUbke5XiJ}$JCg~X&B;V%+ z1@VBx+9S;kSLZ~V;QRPbdAX$lHgw63@hg*S={JYuE%$FTJgZW-Hkm_2t@oJQHPqzQ z)suLeAPRkPjUYW`Up7$iABj+!lX$=Iwhn_gRDJLi7q&tGa0?;gAGf>+HZ17TSIpS@ z_jGwUcWH*lufZMnfOZQ`5uPA>$|YWE;rBqs9K?rq&mN<}oe^${8MH18`mf6=s6I24 zgqPvl-X^F+H1BX2(|{|q4!)F~t4i+EpxqPB-p&L93FV&<@1_ zO@B|x!G7b$`kI<%$^m$4-Q3+Rzq(Bfrnn_XTG*O%$?l8Wx5d;j0?!;hxw*IS%5$c* zN|F~X^X%-|1|Qdbi9aee-XuuRcl+kepJ>Yz4JpTQq(m}e2aJM{#pA;ugud%e9>p9D z1}Gb;f)|+=fSDVf*@1oxqtBg`oIae{Z)jZ;%X0DzbuSkLAA7E>l7Ui&P&R15fUodf zbnd9{6ptIBm!$U~T3twWru!T=m^BN|XNq@|3T=(Bi~$7Wp%5{=h`=&9fO8%9;)e|75?m|Tx8TO{$76T_hKigDBMr%aQXj7ZTM}nS zEulWn$Bm9WONmA?%b^paB%n~|sm@_1P7LH?Nh$#T>i_hja=tnGoFwi}Otg4&6sT>f z5fDO%;nU(c>V!90Vba3+lI4f}#HC+c3l|{O%8H8{DO|9r z+Fcq%y6nTa@s~FL9Cas-4s?aS*Trh)L`Y0&?6e&2mcxTOOA zQ7jU+jmuF#fChSOu;@wP|H_dPgeZhi?b{?wm)JOB$d#w`^-`5Xo)Co^cUb0h>9V6` zBTW-gVMuI8^dz#+V~tBi&TyL5@f;mG1IVUN?CO=A%Z$7RdRD1?WDd0J1xk11b4iT~P> zw}2@=U$>)WEx?<)KmS7}Vi#B%t@^?IEHhk*_~!v#NKj>TG*`#tR&;NWoF;!c!ct8` zLl!gy#v$k&DSpo5~5NocIv@ zI_$zj*nRfDKkQiD?J7T3{Mw4m(tp2u=il)Z`NJ7$ESWlf{r~oblOAt!?viFpC&AY_|rVs1M)gDmY z>({S`-XtI&FOKE8v(-{CM>?z}gu+x%q%i`k<l_%i=Ka+-r~lZvtER^P?F z;D_hTVN8lUhn&e>eEK=FXJa|TfHs5r!e0t=2GIe3uFq+6;SV5o5evP(j7HJMl`vF3 zbn>#Zqv(zBc*w68Vq+Zxd<6%FW|5$a8iLb*X|>;fr<+$}Se*oFimTAB-G4#%x;B5Y zsR^Q8Ui)%Aqt?O;fohD33vxybHZsAFp5@)U6@&m%dNLmynv>9|Z61a42V#|LMlj3f zVp6WvD`WHu?1MB)u*5KZ3`6@ht;B+kb6~|)TiYJ+Ov+)+_@(24uT=tpmNZ5*4^g*& zT~$>Ck4XC^^s+ceUAkzWH;FdU4j+8@0-q6Py-4Ctc6Rh_Ehu{&#iuz}?hUZ#&X8}& zc#bKWE^K>^6MajOsU@YFa_-h8Bn(chxHTtH!`EN`NMN8aS`&u4fWq{7s6T6j-vS=< z3+5-h`==2TcrN%mVhgS5&z^fcRm?7NjRgvu7#TI3ehCv(6TdK_ZFXhNEh30(!SV>> zXQid6HeKk2V1L)+Ikj*yFF-IeGBtel`t=uX4*OOP7cMht?JVGwkgRQcAiR_)scBSQ z%m|8&eNT;E<+P#WLpyWfO{MYxuMv|bEsiked1f=Q;NiVMb~aAgw+Op74@8qxL>^&p z?!K2RE%`I>l|UPm*9(M2>QP=a9bB)HTKrJA`-A-Gad{(@+rKcbT5J9fW{`rv&d`;3;C7Xp7ID1m0v z{<~Hi*3IeZrQrHN_7(ea{noANDmU@qpoNr}oP3b$q;YcN2J)|&?7h08*kWiTAWXul zU@C7hM#Gi3M`UUB!f6pj6me%7sl{@L@m1>hkV#YsEp%g4osQv%H4Up@c6X0A3+B%U zAiSkM+PIOzeb|5j1Ei%Dlo5}XvT^`HJF}A;D3bwA#{(v5Xb8s~JV2oE5G0Z~{DL=u z+-UWY0Cj9n@ycLbO79+T7=t7e!{u<>9<9%~-X#v)pD8qDg5icGtS{<_-y=+o_xJKz z!+RDJ3O_cfsU>m_h2bx~Y<4c1O42qSExR5%tI5el04vjV&!BCq5LQ$N<6*XZ7@opo zpY}Vqw|wQh(cFB)28qTAr+-yYJ6=Ue)-F4F#-g6s8&5X0r?RGP!v#kOTzdBEH5C}h z=0oTomCfmSA*M|=uCJqfD(pJk8*1f@%wy^m@}pLw`a0BOm{xIIe+a9lnKOZb++PWz z!FZ!;@7}$T1e|com4#p!ilgEi(?nNS{U+5Hnzax7U>bYEfE&7AsH|435Ce&rjHqXW z9JQ?PNR+vN)FLD;aqg@E;y`#I;~6L^Df(rUL#)i9L6h`LCVz!h;6vvezub`!rk}W% zy?e^NY+EL*;C zp1K}mXF@%Em(7YMszS)Wr%AyWnQ+lI2R!%66;HAkUF_&_ipq(vOI!vU+{(EzKvtNQor z=eKVqWFm0Jm3M&f&M>Xh2U`Mbo>Obv3O7lsjT>*mxIyjb2XT1?sl#shc6i&6rjV_t)(sR< z1*l0()YI4nNdT8naOo3|GU{_OeoPqEFwmicNvgj3b#5e!Ts`xW+Fe7LKQZ?%hN80`jJ!{)|4Hqa#zxb16yqhe0zUj74Lt!Nb$DiSJFybrM!&P0TRmXs>xPX;#fEJcKIe z|MzaEiFYdqVpj90872$23U9G|^BiOC=a%Y^AJ8Kr)i00huNcf&rH1|I$PEo+v9S83 z<_sb~azUk%euObCy%jo5r>=e?QM*xh+?Xfn_mzga;B!@TCSY~nBq)$ggpQC~O7FwE zgWs4ZXcE?0L4bLXzKOv!jpzt<)RLo)amXRZf!?$A#8<6;p^`R2&((9?dO2D{kfMS-VqHV zcctqtkzA2^H1V65jmLQkPF2G?X8Gl*ZHOu-e8_5Ia5;$x-f7`5E^uHvl-!`t1zPG4UeWaZJ-0T4zNFVc0AO;lTwwR*|o zh*havt76|q@LQbWz5yeo(0t`7Q(CBKTLmI{e{IBF;hE9>>bL>>K*GOayg5-Cr{n~!j zYccg*u0^k_n%c6Z9m>4@lqpd zWaTl2JFT^5k)=z%MwMmdA>Woa#W%kv2j-9{V4c5QdlR7hdhaMB&93V;X*8?$H=jLQ z%!T79w8GP(4BCVX;&c)F_FpvRDtHhhpourWF0rsE+mKrJokB(Wfde+b{1n1b!;$v` zGuyt!x4)Lo9NS>amRoc8=(DDY#$U!KjK!ZPiGRAB`m*HVLu2@cpzGgD92=imAEI8m9;(Q_b|12OPE09!)Y6!knELAt4@Lz{emyhOJi*c-c*t?s zjei~#I$ZEkJ+=W9n0-6n)S{@UXevavzm<(m6`xFV%jEuuU{6=?S#h|u^@OCob=HP1K)dC{$x1y5}6C0b`{*IbBkLm%m4_P%~{`Q}xrQ$gA5v#6Z@~WXsxEJl^!Sdk0q|YWpN=M+w9hA1)g?(WmAh#$CQV_4_mH zp0U*|;{)A~*8$Ywu7!r?I*uU6_#59>T2ZWB;1Z&=wcCDH^zmKs&XKw>v-$0*MjqO} zefwzB|3pc){r%~IuA{$u6EAqNLdI_ueV zL9aEs#YGg}sUsQVyS3a$ods}G{`+9qD+9eBi*wQwmA$?{xBJKje;us$V6t_?Z6o(6 zCBu2<2>@~?Rgpa245b4wY}KlI+83W5hqon#_Qyv?1t9o3m;!9M(@_TIGZ8O&rtTqt zh@X6=#4)2~Wk~;`#+!{doCCa3903G z<vV4_PY3-R^y(#9G zQ@`|3LygHYyxucGN6{=Lg)`>7{UwdqFgDW18;z^A8y^EgvWlV3Fe-XTilpPdtnYi# zJl0RSbLWmr!PL~yVOw*RQ(20xh&ycY3Fs&7QJ;pTzMV*kyc4| zPQt%DlL={+flC)^SK(Hw++d3ZL3`9hpJ)}~N8;$<@cresk8FN$!dD`DyL{JSp06VI z4BMJF>F$9+lU}p)+N&n|atlsrOGwJVmX>P0O&ndTYg{mbrFLtW$4mQ)V`u^_s9Unt z%3I!Q<+hlyl50*GI(_(^)4H+uOk(7P(_3r*yKURNsh+L}IK}hTUtMt&%4oU-Gi>3U zGiLbKTX>pEr3R(G-0Jur4XET~_}mG!(a4*p7b7$*TJcG>xPwEkSiaqmx}o&@Q}ay} zOBPn=nNT6o!#|}}pWe1_*CUp??aRr`p)Pro%9|f}i0k}<$~o&3SeSj!xX2aB#gdja zZ(%CwX{STH1&%5poV>OoONkBG9GFTW*qWNzx`Cx-Hsx8%JVmd@RXwr&5PnF3IUX;) zs>|Ek8hcK>e#P}D{edYon8kGwRAj^3KJDq=UtUbE7ffv&N>#!GNO7$~VbK1;PEYZ4 zFq@c1tpf(hFF(`Bp$Ne2k|COFedymbiiFkBRcxmX%gtIo`Zfh|Po^&!%)f&9V(~@; zjS6AjE!>|+MMPB0Y#jdXMOs67+xJv{!ls&0ao1k&i;YWYZ`^ye%yy{~X@5y7WKFX@vd<#N$3byV_Q7t1AMViqCma>*ceS zjAhG~xw*N~fb|U4%Mz&I#I_gwi|4IBqz4p8U|0;rD<*{Uu47gqzAWd|$MK4aeRSTP zp`JmI(?*sSmwli%-RDv{TiQY$Q3^3MRb{D#Me9Gm^>M0MNa?8o2?>0>5ov$I9`cEZ zz3sE^+kHOo&C4NBm*jM8^=VDQAuHAX@mt1GwsYuvY`Z+rmCLgkQ|c<~`cU&lKepnr z_=JQnOU<`Tc2clx%92J9v!%rTf>eCV_|^B8pXK4OyDWS9Tt0jD^?k+oRq(<$8ef`E zY0f<5{q8eii($wml*IdYnP_u)oEK^(0jnFd0+K)VPT+UydSNTCwjQ7$rImbg^;UbU zw^#2D`^naug6tzJz}dU z-e_(ai=!7gkWpKN09xgWDZDj0#~(p2i?)9``ntW(dctD(f)v+$em*&E>JM=xgzoOH zv!81(oLu1tGVXz%QrPO)gvb(583|takP0Ms}Eww-|{g!D2-DpR{A+l!`TAt{|VbGN`C1&^a>>K zp_XN)CMKcoCH9~RF=gGbL3-1>=R3*6!`z=)0iAstt`NCRE!>Jm9h#DG<{k)FSdRj2X991eZ~Y!s=Hhvr&I+$SQ`1qK6pMPyaUUN zpFEKmKJ9vwvMu%bR-8_H-ZtNu7S&SMjjf-o*WO&fD{udB_3q%ogZE_?F&RkmJ57?n zcjL#2VSg!20xj#-UpuFADLPuaF6r@o?a%uZ<5u9T^Ok}g&sg*OWl*L`! zobET|dbq+?l0{f~`&}e%(Kl~yhdd%n5fQkz((?14z0GZC@P)U2u_V> zR^lMFM~&88GematKfh$(7UA98O<0TX(+YWqEWzE&TZ*r7pYnIFXLI=36JK_5#oJ80 zN^o^s>qp<=_~zicWc8Ma-ZrSdulVBM#TT<1o9~<6Tr!9fbPH-KkrZL1=be^QZYuC? zqwN(6khTMo!}w3g8}9L93&*ld&OB3KhWuzKm{jTAP2NUu_%upuk?N~8)-UsvD8CaUY~zwUv{mztVt@yRA` z|Gd8{yofm^k~qcnyWK-JJ}l;In>N`=4bjb#^?31!;%v}U2FPlk*k5?rcldsqQ%H0? zkPAWynj<3N@FJ5-r@fG3|JW9QQ%A_No7NBvz8&gSd4v#J`|RqQ2RAKw;b*<4jP1ey zyY%g|PgEGwjXFB!JNEw9Htzd(2VXw2pJ>2$nKinj(YAMPNBdavNXJDnBSv=gny1g| z{6_JNIv-Bw&Jp{d^K^8E4zUk9!((Ue5OAzBKX!r#p&aW(R-NdvL($U-tvkyH0T4QC ztIqmS?1RqYx|2W<`=FCR=p+!tKIkM6Itc`^4>}2iP6DBmK;R&B0;EoWBm!b5KI6ug0EvUp36MGgk_d>M0I3roiG9!skU9a9*aw{e>Hjf6dexrn z*mZW79bF`D|C9Mn?(Cv|zx01Nq|504-bUyDI-f+B&PUMs2nfxcv7qx2bnb%AgCJm9 zXDsM^1f9F!|KEe)VQwyt@s4<}4nf;L@Zoq5t*M_qf&Wh7tv-5udeWBiL literal 0 HcmV?d00001 diff --git a/tests/test_doctags_utils.py b/tests/test_doctags_utils.py new file mode 100644 index 00000000..19a39d25 --- /dev/null +++ b/tests/test_doctags_utils.py @@ -0,0 +1,11 @@ +from PIL import Image as PILImage + +from docling.utils.doctags_utils import remove_doctags_content + +def test_remove_doctags_content(): + img = PILImage.open("./tests/data_scanned/ocr_test.png") + with open("./tests/data_scanned/groundtruth/docling_v2/ocr_test.doctags.txt") as f: + doctags = f.read() + actual = remove_doctags_content(doctags, img) + expected = "\n" + assert actual == expected