fix(pypdfium): resolve overlapping text when merging bounding boxes (#1549)

get merged_text from boundingbox instead of merging it to prevent overlaps

Signed-off-by: Pedro Ribeiro <pedro_ribeiro_93@hotmail.com>
This commit is contained in:
Pedro Ribeiro
2025-05-19 14:26:00 +01:00
committed by GitHub
parent 12a0e64892
commit 98b5eeb844
52 changed files with 52225 additions and 4690 deletions

View File

@@ -3099,9 +3099,9 @@
"prov": [
{
"bbox": [
323.408203125,
266.1492919921875,
553.2952270507812,
323.4081115722656,
266.14935302734375,
553.295166015625,
541.6512603759766
],
"page": 1,
@@ -3122,9 +3122,9 @@
{
"bbox": [
88.33030700683594,
571.4317321777344,
571.4317626953125,
263.7049560546875,
699.1134796142578
699.1134490966797
],
"page": 3,
"span": [
@@ -3144,9 +3144,9 @@
{
"bbox": [
53.05912780761719,
251.135986328125,
251.1358642578125,
295.8506164550781,
481.2087097167969
481.20867919921875
],
"page": 4,
"span": [
@@ -3234,9 +3234,9 @@
{
"bbox": [
98.93103790283203,
497.91851806640625,
497.91845703125,
512.579833984375,
654.5245208740234
654.5244903564453
],
"page": 4,
"span": [
@@ -8153,7 +8153,7 @@
62.02753829956055,
440.3381042480469,
285.78955078125,
596.3199310302734
596.3199462890625
],
"page": 6,
"span": [
@@ -10514,9 +10514,9 @@
"prov": [
{
"bbox": [
80.35525512695312,
80.35527038574219,
496.5545349121094,
267.0082092285156,
267.00823974609375,
641.0637054443359
],
"page": 7,
@@ -14214,10 +14214,10 @@
"prov": [
{
"bbox": [
72.6590347290039,
452.1459655761719,
274.83465576171875,
619.5191955566406
72.65901947021484,
452.14599609375,
274.8346862792969,
619.5191650390625
],
"page": 8,
"span": [