fix(pypdfium): resolve overlapping text when merging bounding boxes (#1549)

get merged_text from boundingbox instead of merging it to prevent overlaps

Signed-off-by: Pedro Ribeiro <pedro_ribeiro_93@hotmail.com>
This commit is contained in:
Pedro Ribeiro
2025-05-19 14:26:00 +01:00
committed by GitHub
parent 12a0e64892
commit 98b5eeb844
52 changed files with 52225 additions and 4690 deletions

View File

@@ -2498,9 +2498,9 @@
{
"bbox": [
148.45364379882812,
366.1537780761719,
366.1538391113281,
464.3608093261719,
583.6257629394531
583.6257476806641
],
"page": 2,
"span": [
@@ -2541,9 +2541,9 @@
"prov": [
{
"bbox": [
164.65028381347656,
164.6503143310547,
511.6590576171875,
449.5505676269531,
449.550537109375,
628.2029113769531
],
"page": 7,
@@ -2563,7 +2563,7 @@
"prov": [
{
"bbox": [
140.70968627929688,
140.70960998535156,
198.32281494140625,
472.73382568359375,
283.9361572265625
@@ -2585,10 +2585,10 @@
"prov": [
{
"bbox": [
162.67430114746094,
128.78643798828125,
451.70062255859375,
347.37744140625
162.67434692382812,
128.786376953125,
451.70068359375,
347.3774719238281
],
"page": 10,
"span": [
@@ -2607,9 +2607,9 @@
"prov": [
{
"bbox": [
168.39285278320312,
168.3928985595703,
157.99432373046875,
447.35137939453125,
447.3513488769531,
610.0334930419922
],
"page": 11,
@@ -4065,7 +4065,7 @@
143.6376495361328,
528.7375183105469,
470.8485412597656,
635.6522979736328
635.6522827148438
],
"page": 10,
"span": [