feat(ocr): auto-detect rotated pages in Tesseract (#1167)

* fix(ocr): tesseract support for mis-oriented documents

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): update missing test data

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): rotate image to the natural orientation before layout prediction

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): move bounding box rotation util to orientation.py

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): refactor rotation utilities

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* chore(ocr): revert layout updates

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* chore(ocr): update e2e OCR test data

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): avoid swallowing tesseract errors causing orientation detection failures

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* chore(ocr): revert layout updates

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* chore(ocr): update e2e OCR test data

* chore(ocr): proceed to OCR without rotation when OSD fails in `TesseractOcrCliModel`

* chore(ocr): proceed to OCR without rotation when OSD fails in `TesseractOcrModel`

* chore(ocr): default `TesseractOcrCliModel._is_auto` to `False`

* fix(ocr): fix `TesseractOcrCliModel._is_auto` computation

* chore(ocr): improve logging in case of OSD failure in `TesseractOcrCliModel` and `TesseractOcrModel`

---------

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
Clément Doumouro
2025-05-21 18:12:33 +02:00
committed by GitHub
parent 90875247e5
commit 45265bf8b1
96 changed files with 9864 additions and 5258 deletions

View File

@@ -40,14 +40,14 @@
"a": 255
},
"rect": {
"r_x0": 69.6796630536824,
"r_y0": 124.83139494707741,
"r_x1": 504.8720051760782,
"r_y1": 124.83139494707741,
"r_x2": 504.8720051760782,
"r_y2": 104.00000011573796,
"r_x3": 69.6796630536824,
"r_y3": 104.00000011573796,
"r_x0": 70.90211866351085,
"r_y0": 124.83139551297342,
"r_x1": 504.8720079864275,
"r_y1": 124.83139551297342,
"r_x2": 504.8720079864275,
"r_y2": 102.66666671251768,
"r_x3": 70.90211866351085,
"r_y3": 102.66666671251768,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
@@ -65,14 +65,14 @@
"a": 255
},
"rect": {
"r_x0": 71.84193505100733,
"r_y0": 152.90926970226084,
"r_x1": 153.088934155825,
"r_y1": 152.90926970226084,
"r_x2": 153.088934155825,
"r_y2": 129.797125232046,
"r_x3": 71.84193505100733,
"r_y3": 129.797125232046,
"r_x0": 73.10852522817731,
"r_y0": 152.70503335218433,
"r_x1": 153.04479435252625,
"r_y1": 152.70503335218433,
"r_x2": 153.04479435252625,
"r_y2": 130.00136157890958,
"r_x3": 73.10852522817731,
"r_y3": 130.00136157890958,
"coord_origin": "TOPLEFT"
},
"text": "package",
@@ -90,13 +90,13 @@
"id": 0,
"label": "text",
"bbox": {
"l": 69.6796630536824,
"l": 70.90211866351085,
"t": 76.99999977896756,
"r": 504.8720051760782,
"b": 152.90926970226084,
"r": 504.8720079864275,
"b": 152.70503335218433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9715732336044312,
"confidence": 0.9715733528137207,
"cells": [
{
"index": 0,
@@ -132,14 +132,14 @@
"a": 255
},
"rect": {
"r_x0": 69.6796630536824,
"r_y0": 124.83139494707741,
"r_x1": 504.8720051760782,
"r_y1": 124.83139494707741,
"r_x2": 504.8720051760782,
"r_y2": 104.00000011573796,
"r_x3": 69.6796630536824,
"r_y3": 104.00000011573796,
"r_x0": 70.90211866351085,
"r_y0": 124.83139551297342,
"r_x1": 504.8720079864275,
"r_y1": 124.83139551297342,
"r_x2": 504.8720079864275,
"r_y2": 102.66666671251768,
"r_x3": 70.90211866351085,
"r_y3": 102.66666671251768,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
@@ -157,14 +157,14 @@
"a": 255
},
"rect": {
"r_x0": 71.84193505100733,
"r_y0": 152.90926970226084,
"r_x1": 153.088934155825,
"r_y1": 152.90926970226084,
"r_x2": 153.088934155825,
"r_y2": 129.797125232046,
"r_x3": 71.84193505100733,
"r_y3": 129.797125232046,
"r_x0": 73.10852522817731,
"r_y0": 152.70503335218433,
"r_x1": 153.04479435252625,
"r_y1": 152.70503335218433,
"r_x2": 153.04479435252625,
"r_y2": 130.00136157890958,
"r_x3": 73.10852522817731,
"r_y3": 130.00136157890958,
"coord_origin": "TOPLEFT"
},
"text": "package",
@@ -195,13 +195,13 @@
"id": 0,
"label": "text",
"bbox": {
"l": 69.6796630536824,
"l": 70.90211866351085,
"t": 76.99999977896756,
"r": 504.8720051760782,
"b": 152.90926970226084,
"r": 504.8720079864275,
"b": 152.70503335218433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9715732336044312,
"confidence": 0.9715733528137207,
"cells": [
{
"index": 0,
@@ -237,14 +237,14 @@
"a": 255
},
"rect": {
"r_x0": 69.6796630536824,
"r_y0": 124.83139494707741,
"r_x1": 504.8720051760782,
"r_y1": 124.83139494707741,
"r_x2": 504.8720051760782,
"r_y2": 104.00000011573796,
"r_x3": 69.6796630536824,
"r_y3": 104.00000011573796,
"r_x0": 70.90211866351085,
"r_y0": 124.83139551297342,
"r_x1": 504.8720079864275,
"r_y1": 124.83139551297342,
"r_x2": 504.8720079864275,
"r_y2": 102.66666671251768,
"r_x3": 70.90211866351085,
"r_y3": 102.66666671251768,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
@@ -262,14 +262,14 @@
"a": 255
},
"rect": {
"r_x0": 71.84193505100733,
"r_y0": 152.90926970226084,
"r_x1": 153.088934155825,
"r_y1": 152.90926970226084,
"r_x2": 153.088934155825,
"r_y2": 129.797125232046,
"r_x3": 71.84193505100733,
"r_y3": 129.797125232046,
"r_x0": 73.10852522817731,
"r_y0": 152.70503335218433,
"r_x1": 153.04479435252625,
"r_y1": 152.70503335218433,
"r_x2": 153.04479435252625,
"r_y2": 130.00136157890958,
"r_x3": 73.10852522817731,
"r_y3": 130.00136157890958,
"coord_origin": "TOPLEFT"
},
"text": "package",
@@ -293,13 +293,13 @@
"id": 0,
"label": "text",
"bbox": {
"l": 69.6796630536824,
"l": 70.90211866351085,
"t": 76.99999977896756,
"r": 504.8720051760782,
"b": 152.90926970226084,
"r": 504.8720079864275,
"b": 152.70503335218433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9715732336044312,
"confidence": 0.9715733528137207,
"cells": [
{
"index": 0,
@@ -335,14 +335,14 @@
"a": 255
},
"rect": {
"r_x0": 69.6796630536824,
"r_y0": 124.83139494707741,
"r_x1": 504.8720051760782,
"r_y1": 124.83139494707741,
"r_x2": 504.8720051760782,
"r_y2": 104.00000011573796,
"r_x3": 69.6796630536824,
"r_y3": 104.00000011573796,
"r_x0": 70.90211866351085,
"r_y0": 124.83139551297342,
"r_x1": 504.8720079864275,
"r_y1": 124.83139551297342,
"r_x2": 504.8720079864275,
"r_y2": 102.66666671251768,
"r_x3": 70.90211866351085,
"r_y3": 102.66666671251768,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
@@ -360,14 +360,14 @@
"a": 255
},
"rect": {
"r_x0": 71.84193505100733,
"r_y0": 152.90926970226084,
"r_x1": 153.088934155825,
"r_y1": 152.90926970226084,
"r_x2": 153.088934155825,
"r_y2": 129.797125232046,
"r_x3": 71.84193505100733,
"r_y3": 129.797125232046,
"r_x0": 73.10852522817731,
"r_y0": 152.70503335218433,
"r_x1": 153.04479435252625,
"r_y1": 152.70503335218433,
"r_x2": 153.04479435252625,
"r_y2": 130.00136157890958,
"r_x3": 73.10852522817731,
"r_y3": 130.00136157890958,
"coord_origin": "TOPLEFT"
},
"text": "package",