mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
fix(layout,table): orientation-aware layout and table detection
Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
parent
a47fd8372d
commit
8ffa01bc9f
@ -1,8 +1,8 @@
|
|||||||
import copy
|
import copy
|
||||||
import logging
|
import logging
|
||||||
import warnings
|
import warnings
|
||||||
from copy import deepcopy
|
|
||||||
from collections.abc import Iterable
|
from collections.abc import Iterable
|
||||||
|
from copy import deepcopy
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
@ -19,7 +19,7 @@ from docling.models.base_model import BasePageModel
|
|||||||
from docling.models.utils.hf_model_download import download_hf_model
|
from docling.models.utils.hf_model_download import download_hf_model
|
||||||
from docling.utils.accelerator_utils import decide_device
|
from docling.utils.accelerator_utils import decide_device
|
||||||
from docling.utils.layout_postprocessor import LayoutPostprocessor
|
from docling.utils.layout_postprocessor import LayoutPostprocessor
|
||||||
from docling.utils.orientation import detect_orientation
|
from docling.utils.orientation import detect_orientation, rotate_bounding_box
|
||||||
from docling.utils.profiling import TimeRecorder
|
from docling.utils.profiling import TimeRecorder
|
||||||
from docling.utils.visualization import draw_clusters
|
from docling.utils.visualization import draw_clusters
|
||||||
|
|
||||||
@ -105,7 +105,6 @@ class LayoutModel(BasePageModel):
|
|||||||
self,
|
self,
|
||||||
conv_res,
|
conv_res,
|
||||||
page,
|
page,
|
||||||
page_orientation: int,
|
|
||||||
clusters,
|
clusters,
|
||||||
mode_prefix: str,
|
mode_prefix: str,
|
||||||
show: bool = False,
|
show: bool = False,
|
||||||
@ -119,10 +118,6 @@ class LayoutModel(BasePageModel):
|
|||||||
page_image = deepcopy(page.image)
|
page_image = deepcopy(page.image)
|
||||||
scale_x = page_image.width / page.size.width
|
scale_x = page_image.width / page.size.width
|
||||||
scale_y = page_image.height / page.size.height
|
scale_y = page_image.height / page.size.height
|
||||||
if page_orientation:
|
|
||||||
page_image = page_image.rotate(-page_orientation, expand=True)
|
|
||||||
if abs(page_orientation) in [90, 270]:
|
|
||||||
scale_x, scale_y = scale_y, scale_x
|
|
||||||
# Filter clusters for left and right images
|
# Filter clusters for left and right images
|
||||||
exclude_labels = {
|
exclude_labels = {
|
||||||
DocItemLabel.FORM,
|
DocItemLabel.FORM,
|
||||||
@ -138,9 +133,6 @@ class LayoutModel(BasePageModel):
|
|||||||
# Draw clusters on both images
|
# Draw clusters on both images
|
||||||
draw_clusters(left_image, left_clusters, scale_x, scale_y)
|
draw_clusters(left_image, left_clusters, scale_x, scale_y)
|
||||||
draw_clusters(right_image, right_clusters, scale_x, scale_y)
|
draw_clusters(right_image, right_clusters, scale_x, scale_y)
|
||||||
if page_orientation:
|
|
||||||
left_image = left_image.rotate(page_orientation, expand=True)
|
|
||||||
right_image = right_image.rotate(page_orientation, expand=True)
|
|
||||||
# Combine the images side by side
|
# Combine the images side by side
|
||||||
combined_width = left_image.width * 2
|
combined_width = left_image.width * 2
|
||||||
combined_height = left_image.height
|
combined_height = left_image.height
|
||||||
@ -183,11 +175,16 @@ class LayoutModel(BasePageModel):
|
|||||||
.replace(" ", "_")
|
.replace(" ", "_")
|
||||||
.replace("-", "_")
|
.replace("-", "_")
|
||||||
) # Temporary, until docling-ibm-model uses docling-core types
|
) # Temporary, until docling-ibm-model uses docling-core types
|
||||||
|
bbox = BoundingBox.model_validate(pred_item)
|
||||||
|
if page_orientation:
|
||||||
|
bbox = rotate_bounding_box(
|
||||||
|
bbox, page_orientation, page_image.size
|
||||||
|
).to_bounding_box()
|
||||||
cluster = Cluster(
|
cluster = Cluster(
|
||||||
id=ix,
|
id=ix,
|
||||||
label=label,
|
label=label,
|
||||||
confidence=pred_item["confidence"],
|
confidence=pred_item["confidence"],
|
||||||
bbox=BoundingBox.model_validate(pred_item),
|
bbox=bbox,
|
||||||
cells=[],
|
cells=[],
|
||||||
)
|
)
|
||||||
clusters.append(cluster)
|
clusters.append(cluster)
|
||||||
@ -196,7 +193,6 @@ class LayoutModel(BasePageModel):
|
|||||||
self.draw_clusters_and_cells_side_by_side(
|
self.draw_clusters_and_cells_side_by_side(
|
||||||
conv_res,
|
conv_res,
|
||||||
page,
|
page,
|
||||||
page_orientation,
|
|
||||||
clusters,
|
clusters,
|
||||||
mode_prefix="raw",
|
mode_prefix="raw",
|
||||||
)
|
)
|
||||||
@ -234,7 +230,6 @@ class LayoutModel(BasePageModel):
|
|||||||
self.draw_clusters_and_cells_side_by_side(
|
self.draw_clusters_and_cells_side_by_side(
|
||||||
conv_res,
|
conv_res,
|
||||||
page,
|
page,
|
||||||
page_orientation,
|
|
||||||
processed_clusters,
|
processed_clusters,
|
||||||
mode_prefix="postprocessed",
|
mode_prefix="postprocessed",
|
||||||
)
|
)
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
import copy
|
import copy
|
||||||
import warnings
|
import warnings
|
||||||
from collections.abc import Iterable
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Iterable, Optional, Tuple, cast
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
from docling_core.types.doc import BoundingBox, DocItemLabel, TableCell
|
from docling_core.types.doc import BoundingBox, DocItemLabel, TableCell
|
||||||
@ -11,6 +10,7 @@ from docling_core.types.doc.page import (
|
|||||||
TextCellUnit,
|
TextCellUnit,
|
||||||
)
|
)
|
||||||
from PIL import ImageDraw
|
from PIL import ImageDraw
|
||||||
|
from PIL.Image import Image
|
||||||
|
|
||||||
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||||
from docling.datamodel.base_models import Page, Table, TableStructurePrediction
|
from docling.datamodel.base_models import Page, Table, TableStructurePrediction
|
||||||
@ -23,6 +23,7 @@ from docling.datamodel.settings import settings
|
|||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_model import BasePageModel
|
||||||
from docling.models.utils.hf_model_download import download_hf_model
|
from docling.models.utils.hf_model_download import download_hf_model
|
||||||
from docling.utils.accelerator_utils import decide_device
|
from docling.utils.accelerator_utils import decide_device
|
||||||
|
from docling.utils.orientation import detect_orientation, rotate_bounding_box
|
||||||
from docling.utils.profiling import TimeRecorder
|
from docling.utils.profiling import TimeRecorder
|
||||||
|
|
||||||
|
|
||||||
@ -30,6 +31,8 @@ class TableStructureModel(BasePageModel):
|
|||||||
_model_repo_folder = "ds4sd--docling-models"
|
_model_repo_folder = "ds4sd--docling-models"
|
||||||
_model_path = "model_artifacts/tableformer"
|
_model_path = "model_artifacts/tableformer"
|
||||||
|
|
||||||
|
_table_labels = {DocItemLabel.TABLE, DocItemLabel.DOCUMENT_INDEX}
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
enabled: bool,
|
enabled: bool,
|
||||||
@ -186,31 +189,48 @@ class TableStructureModel(BasePageModel):
|
|||||||
page.predictions.tablestructure = (
|
page.predictions.tablestructure = (
|
||||||
TableStructurePrediction()
|
TableStructurePrediction()
|
||||||
) # dummy
|
) # dummy
|
||||||
|
cells_orientation = detect_orientation(page.cells)
|
||||||
in_tables = [
|
# Keep only table bboxes
|
||||||
(
|
in_tables_clusters = [
|
||||||
cluster,
|
cluster
|
||||||
[
|
|
||||||
round(cluster.bbox.l) * self.scale,
|
|
||||||
round(cluster.bbox.t) * self.scale,
|
|
||||||
round(cluster.bbox.r) * self.scale,
|
|
||||||
round(cluster.bbox.b) * self.scale,
|
|
||||||
],
|
|
||||||
)
|
|
||||||
for cluster in page.predictions.layout.clusters
|
for cluster in page.predictions.layout.clusters
|
||||||
if cluster.label
|
if cluster.label in self._table_labels
|
||||||
in [DocItemLabel.TABLE, DocItemLabel.DOCUMENT_INDEX]
|
|
||||||
]
|
]
|
||||||
if not len(in_tables):
|
|
||||||
|
if not len(in_tables_clusters):
|
||||||
yield page
|
yield page
|
||||||
continue
|
continue
|
||||||
|
# Rotate and scale table image
|
||||||
|
page_im = cast(Image, page.get_image())
|
||||||
|
scaled_page_im: Image = cast(
|
||||||
|
Image, page.get_image(scale=self.scale)
|
||||||
|
)
|
||||||
|
if cells_orientation:
|
||||||
|
scaled_page_im = scaled_page_im.rotate(
|
||||||
|
-cells_orientation, expand=True
|
||||||
|
)
|
||||||
page_input = {
|
page_input = {
|
||||||
"width": page.size.width * self.scale,
|
"width": scaled_page_im.size[0],
|
||||||
"height": page.size.height * self.scale,
|
"height": scaled_page_im.size[1],
|
||||||
"image": numpy.asarray(page.get_image(scale=self.scale)),
|
"image": numpy.asarray(scaled_page_im),
|
||||||
}
|
}
|
||||||
|
# Rotate and scale table cells
|
||||||
|
in_tables = [
|
||||||
|
(
|
||||||
|
c,
|
||||||
|
[
|
||||||
|
round(x) * self.scale
|
||||||
|
for x in _rotate_bbox(
|
||||||
|
c.bbox,
|
||||||
|
orientation=-cells_orientation,
|
||||||
|
im_size=page_im.size,
|
||||||
|
)
|
||||||
|
.to_top_left_origin(page_im.size[1])
|
||||||
|
.as_tuple()
|
||||||
|
],
|
||||||
|
)
|
||||||
|
for c in in_tables_clusters
|
||||||
|
]
|
||||||
table_clusters, table_bboxes = zip(*in_tables)
|
table_clusters, table_bboxes = zip(*in_tables)
|
||||||
|
|
||||||
if len(table_bboxes):
|
if len(table_bboxes):
|
||||||
@ -238,11 +258,16 @@ class TableStructureModel(BasePageModel):
|
|||||||
scale=self.scale
|
scale=self.scale
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
new_bbox = _rotate_bbox(
|
||||||
|
new_cell.to_bounding_box(),
|
||||||
|
orientation=-cells_orientation,
|
||||||
|
im_size=scaled_page_im.size,
|
||||||
|
).model_dump()
|
||||||
tokens.append(
|
tokens.append(
|
||||||
{
|
{
|
||||||
"id": new_cell.index,
|
"id": new_cell.index,
|
||||||
"text": new_cell.text,
|
"text": new_cell.text,
|
||||||
"bbox": new_cell.rect.to_bounding_box().model_dump(),
|
"bbox": new_bbox,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
page_input["tokens"] = tokens
|
page_input["tokens"] = tokens
|
||||||
@ -302,3 +327,11 @@ class TableStructureModel(BasePageModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
yield page
|
yield page
|
||||||
|
|
||||||
|
|
||||||
|
def _rotate_bbox(
|
||||||
|
bbox: BoundingBox, *, orientation: int, im_size: Tuple[int, int]
|
||||||
|
) -> BoundingBox:
|
||||||
|
if orientation:
|
||||||
|
return rotate_bounding_box(bbox, orientation, im_size).to_bounding_box()
|
||||||
|
return bbox
|
||||||
|
@ -27,7 +27,6 @@ from docling.utils.ocr_utils import (
|
|||||||
parse_tesseract_orientation,
|
parse_tesseract_orientation,
|
||||||
tesseract_box_to_bounding_rectangle,
|
tesseract_box_to_bounding_rectangle,
|
||||||
)
|
)
|
||||||
from docling.utils.orientation import Box
|
|
||||||
from docling.utils.profiling import TimeRecorder
|
from docling.utils.profiling import TimeRecorder
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from typing import Optional
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
from docling_core.types.doc.page import BoundingRectangle
|
from docling_core.types.doc.page import BoundingRectangle
|
||||||
@ -43,7 +43,9 @@ def tesseract_box_to_bounding_rectangle(
|
|||||||
orientation: int,
|
orientation: int,
|
||||||
im_size: Tuple[int, int],
|
im_size: Tuple[int, int],
|
||||||
) -> BoundingRectangle:
|
) -> BoundingRectangle:
|
||||||
# box is in the top, left, height, width format, top left coordinates
|
# bbox is in the top, left, height, width format, top left coordinates
|
||||||
|
# We detected the tesseract on the document rotated with minus orientation, we have
|
||||||
|
# to apply an orientation angle
|
||||||
rect = rotate_bounding_box(bbox, angle=orientation, im_size=im_size)
|
rect = rotate_bounding_box(bbox, angle=orientation, im_size=im_size)
|
||||||
rect = BoundingRectangle(
|
rect = BoundingRectangle(
|
||||||
r_x0=rect.r_x0 / scale,
|
r_x0=rect.r_x0 / scale,
|
||||||
@ -54,7 +56,7 @@ def tesseract_box_to_bounding_rectangle(
|
|||||||
r_y2=rect.r_y2 / scale,
|
r_y2=rect.r_y2 / scale,
|
||||||
r_x3=rect.r_x3 / scale,
|
r_x3=rect.r_x3 / scale,
|
||||||
r_y3=rect.r_y3 / scale,
|
r_y3=rect.r_y3 / scale,
|
||||||
coord_origin=CoordOrigin.TOPLEFT,
|
coord_origin=rect.coord_origin,
|
||||||
)
|
)
|
||||||
if original_offset is not None:
|
if original_offset is not None:
|
||||||
if original_offset.coord_origin is not CoordOrigin.TOPLEFT:
|
if original_offset.coord_origin is not CoordOrigin.TOPLEFT:
|
||||||
|
@ -1,13 +1,15 @@
|
|||||||
from collections import Counter
|
from collections import Counter
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
from docling_core.types.doc.page import TextCell
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
|
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
||||||
|
|
||||||
_ORIENTATIONS = [0, 90, 180, 270]
|
CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
|
||||||
|
|
||||||
|
|
||||||
def _clipped_orientation(angle: float) -> int:
|
def _clipped_orientation(angle: float) -> int:
|
||||||
return min((abs(angle - o) % 360, o) for o in _ORIENTATIONS)[1]
|
return min((abs(angle - o) % 360, o) for o in CLIPPED_ORIENTATIONS)[1]
|
||||||
|
|
||||||
|
|
||||||
def detect_orientation(cells: list[TextCell]) -> int:
|
def detect_orientation(cells: list[TextCell]) -> int:
|
||||||
@ -15,12 +17,6 @@ def detect_orientation(cells: list[TextCell]) -> int:
|
|||||||
return 0
|
return 0
|
||||||
orientation_counter = Counter(_clipped_orientation(c.rect.angle_360) for c in cells)
|
orientation_counter = Counter(_clipped_orientation(c.rect.angle_360) for c in cells)
|
||||||
return max(orientation_counter.items(), key=itemgetter(1))[0]
|
return max(orientation_counter.items(), key=itemgetter(1))[0]
|
||||||
from typing import Tuple
|
|
||||||
|
|
||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
|
||||||
from docling_core.types.doc.page import BoundingRectangle
|
|
||||||
|
|
||||||
CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
|
|
||||||
|
|
||||||
|
|
||||||
def rotate_bounding_box(
|
def rotate_bounding_box(
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
<document>
|
<document>
|
||||||
<paragraph><location><page_1><loc_12><loc_82><loc_85><loc_91></location>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</paragraph>
|
<table>
|
||||||
|
<location><page_1><loc_12><loc_39><loc_67><loc_87></location>
|
||||||
|
<row_0><col_0><body></col_0><col_1><col_header>Column 0</col_1><col_2><col_header>Column 1</col_2><col_3><col_header>Column 2</col_3></row_0>
|
||||||
|
<row_1><col_0><row_header>this is row 0</col_0><col_1><body>some cells</col_1><col_2><body>have content</col_2><col_3><body>and</col_3></row_1>
|
||||||
|
<row_2><col_0><row_header>and row 1</col_0><col_1><body></col_1><col_2><body>other</col_2><col_3><body>have</col_3></row_2>
|
||||||
|
<row_3><col_0><row_header>and last row 2</col_0><col_1><body>nothing</col_1><col_2><body></col_2><col_3><body>inside</col_3></row_3>
|
||||||
|
</table>
|
||||||
</document>
|
</document>
|
@ -1 +1,5 @@
|
|||||||
Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package
|
| | Column 0 | Column 1 | Column 2 |
|
||||||
|
|----------------|------------|--------------|------------|
|
||||||
|
| this is row 0 | some cells | have content | and |
|
||||||
|
| and row 1 | | other | have |
|
||||||
|
| and last row 2 | nothing | | inside |
|
@ -1,3 +0,0 @@
|
|||||||
<document>
|
|
||||||
<paragraph><location><page_1><loc_16><loc_12><loc_18><loc_26></location>package</paragraph>
|
|
||||||
</document>
|
|
@ -1 +0,0 @@
|
|||||||
{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test_rotated.pdf", "filename-prov": null, "document-hash": "4a282813d93824eaa9bc2a0b2a0d6d626ecc8f5f380bd1320e2dd3e8e53c2ba6", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "f8a4dc72d8b159f69d0bc968b97f3fb9e0ac59dcb3113492432755835935d9b3", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [131.21306574279092, 74.12495603322407, 152.19606490864376, 154.19400205373182], "page": 1, "span": [0, 7], "__ref_s3_data": null}], "text": "package", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 595.201171875, "page": 1, "width": 841.9216918945312}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}
|
|
@ -1 +0,0 @@
|
|||||||
package
|
|
@ -1 +0,0 @@
|
|||||||
[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "package"}], "headers": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}]}}]
|
|
@ -1,5 +1,9 @@
|
|||||||
<document>
|
<document>
|
||||||
<paragraph><location><page_1><loc_75><loc_16><loc_88><loc_18></location>package</paragraph>
|
<table>
|
||||||
<paragraph><location><page_1><loc_15><loc_12><loc_88><loc_15></location>JSON and Markdown in an easy self contained</paragraph>
|
<location><page_1><loc_33><loc_13><loc_88><loc_61></location>
|
||||||
<paragraph><location><page_1><loc_15><loc_9><loc_88><loc_11></location>Docling bundles PDF document conversion to</paragraph>
|
<row_0><col_0><col_header>inside</col_0><col_1><body></col_1><col_2><col_header>nothing</col_2><col_3><col_header>and last row 2</col_3></row_0>
|
||||||
|
<row_1><col_0><body>have</col_0><col_1><body>other</col_1><col_2><body></col_2><col_3><body>and row 1</col_3></row_1>
|
||||||
|
<row_2><col_0><body>and</col_0><col_1><body>have content</col_1><col_2><body>some cells</col_2><col_3><body>this is row 0</col_3></row_2>
|
||||||
|
<row_3><col_0><body>Column 2</col_0><col_1><body>Column 1</col_1><col_2><body>Column 0</col_2><col_3><body></col_3></row_3>
|
||||||
|
</table>
|
||||||
</document>
|
</document>
|
@ -1,5 +1,5 @@
|
|||||||
package
|
| inside | | nothing | and last row 2 |
|
||||||
|
|----------|--------------|------------|------------------|
|
||||||
JSON and Markdown in an easy self contained
|
| have | other | | and row 1 |
|
||||||
|
| and | have content | some cells | this is row 0 |
|
||||||
Docling bundles PDF document conversion to
|
| Column 2 | Column 1 | Column 0 | |
|
@ -1,3 +1,9 @@
|
|||||||
<document>
|
<document>
|
||||||
<paragraph><location><page_1><loc_82><loc_75><loc_84><loc_88></location>package</paragraph>
|
<table>
|
||||||
|
<location><page_1><loc_39><loc_33><loc_87><loc_88></location>
|
||||||
|
<row_0><col_0><body>and last row 2</col_0><col_1><body>and row 1</col_1><col_2><body>this is row 0</col_2><col_3><body></col_3></row_0>
|
||||||
|
<row_1><col_0><body>nothing</col_0><col_1><body></col_1><col_2><body>some cells</col_2><col_3><body>Column 0</col_3></row_1>
|
||||||
|
<row_2><col_0><body></col_0><col_1><body>other</col_1><col_2><body>have content</col_2><col_3><body>Column 1</col_3></row_2>
|
||||||
|
<row_3><col_0><body>inside</col_0><col_1><body>have</col_1><col_2><body>and</col_2><col_3><body>Column 2</col_3></row_3>
|
||||||
|
</table>
|
||||||
</document>
|
</document>
|
@ -1 +1,5 @@
|
|||||||
package
|
| and last row 2 | and row 1 | this is row 0 | |
|
||||||
|
|------------------|-------------|-----------------|----------|
|
||||||
|
| nothing | | some cells | Column 0 |
|
||||||
|
| | other | have content | Column 1 |
|
||||||
|
| inside | have | and | Column 2 |
|
@ -1,4 +1,9 @@
|
|||||||
<document>
|
<document>
|
||||||
<paragraph><location><page_1><loc_9><loc_12><loc_11><loc_85></location>Docling bundles PDF document conversion to</paragraph>
|
<table>
|
||||||
<paragraph><location><page_1><loc_12><loc_12><loc_15><loc_85></location><location><page_1><loc_12><loc_12><loc_15><loc_85></location>JSON and Markdown in an easy self contained package</paragraph>
|
<location><page_1><loc_13><loc_12><loc_61><loc_67></location>
|
||||||
|
<row_0><col_0><body>Column 2</col_0><col_1><body>and</col_1><col_2><body>have</col_2><col_3><body>inside</col_3></row_0>
|
||||||
|
<row_1><col_0><body>Column 1</col_0><col_1><body>have content</col_1><col_2><body>other</col_2><col_3><body></col_3></row_1>
|
||||||
|
<row_2><col_0><body>Column 0</col_0><col_1><body>some cells</col_1><col_2><body></col_2><col_3><body>nothing</col_3></row_2>
|
||||||
|
<row_3><col_0><body></col_0><col_1><body>this is row 0</col_1><col_2><body>and row 1</col_2><col_3><body>and last row 2</col_3></row_3>
|
||||||
|
</table>
|
||||||
</document>
|
</document>
|
@ -27,53 +27,468 @@
|
|||||||
"file-info": {
|
"file-info": {
|
||||||
"filename": "ocr_test_rotated_90.pdf",
|
"filename": "ocr_test_rotated_90.pdf",
|
||||||
"filename-prov": null,
|
"filename-prov": null,
|
||||||
"document-hash": "4a282813d93824eaa9bc2a0b2a0d6d626ecc8f5f380bd1320e2dd3e8e53c2ba6",
|
"document-hash": "2fb20caf4f54c878a0b454b496010d92adc6ae1b7f10fbd9ba1ba26260f818a8",
|
||||||
"#-pages": 1,
|
"#-pages": 1,
|
||||||
"collection-name": null,
|
"collection-name": null,
|
||||||
"description": null,
|
"description": null,
|
||||||
"page-hashes": [
|
"page-hashes": [
|
||||||
{
|
{
|
||||||
"hash": "f8a4dc72d8b159f69d0bc968b97f3fb9e0ac59dcb3113492432755835935d9b3",
|
"hash": "56c847ad7c5ab9f0346a325510af001ab66a9bb45f65ffc7bbfc60c929def7d2",
|
||||||
"model": "default",
|
"model": "default",
|
||||||
"page": 1
|
"page": 1
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"main-text": [
|
"main-text": [
|
||||||
|
{
|
||||||
|
"name": "Table",
|
||||||
|
"type": "table",
|
||||||
|
"$ref": "#/tables/0"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"figures": [],
|
||||||
|
"tables": [
|
||||||
{
|
{
|
||||||
"prov": [
|
"prov": [
|
||||||
{
|
{
|
||||||
"bbox": [
|
"bbox": [
|
||||||
131.21306574279092,
|
75.13359832763672,
|
||||||
74.12495603322407,
|
102.99908447265625,
|
||||||
152.19606490864376,
|
361.18695068359375,
|
||||||
154.19400205373182
|
562.1403198242188
|
||||||
],
|
],
|
||||||
"page": 1,
|
"page": 1,
|
||||||
"span": [
|
"span": [
|
||||||
0,
|
0,
|
||||||
7
|
0
|
||||||
],
|
],
|
||||||
"__ref_s3_data": null
|
"__ref_s3_data": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"text": "package",
|
"text": "",
|
||||||
"type": "paragraph",
|
"type": "table",
|
||||||
"payload": null,
|
"payload": null,
|
||||||
"name": "Text",
|
"#-cols": 4,
|
||||||
"font": null
|
"#-rows": 4,
|
||||||
|
"data": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
105.0718660651769,
|
||||||
|
304.7354643560275,
|
||||||
|
119.73306194406335,
|
||||||
|
369.59883715876185
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
0,
|
||||||
|
0
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "Column 2",
|
||||||
|
"type": "body",
|
||||||
|
"col": 0,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
],
|
||||||
|
"row": 0,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
172.26899264661517,
|
||||||
|
324.3168597625203,
|
||||||
|
188.15195177751215,
|
||||||
|
352.46511670018316
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "and",
|
||||||
|
"type": "body",
|
||||||
|
"col": 1,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"row": 0,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
240.68788382926402,
|
||||||
|
321.869185135892,
|
||||||
|
256.570842960161,
|
||||||
|
356.13662847492196
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
0,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "have",
|
||||||
|
"type": "body",
|
||||||
|
"col": 2,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
2,
|
||||||
|
3
|
||||||
|
],
|
||||||
|
"row": 0,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
312.772072637728,
|
||||||
|
319.42151173034614,
|
||||||
|
326.21150018118874,
|
||||||
|
359.8081389276117
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
0,
|
||||||
|
3
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "inside",
|
||||||
|
"type": "body",
|
||||||
|
"col": 3,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
3,
|
||||||
|
4
|
||||||
|
],
|
||||||
|
"row": 0,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
105.0718660651769,
|
||||||
|
419.77616156495424,
|
||||||
|
119.73306194406335,
|
||||||
|
483.4156981046677
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
1,
|
||||||
|
0
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "Column 1",
|
||||||
|
"type": "body",
|
||||||
|
"col": 0,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
],
|
||||||
|
"row": 1,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
172.26898999097682,
|
||||||
|
408.7616301134671,
|
||||||
|
185.70842261785268,
|
||||||
|
495.6540658231026
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
1,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "have content",
|
||||||
|
"type": "body",
|
||||||
|
"col": 1,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"row": 1,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
240.68788377535307,
|
||||||
|
433.23837164942523,
|
||||||
|
255.34907711253194,
|
||||||
|
468.729651251476
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "other",
|
||||||
|
"type": "body",
|
||||||
|
"col": 2,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
2,
|
||||||
|
3
|
||||||
|
],
|
||||||
|
"row": 1,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"bbox": null,
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
1,
|
||||||
|
3
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "",
|
||||||
|
"type": "body"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
105.07186605295925,
|
||||||
|
532.3691850430223,
|
||||||
|
119.73306193184567,
|
||||||
|
597.2325578457567
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
2,
|
||||||
|
0
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "Column 0",
|
||||||
|
"type": "body",
|
||||||
|
"col": 0,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
],
|
||||||
|
"row": 2,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
2,
|
||||||
|
3
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
172.26899069197702,
|
||||||
|
529.9215107729757,
|
||||||
|
186.93018720629036,
|
||||||
|
600.9040699770771
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
2,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "some cells",
|
||||||
|
"type": "body",
|
||||||
|
"col": 1,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"row": 2,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
2,
|
||||||
|
3
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"bbox": null,
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
2,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "",
|
||||||
|
"type": "body"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
311.49999737299976,
|
||||||
|
536.775000315586,
|
||||||
|
332.5000022770002,
|
||||||
|
592.9083316144141
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
2,
|
||||||
|
3
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "nothing",
|
||||||
|
"type": "body",
|
||||||
|
"col": 3,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
3,
|
||||||
|
4
|
||||||
|
],
|
||||||
|
"row": 2,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
2,
|
||||||
|
3
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"bbox": null,
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
3,
|
||||||
|
0
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "",
|
||||||
|
"type": "body"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
172.2689900422697,
|
||||||
|
638.8430233885732,
|
||||||
|
186.93018846286373,
|
||||||
|
719.6162777831045
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
3,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "this is row 0",
|
||||||
|
"type": "body",
|
||||||
|
"col": 1,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"row": 3,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
3,
|
||||||
|
4
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
240.68788248006402,
|
||||||
|
647.4098827174411,
|
||||||
|
255.34907835895044,
|
||||||
|
712.2732555201754
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
3,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "and row 1",
|
||||||
|
"type": "body",
|
||||||
|
"col": 2,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
2,
|
||||||
|
3
|
||||||
|
],
|
||||||
|
"row": 3,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
3,
|
||||||
|
4
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"bbox": [
|
||||||
|
313.9938353514431,
|
||||||
|
633.9476737903873,
|
||||||
|
327.43326861374595,
|
||||||
|
725.735464724632
|
||||||
|
],
|
||||||
|
"spans": [
|
||||||
|
[
|
||||||
|
3,
|
||||||
|
3
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"text": "and last row 2",
|
||||||
|
"type": "body",
|
||||||
|
"col": 3,
|
||||||
|
"col-header": false,
|
||||||
|
"col-span": [
|
||||||
|
3,
|
||||||
|
4
|
||||||
|
],
|
||||||
|
"row": 3,
|
||||||
|
"row-header": false,
|
||||||
|
"row-span": [
|
||||||
|
3,
|
||||||
|
4
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"model": null,
|
||||||
|
"bounding-box": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"figures": [],
|
|
||||||
"tables": [],
|
|
||||||
"bitmaps": null,
|
"bitmaps": null,
|
||||||
"equations": [],
|
"equations": [],
|
||||||
"footnotes": [],
|
"footnotes": [],
|
||||||
"page-dimensions": [
|
"page-dimensions": [
|
||||||
{
|
{
|
||||||
"height": 595.201171875,
|
"height": 842.0,
|
||||||
"page": 1,
|
"page": 1,
|
||||||
"width": 841.9216918945312
|
"width": 595.0
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"page-footers": [],
|
"page-footers": [],
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
Docling bundles PDF document conversion to
|
| Column 2 | and | have | inside |
|
||||||
|
|------------|---------------|-----------|----------------|
|
||||||
JSON and Markdown in an easy self contained package
|
| Column 1 | have content | other | |
|
||||||
|
| Column 0 | some cells | | nothing |
|
||||||
|
| | this is row 0 | and row 1 | and last row 2 |
|
@ -1 +1,5 @@
|
|||||||
Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package
|
| | Column 0 | Column 1 | Column 2 |
|
||||||
|
|----------------|------------|--------------|------------|
|
||||||
|
| this is row 0 | some cells | have content | and |
|
||||||
|
| and row 1 | | other | have |
|
||||||
|
| and last row 2 | nothing | | inside |
|
@ -1,5 +1,5 @@
|
|||||||
package
|
| inside | | nothing | and last row 2 |
|
||||||
|
|----------|--------------|------------|------------------|
|
||||||
JSON and Markdown in an easy self contained
|
| have | other | | and row 1 |
|
||||||
|
| and | have content | some cells | this is row 0 |
|
||||||
Docling bundles PDF document conversion to
|
| Column 2 | Column 1 | Column 0 | |
|
@ -1 +1,5 @@
|
|||||||
package
|
| and last row 2 | and row 1 | this is row 0 | |
|
||||||
|
|------------------|-------------|-----------------|----------|
|
||||||
|
| nothing | | some cells | Column 0 |
|
||||||
|
| | other | have content | Column 1 |
|
||||||
|
| inside | have | and | Column 2 |
|
@ -1,3 +1,5 @@
|
|||||||
Docling bundles PDF document conversion to
|
| Column 2 | and | have | inside |
|
||||||
|
|------------|---------------|-----------|----------------|
|
||||||
JSON and Markdown in an easy self contained package
|
| Column 1 | have content | other | |
|
||||||
|
| Column 0 | some cells | | nothing |
|
||||||
|
| | this is row 0 | and row 1 | and last row 2 |
|
BIN
tests/data_scanned/ocr_test.pdf
vendored
BIN
tests/data_scanned/ocr_test.pdf
vendored
Binary file not shown.
BIN
tests/data_scanned/ocr_test_rotated_180.pdf
vendored
BIN
tests/data_scanned/ocr_test_rotated_180.pdf
vendored
Binary file not shown.
BIN
tests/data_scanned/ocr_test_rotated_270.pdf
vendored
BIN
tests/data_scanned/ocr_test_rotated_270.pdf
vendored
Binary file not shown.
BIN
tests/data_scanned/ocr_test_rotated_90.pdf
vendored
BIN
tests/data_scanned/ocr_test_rotated_90.pdf
vendored
Binary file not shown.
Loading…
Reference in New Issue
Block a user