Files
docling/tests/test_backend_image_native.py
Peter W. J. Staar 3495b73de8 feat: add the Image backend (#2627)
* feat: add the Image backend

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* fixed the pre-commit

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* Fixed single- versus multi-frame image formats

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* fix: Proper usage of ImageDocumentBackend in the pipeline, deprecate old code.

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* fix: Adapt tests

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* fix: correct mets_gbs backend test

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* fix: Make ImagePageBackend.get_bitmap_rects() yield

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
2025-11-17 11:37:22 +01:00

219 lines
7.7 KiB
Python

from io import BytesIO
from pathlib import Path
import pytest
from docling_core.types.doc import BoundingBox, CoordOrigin
from PIL import Image
from docling.backend.image_backend import ImageDocumentBackend, _ImagePageBackend
from docling.datamodel.base_models import DocumentStream, InputFormat
from docling.datamodel.document import InputDocument, _DocumentConversionInput
from docling.document_converter import DocumentConverter, ImageFormatOption
from docling.document_extractor import DocumentExtractor
def _make_png_stream(
    width: int = 64, height: int = 48, color=(123, 45, 67)
) -> DocumentStream:
    """Build an in-memory, solid-colour PNG wrapped in a ``DocumentStream``.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        color: RGB fill colour used for every pixel.

    Returns:
        A ``DocumentStream`` named ``test.png`` whose buffer is rewound to
        position 0.
    """
    png_bytes = BytesIO()
    Image.new("RGB", (width, height), color).save(png_bytes, format="PNG")
    # Rewind so that a consumer reads the encoded image from its first byte.
    png_bytes.seek(0)
    return DocumentStream(name="test.png", stream=png_bytes)
def _make_multipage_tiff_stream(num_pages: int = 3, size=(32, 32)) -> DocumentStream:
    """Build an in-memory multi-frame TIFF wrapped in a ``DocumentStream``.

    Each frame is a solid RGB image whose colour is derived from its index,
    so consecutive frames differ from one another.

    Args:
        num_pages: Number of frames to write; must be at least 1.
        size: ``(width, height)`` of every frame in pixels.

    Returns:
        A ``DocumentStream`` named ``test.tiff`` whose buffer is rewound to
        position 0.

    Raises:
        ValueError: If ``num_pages`` is smaller than 1 (a TIFF needs at least
            one frame to be saved).
    """
    if num_pages < 1:
        # Without this guard the failure would surface as an opaque IndexError
        # when picking the first frame below.
        raise ValueError(f"num_pages must be at least 1, got {num_pages}")

    frames = [
        Image.new("RGB", size, (i * 10 % 255, i * 20 % 255, i * 30 % 255))
        for i in range(num_pages)
    ]
    first_frame, *extra_frames = frames

    buf = BytesIO()
    # save_all + append_images writes the remaining frames after the first one.
    first_frame.save(buf, format="TIFF", save_all=True, append_images=extra_frames)
    buf.seek(0)
    return DocumentStream(name="test.tiff", stream=buf)
def test_docs_builder_uses_image_backend_for_image_stream():
    """Check that a PNG stream yields one IMAGE document using the image backend."""
    # Format options mapping IMAGE -> ImageFormatOption
    # (which carries ImageDocumentBackend).
    options_by_format = {InputFormat.IMAGE: ImageFormatOption()}
    conversion_input = _DocumentConversionInput(
        path_or_stream_iterator=[_make_png_stream()]
    )

    produced = list(conversion_input.docs(options_by_format))

    assert len(produced) == 1
    document = produced[0]
    assert document.format == InputFormat.IMAGE
    assert isinstance(document._backend, ImageDocumentBackend)
    assert document.page_count == 1
def test_docs_builder_multipage_tiff_counts_frames():
    """Check that a 4-frame TIFF stream is reported as a 4-page document."""
    tiff_stream = _make_multipage_tiff_stream(num_pages=4)
    conversion_input = _DocumentConversionInput(path_or_stream_iterator=[tiff_stream])

    doc_iter = conversion_input.docs({InputFormat.IMAGE: ImageFormatOption()})
    first_doc = next(doc_iter)

    assert isinstance(first_doc._backend, ImageDocumentBackend)
    assert first_doc.page_count == 4
def test_converter_default_maps_image_to_image_backend():
    """Check the converter's default IMAGE option points at ImageDocumentBackend."""
    image_option = DocumentConverter(
        allowed_formats=[InputFormat.IMAGE]
    ).format_to_options[InputFormat.IMAGE]
    assert image_option.backend is ImageDocumentBackend
def test_extractor_default_maps_image_to_image_backend():
    """Check the extractor's default IMAGE option points at ImageDocumentBackend."""
    image_option = DocumentExtractor(
        allowed_formats=[InputFormat.IMAGE]
    ).extraction_format_to_options[InputFormat.IMAGE]
    assert image_option.backend is ImageDocumentBackend
def _get_backend_from_stream(stream: DocumentStream):
    """Wrap ``stream`` in an IMAGE ``InputDocument`` and return its backend.

    The document is constructed with ``ImageDocumentBackend`` as its backend
    class and takes its filename from ``stream.name``.
    """
    return InputDocument(
        path_or_stream=stream.stream,
        format=InputFormat.IMAGE,
        backend=ImageDocumentBackend,
        filename=stream.name,
    )._backend
def test_num_pages_single():
    """A single-frame PNG must report exactly one page."""
    backend = _get_backend_from_stream(_make_png_stream(width=100, height=80))
    assert backend.page_count() == 1
def test_num_pages_multipage():
    """A five-frame TIFF must report five pages."""
    tiff_stream = _make_multipage_tiff_stream(num_pages=5, size=(64, 64))
    backend = _get_backend_from_stream(tiff_stream)
    assert backend.page_count() == 5
def test_get_size():
    """The page size must match the pixel dimensions of the source image."""
    expected_w, expected_h = 120, 90
    backend = _get_backend_from_stream(
        _make_png_stream(width=expected_w, height=expected_h)
    )
    page: _ImagePageBackend = backend.load_page(0)

    page_size = page.get_size()

    assert page_size.width == expected_w
    assert page_size.height == expected_h
def test_get_page_image_full():
    """An unscaled, uncropped page image keeps the source dimensions."""
    expected_w, expected_h = 100, 80
    backend = _get_backend_from_stream(
        _make_png_stream(width=expected_w, height=expected_h)
    )
    page: _ImagePageBackend = backend.load_page(0)

    rendered = page.get_page_image()

    assert (rendered.width, rendered.height) == (expected_w, expected_h)
def test_get_page_image_scaled():
    """A scaled page image has both dimensions multiplied by the scale factor."""
    base_w, base_h = 100, 80
    factor = 2.0
    backend = _get_backend_from_stream(_make_png_stream(width=base_w, height=base_h))
    page: _ImagePageBackend = backend.load_page(0)

    rendered = page.get_page_image(scale=factor)

    assert rendered.width == round(base_w * factor)
    assert rendered.height == round(base_h * factor)
def test_crop_page_image():
    """Cropping returns an image whose size equals the crop box extent."""
    backend = _get_backend_from_stream(_make_png_stream(width=200, height=150))
    page: _ImagePageBackend = backend.load_page(0)

    # Region taken from the middle of the 200x150 page.
    left, top, right, bottom = 50, 30, 150, 120
    region = BoundingBox(
        l=left, t=top, r=right, b=bottom, coord_origin=CoordOrigin.TOPLEFT
    )
    cropped = page.get_page_image(cropbox=region)

    assert cropped.width == right - left  # 100
    assert cropped.height == bottom - top  # 90
def test_crop_page_image_scaled():
    """Cropping with a scale returns the crop box extent times the scale."""
    factor = 0.5
    backend = _get_backend_from_stream(_make_png_stream(width=200, height=150))
    page: _ImagePageBackend = backend.load_page(0)

    left, top, right, bottom = 50, 30, 150, 120
    region = BoundingBox(
        l=left, t=top, r=right, b=bottom, coord_origin=CoordOrigin.TOPLEFT
    )
    cropped = page.get_page_image(scale=factor, cropbox=region)

    # Expected size is the cropped extent (100 x 90) multiplied by the scale.
    assert cropped.width == round((right - left) * factor)
    assert cropped.height == round((bottom - top) * factor)
def test_get_bitmap_rects():
    """The only bitmap rect reported must be the full-page rectangle."""
    page_w, page_h = 100, 80
    backend = _get_backend_from_stream(_make_png_stream(width=page_w, height=page_h))
    page: _ImagePageBackend = backend.load_page(0)

    found = list(page.get_bitmap_rects())

    assert len(found) == 1
    rect = found[0]
    assert (rect.l, rect.t) == (0.0, 0.0)
    assert (rect.r, rect.b) == (float(page_w), float(page_h))
    assert rect.coord_origin == CoordOrigin.TOPLEFT
def test_get_bitmap_rects_scaled():
    """With a scale, the single bitmap rect spans the scaled page dimensions."""
    page_w, page_h = 100, 80
    factor = 2.0
    backend = _get_backend_from_stream(_make_png_stream(width=page_w, height=page_h))
    page: _ImagePageBackend = backend.load_page(0)

    found = list(page.get_bitmap_rects(scale=factor))

    assert len(found) == 1
    rect = found[0]
    assert (rect.l, rect.t) == (0.0, 0.0)
    assert (rect.r, rect.b) == (float(page_w * factor), float(page_h * factor))
    assert rect.coord_origin == CoordOrigin.TOPLEFT
def test_get_text_in_rect():
    """get_text_in_rect must return an empty string for images (no OCR)."""
    backend = _get_backend_from_stream(_make_png_stream())
    page: _ImagePageBackend = backend.load_page(0)

    query_box = BoundingBox(l=10, t=10, r=50, b=50, coord_origin=CoordOrigin.TOPLEFT)

    assert page.get_text_in_rect(query_box) == ""
def test_multipage_access():
    """Every frame of a multi-page TIFF must load as a valid 64x64 page."""
    frame_count = 4
    edge = 64
    backend = _get_backend_from_stream(
        _make_multipage_tiff_stream(num_pages=frame_count, size=(edge, edge))
    )
    assert backend.page_count() == frame_count

    # Visit the pages in order and check each one individually.
    for page_no in range(frame_count):
        page = backend.load_page(page_no)
        assert page.is_valid()
        page_size = page.get_size()
        assert page_size.width == edge
        assert page_size.height == edge