feat: add the Image backend (#2627)

* feat: add the Image backend
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* fixed the pre-commit
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* Fixed single- versus multi-frame image formats
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* fix: Proper usage of ImageDocumentBackend in the pipeline, deprecate old code.
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* fix: Adapt tests
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* fix: correct mets_gbs backend test
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* fix: Make ImagePageBackend.get_bitmap_rects() yield
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
2025-12-08 12:48:28 +00:00 · 2025-11-17 11:37:22 +01:00
parent ae30373ee7
commit 3495b73de8
12 changed files with 494 additions and 82 deletions
--- a/docling/backend/image_backend.py
+++ b/docling/backend/image_backend.py
@@ -0,0 +1,188 @@
+import logging
+from io import BytesIO
+from pathlib import Path
+from typing import Iterable, List, Optional, Union
+
+from docling_core.types.doc import BoundingBox, CoordOrigin
+from docling_core.types.doc.page import (
+    BoundingRectangle,
+    PdfPageBoundaryType,
+    PdfPageGeometry,
+    SegmentedPdfPage,
+    TextCell,
+)
+from PIL import Image
+
+from docling.backend.abstract_backend import AbstractDocumentBackend
+from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend
+from docling.datamodel.backend_options import PdfBackendOptions
+from docling.datamodel.base_models import InputFormat, Size
+from docling.datamodel.document import InputDocument
+
+_log = logging.getLogger(__name__)
+
+
+class _ImagePageBackend(PdfPageBackend):
+    def __init__(self, image: Image.Image):
+        self._image: Optional[Image.Image] = image
+        self.valid: bool = self._image is not None
+
+    def is_valid(self) -> bool:
+        return self.valid
+
+    def get_text_in_rect(self, bbox: BoundingBox) -> str:
+        # No text extraction from raw images without OCR
+        return ""
+
+    def get_segmented_page(self) -> SegmentedPdfPage:
+        # Return empty segmented page with proper dimensions for raw images
+        assert self._image is not None
+        page_size = self.get_size()
+        bbox = BoundingBox(
+            l=0.0,
+            t=0.0,
+            r=float(page_size.width),
+            b=float(page_size.height),
+            coord_origin=CoordOrigin.BOTTOMLEFT,
+        )
+        dimension = PdfPageGeometry(
+            angle=0.0,
+            rect=BoundingRectangle.from_bounding_box(bbox),
+            boundary_type=PdfPageBoundaryType.CROP_BOX,
+            art_bbox=bbox,
+            bleed_bbox=bbox,
+            crop_bbox=bbox,
+            media_bbox=bbox,
+            trim_bbox=bbox,
+        )
+        return SegmentedPdfPage(
+            dimension=dimension,
+            char_cells=[],
+            word_cells=[],
+            textline_cells=[],
+            has_chars=False,
+            has_words=False,
+            has_lines=False,
+        )
+
+    def get_text_cells(self) -> Iterable[TextCell]:
+        # No text cells on raw images
+        return []
+
+    def get_bitmap_rects(self, scale: float = 1) -> Iterable[BoundingBox]:
+        # For raw images, the entire page is a bitmap
+        assert self._image is not None
+        page_size = self.get_size()
+        full_page_bbox = BoundingBox(
+            l=0.0,
+            t=0.0,
+            r=float(page_size.width),
+            b=float(page_size.height),
+            coord_origin=CoordOrigin.TOPLEFT,
+        )
+        if scale != 1:
+            full_page_bbox = full_page_bbox.scaled(scale=scale)
+        yield full_page_bbox
+
+    def get_page_image(
+        self, scale: float = 1, cropbox: Optional[BoundingBox] = None
+    ) -> Image.Image:
+        assert self._image is not None
+        img = self._image
+
+        if cropbox is not None:
+            # Expected cropbox comes in TOPLEFT coords in our pipeline
+            if cropbox.coord_origin != CoordOrigin.TOPLEFT:
+                # Convert to TOPLEFT relative to current image height
+                cropbox = cropbox.to_top_left_origin(img.height)
+            left, top, right, bottom = cropbox.as_tuple()
+            left = max(0, round(left))
+            top = max(0, round(top))
+            right = min(img.width, round(right))
+            bottom = min(img.height, round(bottom))
+            img = img.crop((left, top, right, bottom))
+
+        if scale != 1:
+            new_w = max(1, round(img.width * scale))
+            new_h = max(1, round(img.height * scale))
+            img = img.resize((new_w, new_h))
+
+        return img
+
+    def get_size(self) -> Size:
+        assert self._image is not None
+        return Size(width=self._image.width, height=self._image.height)
+
+    def unload(self):
+        # Help GC and free memory
+        self._image = None
+
+
+class ImageDocumentBackend(PdfDocumentBackend):
+    """Image-native backend that bypasses pypdfium2.
+
+    Notes:
+        - Subclasses PdfDocumentBackend to satisfy pipeline type checks.
+        - Intentionally avoids calling PdfDocumentBackend.__init__ to skip
+          the image→PDF conversion and any pypdfium2 usage.
+        - Handles multi-page TIFF by extracting frames eagerly to separate
+          Image objects to keep thread-safety when pages process in parallel.
+    """
+
+    def __init__(
+        self,
+        in_doc: InputDocument,
+        path_or_stream: Union[BytesIO, Path],
+        options: PdfBackendOptions = PdfBackendOptions(),
+    ):
+        # Bypass PdfDocumentBackend.__init__ to avoid image→PDF conversion
+        AbstractDocumentBackend.__init__(self, in_doc, path_or_stream, options)
+        self.options: PdfBackendOptions = options
+
+        if self.input_format not in {InputFormat.IMAGE}:
+            raise RuntimeError(
+                f"Incompatible file format {self.input_format} was passed to ImageDocumentBackend."
+            )
+
+        # Load frames eagerly for thread-safety across pages
+        self._frames: List[Image.Image] = []
+        try:
+            img = Image.open(self.path_or_stream)  # type: ignore[arg-type]
+
+            # Handle multi-frame and single-frame images
+            # - multiframe formats: TIFF, GIF, ICO
+            # - singleframe formats: JPEG (.jpg, .jpeg), PNG (.png), BMP, WEBP (unless animated), HEIC
+            frame_count = getattr(img, "n_frames", 1)
+
+            if frame_count > 1:
+                for i in range(frame_count):
+                    img.seek(i)
+                    self._frames.append(img.copy().convert("RGB"))
+            else:
+                self._frames.append(img.convert("RGB"))
+        except Exception as e:
+            raise RuntimeError(f"Could not load image for document {self.file}") from e
+
+    def is_valid(self) -> bool:
+        return len(self._frames) > 0
+
+    def page_count(self) -> int:
+        return len(self._frames)
+
+    def load_page(self, page_no: int) -> _ImagePageBackend:
+        if not (0 <= page_no < len(self._frames)):
+            raise IndexError(f"Page index out of range: {page_no}")
+        return _ImagePageBackend(self._frames[page_no])
+
+    @classmethod
+    def supported_formats(cls) -> set[InputFormat]:
+        # Only IMAGE here; PDF handling remains in PDF-oriented backends
+        return {InputFormat.IMAGE}
+
+    @classmethod
+    def supports_pagination(cls) -> bool:
+        return True
+
+    def unload(self):
+        super().unload()
+        self._frames = []
--- a/docling/backend/pdf_backend.py
+++ b/docling/backend/pdf_backend.py
@@ -60,38 +60,7 @@ class PdfDocumentBackend(PaginatedDocumentBackend):
        super().__init__(in_doc, path_or_stream, options)
        self.options: PdfBackendOptions

-        if self.input_format is not InputFormat.PDF:
-            if self.input_format is InputFormat.IMAGE:
-                buf = BytesIO()
-                img = Image.open(self.path_or_stream)
-
-                # Handle multi-page TIFF images
-                if hasattr(img, "n_frames") and img.n_frames > 1:
-                    # Extract all frames from multi-page image
-                    frames = []
-                    try:
-                        for i in range(img.n_frames):
-                            img.seek(i)
-                            frame = img.copy().convert("RGB")
-                            frames.append(frame)
-                    except EOFError:
-                        pass
-
-                    # Save as multi-page PDF
-                    if frames:
-                        frames[0].save(
-                            buf, "PDF", save_all=True, append_images=frames[1:]
-                        )
-                    else:
-                        # Fallback to single page if frame extraction fails
-                        img.convert("RGB").save(buf, "PDF")
-                else:
-                    # Single page image - convert to RGB and save
-                    img.convert("RGB").save(buf, "PDF")
-
-                buf.seek(0)
-                self.path_or_stream = buf
-            elif self.input_format not in self.supported_formats():
+        if self.input_format not in self.supported_formats():
            raise RuntimeError(
                f"Incompatible file format {self.input_format} was passed to a PdfDocumentBackend. Valid format are {','.join(self.supported_formats())}."
            )
@@ -106,7 +75,7 @@ class PdfDocumentBackend(PaginatedDocumentBackend):

    @classmethod
    def supported_formats(cls) -> Set[InputFormat]:
-        return {InputFormat.PDF, InputFormat.IMAGE}
+        return {InputFormat.PDF}

    @classmethod
    def supports_pagination(cls) -> bool:
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@@ -26,6 +26,7 @@ from rich.console import Console
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
 from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
+from docling.backend.image_backend import ImageDocumentBackend
 from docling.backend.mets_gbs_backend import MetsGbsDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
@@ -698,9 +699,16 @@ def convert(  # noqa: C901
            if artifacts_path is not None:
                simple_format_option.artifacts_path = artifacts_path

+            # Use image-native backend for IMAGE to avoid pypdfium2 locking
+            image_format_option = PdfFormatOption(
+                pipeline_options=pipeline_options,
+                backend=ImageDocumentBackend,
+                backend_options=pdf_backend_options,
+            )
+
            format_options = {
                InputFormat.PDF: pdf_format_option,
-                InputFormat.IMAGE: pdf_format_option,
+                InputFormat.IMAGE: image_format_option,
                InputFormat.METS_GBS: mets_gbs_format_option,
                InputFormat.DOCX: WordFormatOption(
                    pipeline_options=simple_format_option
--- a/docling/datamodel/base_models.py
+++ b/docling/datamodel/base_models.py
@@ -164,6 +164,7 @@ class DoclingComponentType(str, Enum):
    MODEL = "model"
    DOC_ASSEMBLER = "doc_assembler"
    USER_INPUT = "user_input"
+    PIPELINE = "pipeline"


 class VlmStopReason(str, Enum):
--- a/docling/document_converter.py
+++ b/docling/document_converter.py
@@ -3,6 +3,7 @@ import logging
 import sys
 import threading
 import time
+import warnings
 from collections.abc import Iterable, Iterator
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
@@ -21,6 +22,7 @@ from docling.backend.asciidoc_backend import AsciiDocBackend
 from docling.backend.csv_backend import CsvDocumentBackend
 from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.backend.html_backend import HTMLDocumentBackend
+from docling.backend.image_backend import ImageDocumentBackend
 from docling.backend.json.docling_json_backend import DoclingJSONBackend
 from docling.backend.md_backend import MarkdownDocumentBackend
 from docling.backend.mets_gbs_backend import MetsGbsDocumentBackend
@@ -129,7 +131,7 @@ class XMLJatsFormatOption(FormatOption):

 class ImageFormatOption(FormatOption):
    pipeline_cls: Type = StandardPdfPipeline
-    backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend
+    backend: Type[AbstractDocumentBackend] = ImageDocumentBackend


 class PdfFormatOption(FormatOption):
@@ -184,10 +186,35 @@ class DocumentConverter:
        self.allowed_formats = (
            allowed_formats if allowed_formats is not None else list(InputFormat)
        )
+
+        # Normalize format options: ensure IMAGE format uses ImageDocumentBackend
+        # for backward compatibility (old code might use PdfFormatOption or other backends for images)
+        normalized_format_options: dict[InputFormat, FormatOption] = {}
+        if format_options:
+            for format, option in format_options.items():
+                if (
+                    format == InputFormat.IMAGE
+                    and option.backend is not ImageDocumentBackend
+                ):
+                    warnings.warn(
+                        f"Using {option.backend.__name__} for InputFormat.IMAGE is deprecated. "
+                        "Images should use ImageDocumentBackend via ImageFormatOption. "
+                        "Automatically correcting the backend, please update your code to avoid this warning.",
+                        DeprecationWarning,
+                        stacklevel=2,
+                    )
+                    # Convert to ImageFormatOption while preserving pipeline and backend options
+                    normalized_format_options[format] = ImageFormatOption(
+                        pipeline_options=option.pipeline_options,
+                        backend_options=option.backend_options,
+                    )
+                else:
+                    normalized_format_options[format] = option
+
        self.format_to_options: dict[InputFormat, FormatOption] = {
            format: (
                _get_default_option(format=format)
-                if (custom_option := (format_options or {}).get(format)) is None
+                if (custom_option := normalized_format_options.get(format)) is None
                else custom_option
            )
            for format in self.allowed_formats
@@ -263,8 +290,12 @@ class DocumentConverter:
                ConversionStatus.SUCCESS,
                ConversionStatus.PARTIAL_SUCCESS,
            }:
+                error_details = ""
+                if conv_res.errors:
+                    error_messages = [err.error_message for err in conv_res.errors]
+                    error_details = f" Errors: {'; '.join(error_messages)}"
                raise ConversionError(
-                    f"Conversion failed for: {conv_res.input.file} with status: {conv_res.status}"
+                    f"Conversion failed for: {conv_res.input.file} with status: {conv_res.status}.{error_details}"
                )
            else:
                yield conv_res
--- a/docling/document_extractor.py
+++ b/docling/document_extractor.py
@@ -14,6 +14,7 @@ from pydantic import ConfigDict, model_validator, validate_call
 from typing_extensions import Self

 from docling.backend.abstract_backend import AbstractDocumentBackend
+from docling.backend.image_backend import ImageDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
 from docling.datamodel.base_models import (
    BaseFormatOption,
@@ -72,7 +73,7 @@ def _get_default_extraction_option(fmt: InputFormat) -> ExtractionFormatOption:
    the format registry between convert/extract.
    """
    format_to_default_backend: dict[InputFormat, Type[AbstractDocumentBackend]] = {
-        InputFormat.IMAGE: PyPdfiumDocumentBackend,
+        InputFormat.IMAGE: ImageDocumentBackend,
        InputFormat.PDF: PyPdfiumDocumentBackend,
    }

--- a/docling/pipeline/base_pipeline.py
+++ b/docling/pipeline/base_pipeline.py
@@ -76,8 +76,15 @@ class BasePipeline(ABC):
                conv_res.status = self._determine_status(conv_res)
        except Exception as e:
            conv_res.status = ConversionStatus.FAILURE
-            if raises_on_error:
-                raise e
+            if not raises_on_error:
+                error_item = ErrorItem(
+                    component_type=DoclingComponentType.PIPELINE,
+                    module_name=self.__class__.__name__,
+                    error_message=str(e),
+                )
+                conv_res.errors.append(error_item)
+            else:
+                raise RuntimeError(f"Pipeline {self.__class__.__name__} failed") from e
        finally:
            self._unload(conv_res)

--- a/docling/pipeline/standard_pdf_pipeline.py
+++ b/docling/pipeline/standard_pdf_pipeline.py
@@ -30,7 +30,13 @@ from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem

 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
-from docling.datamodel.base_models import AssembledUnit, ConversionStatus, Page
+from docling.datamodel.base_models import (
+    AssembledUnit,
+    ConversionStatus,
+    DoclingComponentType,
+    ErrorItem,
+    Page,
+)
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import ThreadedPdfPipelineOptions
 from docling.datamodel.settings import settings
@@ -265,7 +271,9 @@ class ThreadedPipelineStage:
                        )
                    )
            except Exception as exc:
-                _log.error("Stage %s failed for run %d: %s", self.name, rid, exc)
+                _log.error(
+                    "Stage %s failed for run %d: %s", self.name, rid, exc, exc_info=True
+                )
                for it in items:
                    it.is_failed = True
                    it.error = exc
@@ -598,6 +606,16 @@ class StandardPdfPipeline(ConvertPipeline):
            if p.page_no in page_map
            or not any(fp == p.page_no for fp, _ in proc.failed_pages)
        ]
+        # Add error details from failed pages
+        for page_no, error in proc.failed_pages:
+            page_label = f"Page {page_no + 1}" if page_no >= 0 else "Unknown page"
+            error_msg = str(error) if error else ""
+            error_item = ErrorItem(
+                component_type=DoclingComponentType.PIPELINE,
+                module_name=self.__class__.__name__,
+                error_message=f"{page_label}: {error_msg}" if error_msg else page_label,
+            )
+            conv_res.errors.append(error_item)
        if proc.is_complete_failure:
            conv_res.status = ConversionStatus.FAILURE
        elif proc.is_partial_success:
--- a/tests/test_backend_image_native.py
+++ b/tests/test_backend_image_native.py
@@ -0,0 +1,218 @@
+from io import BytesIO
+from pathlib import Path
+
+import pytest
+from docling_core.types.doc import BoundingBox, CoordOrigin
+from PIL import Image
+
+from docling.backend.image_backend import ImageDocumentBackend, _ImagePageBackend
+from docling.datamodel.base_models import DocumentStream, InputFormat
+from docling.datamodel.document import InputDocument, _DocumentConversionInput
+from docling.document_converter import DocumentConverter, ImageFormatOption
+from docling.document_extractor import DocumentExtractor
+
+
+def _make_png_stream(
+    width: int = 64, height: int = 48, color=(123, 45, 67)
+) -> DocumentStream:
+    img = Image.new("RGB", (width, height), color)
+    buf = BytesIO()
+    img.save(buf, format="PNG")
+    buf.seek(0)
+    return DocumentStream(name="test.png", stream=buf)
+
+
+def _make_multipage_tiff_stream(num_pages: int = 3, size=(32, 32)) -> DocumentStream:
+    frames = [
+        Image.new("RGB", size, (i * 10 % 255, i * 20 % 255, i * 30 % 255))
+        for i in range(num_pages)
+    ]
+    buf = BytesIO()
+    frames[0].save(buf, format="TIFF", save_all=True, append_images=frames[1:])
+    buf.seek(0)
+    return DocumentStream(name="test.tiff", stream=buf)
+
+
+def test_docs_builder_uses_image_backend_for_image_stream():
+    stream = _make_png_stream()
+    conv_input = _DocumentConversionInput(path_or_stream_iterator=[stream])
+    # Provide format options mapping that includes IMAGE -> ImageFormatOption (which carries ImageDocumentBackend)
+    format_options = {InputFormat.IMAGE: ImageFormatOption()}
+
+    docs = list(conv_input.docs(format_options))
+    assert len(docs) == 1
+    in_doc = docs[0]
+    assert in_doc.format == InputFormat.IMAGE
+    assert isinstance(in_doc._backend, ImageDocumentBackend)
+    assert in_doc.page_count == 1
+
+
+def test_docs_builder_multipage_tiff_counts_frames():
+    stream = _make_multipage_tiff_stream(num_pages=4)
+    conv_input = _DocumentConversionInput(path_or_stream_iterator=[stream])
+    format_options = {InputFormat.IMAGE: ImageFormatOption()}
+
+    in_doc = next(conv_input.docs(format_options))
+    assert isinstance(in_doc._backend, ImageDocumentBackend)
+    assert in_doc.page_count == 4
+
+
+def test_converter_default_maps_image_to_image_backend():
+    converter = DocumentConverter(allowed_formats=[InputFormat.IMAGE])
+    backend_cls = converter.format_to_options[InputFormat.IMAGE].backend
+    assert backend_cls is ImageDocumentBackend
+
+
+def test_extractor_default_maps_image_to_image_backend():
+    extractor = DocumentExtractor(allowed_formats=[InputFormat.IMAGE])
+    backend_cls = extractor.extraction_format_to_options[InputFormat.IMAGE].backend
+    assert backend_cls is ImageDocumentBackend
+
+
+def _get_backend_from_stream(stream: DocumentStream):
+    """Helper to create InputDocument with ImageDocumentBackend from a stream."""
+    in_doc = InputDocument(
+        path_or_stream=stream.stream,
+        format=InputFormat.IMAGE,
+        backend=ImageDocumentBackend,
+        filename=stream.name,
+    )
+    return in_doc._backend
+
+
+def test_num_pages_single():
+    """Test page count for single-page image."""
+    stream = _make_png_stream(width=100, height=80)
+    doc_backend = _get_backend_from_stream(stream)
+    assert doc_backend.page_count() == 1
+
+
+def test_num_pages_multipage():
+    """Test page count for multi-page TIFF."""
+    stream = _make_multipage_tiff_stream(num_pages=5, size=(64, 64))
+    doc_backend = _get_backend_from_stream(stream)
+    assert doc_backend.page_count() == 5
+
+
+def test_get_size():
+    """Test getting page size."""
+    width, height = 120, 90
+    stream = _make_png_stream(width=width, height=height)
+    doc_backend = _get_backend_from_stream(stream)
+    page_backend: _ImagePageBackend = doc_backend.load_page(0)
+    size = page_backend.get_size()
+    assert size.width == width
+    assert size.height == height
+
+
+def test_get_page_image_full():
+    """Test getting full page image."""
+    width, height = 100, 80
+    stream = _make_png_stream(width=width, height=height)
+    doc_backend = _get_backend_from_stream(stream)
+    page_backend: _ImagePageBackend = doc_backend.load_page(0)
+    img = page_backend.get_page_image()
+    assert img.width == width
+    assert img.height == height
+
+
+def test_get_page_image_scaled():
+    """Test getting scaled page image."""
+    width, height = 100, 80
+    scale = 2.0
+    stream = _make_png_stream(width=width, height=height)
+    doc_backend = _get_backend_from_stream(stream)
+    page_backend: _ImagePageBackend = doc_backend.load_page(0)
+    img = page_backend.get_page_image(scale=scale)
+    assert img.width == round(width * scale)
+    assert img.height == round(height * scale)
+
+
+def test_crop_page_image():
+    """Test cropping page image."""
+    width, height = 200, 150
+    stream = _make_png_stream(width=width, height=height)
+    doc_backend = _get_backend_from_stream(stream)
+    page_backend: _ImagePageBackend = doc_backend.load_page(0)
+
+    # Crop a region from the center
+    cropbox = BoundingBox(l=50, t=30, r=150, b=120, coord_origin=CoordOrigin.TOPLEFT)
+    img = page_backend.get_page_image(cropbox=cropbox)
+    assert img.width == 100  # 150 - 50
+    assert img.height == 90  # 120 - 30
+
+
+def test_crop_page_image_scaled():
+    """Test cropping and scaling page image."""
+    width, height = 200, 150
+    scale = 0.5
+    stream = _make_png_stream(width=width, height=height)
+    doc_backend = _get_backend_from_stream(stream)
+    page_backend: _ImagePageBackend = doc_backend.load_page(0)
+
+    cropbox = BoundingBox(l=50, t=30, r=150, b=120, coord_origin=CoordOrigin.TOPLEFT)
+    img = page_backend.get_page_image(scale=scale, cropbox=cropbox)
+    assert img.width == round(100 * scale)  # cropped width * scale
+    assert img.height == round(90 * scale)  # cropped height * scale
+
+
+def test_get_bitmap_rects():
+    """Test getting bitmap rects - should return full page rectangle."""
+    width, height = 100, 80
+    stream = _make_png_stream(width=width, height=height)
+    doc_backend = _get_backend_from_stream(stream)
+    page_backend: _ImagePageBackend = doc_backend.load_page(0)
+
+    rects = list(page_backend.get_bitmap_rects())
+    assert len(rects) == 1
+    bbox = rects[0]
+    assert bbox.l == 0.0
+    assert bbox.t == 0.0
+    assert bbox.r == float(width)
+    assert bbox.b == float(height)
+    assert bbox.coord_origin == CoordOrigin.TOPLEFT
+
+
+def test_get_bitmap_rects_scaled():
+    """Test getting bitmap rects with scaling."""
+    width, height = 100, 80
+    scale = 2.0
+    stream = _make_png_stream(width=width, height=height)
+    doc_backend = _get_backend_from_stream(stream)
+    page_backend: _ImagePageBackend = doc_backend.load_page(0)
+
+    rects = list(page_backend.get_bitmap_rects(scale=scale))
+    assert len(rects) == 1
+    bbox = rects[0]
+    assert bbox.l == 0.0
+    assert bbox.t == 0.0
+    assert bbox.r == float(width * scale)
+    assert bbox.b == float(height * scale)
+    assert bbox.coord_origin == CoordOrigin.TOPLEFT
+
+
+def test_get_text_in_rect():
+    """Test that get_text_in_rect returns empty string for images (no OCR)."""
+    stream = _make_png_stream()
+    doc_backend = _get_backend_from_stream(stream)
+    page_backend: _ImagePageBackend = doc_backend.load_page(0)
+
+    bbox = BoundingBox(l=10, t=10, r=50, b=50, coord_origin=CoordOrigin.TOPLEFT)
+    text = page_backend.get_text_in_rect(bbox)
+    assert text == ""
+
+
+def test_multipage_access():
+    """Test accessing different pages in multi-page image."""
+    num_pages = 4
+    stream = _make_multipage_tiff_stream(num_pages=num_pages, size=(64, 64))
+    doc_backend = _get_backend_from_stream(stream)
+    assert doc_backend.page_count() == num_pages
+
+    # Access each page
+    for i in range(num_pages):
+        page_backend = doc_backend.load_page(i)
+        assert page_backend.is_valid()
+        size = page_backend.get_size()
+        assert size.width == 64
+        assert size.height == 64
--- a/tests/test_backend_mets_gbs.py
+++ b/tests/test_backend_mets_gbs.py
@@ -15,7 +15,7 @@ def test_doc_path():
 def _get_backend(pdf_doc):
    in_doc = InputDocument(
        path_or_stream=pdf_doc,
-        format=InputFormat.PDF,
+        format=InputFormat.METS_GBS,
        backend=MetsGbsDocumentBackend,
    )

--- a/tests/test_backend_webp.py
+++ b/tests/test_backend_webp.py
@@ -2,6 +2,8 @@ import sys
 from pathlib import Path
 from typing import List

+from pydantic.type_adapter import R
+
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.document import ConversionResult, DoclingDocument
 from docling.datamodel.pipeline_options import (
@@ -72,7 +74,9 @@ def test_e2e_webp_conversions():
        for webp_path in webp_paths:
            print(f"converting {webp_path}")

-            doc_result: ConversionResult = converter.convert(webp_path)
+            doc_result: ConversionResult = converter.convert(
+                webp_path, raises_on_error=True
+            )

            verify_conversion_result_v2(
                input_path=webp_path,
--- a/tests/test_input_doc.py
+++ b/tests/test_input_doc.py
@@ -4,9 +4,6 @@ from pathlib import Path
 import pytest
 from pydantic import ValidationError

-from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
-from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
-from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.backend.html_backend import HTMLDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
 from docling.datamodel.backend_options import (
@@ -17,7 +14,7 @@ from docling.datamodel.backend_options import (
 from docling.datamodel.base_models import DocumentStream, InputFormat
 from docling.datamodel.document import InputDocument, _DocumentConversionInput
 from docling.datamodel.settings import DocumentLimits
-from docling.document_converter import PdfFormatOption
+from docling.document_converter import ImageFormatOption, PdfFormatOption


 def test_in_doc_from_valid_path():
@@ -51,36 +48,6 @@ def test_in_doc_from_invalid_buf():
    assert doc.valid is False


-def test_image_in_pdf_backend():
-    in_doc = InputDocument(
-        path_or_stream=Path("tests/data/2305.03393v1-pg9-img.png"),
-        format=InputFormat.IMAGE,
-        backend=PyPdfiumDocumentBackend,
-    )
-
-    assert in_doc.valid
-    in_doc = InputDocument(
-        path_or_stream=Path("tests/data/2305.03393v1-pg9-img.png"),
-        format=InputFormat.IMAGE,
-        backend=DoclingParseDocumentBackend,
-    )
-    assert in_doc.valid
-
-    in_doc = InputDocument(
-        path_or_stream=Path("tests/data/2305.03393v1-pg9-img.png"),
-        format=InputFormat.IMAGE,
-        backend=DoclingParseV2DocumentBackend,
-    )
-    assert in_doc.valid
-
-    in_doc = InputDocument(
-        path_or_stream=Path("tests/data/2305.03393v1-pg9-img.png"),
-        format=InputFormat.IMAGE,
-        backend=DoclingParseV4DocumentBackend,
-    )
-    assert in_doc.valid
-
-
 def test_in_doc_with_page_range():
    test_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
    limits = DocumentLimits()
@@ -297,7 +264,7 @@ def test_tiff_two_pages():
    doc = InputDocument(
        path_or_stream=tiff_path,
        format=InputFormat.IMAGE,
-        backend=PdfFormatOption().backend,  # use default backend
+        backend=ImageFormatOption().backend,  # use default backend
    )
    assert doc.valid is True
    assert doc.page_count == 2