feat: add the Image backend (#2627)

* feat: add the Image backend Signed-off-by: Peter Staar <taa@zurich.ibm.com> * fixed the pre-commit Signed-off-by: Peter Staar <taa@zurich.ibm.com> * Fixed single- versus multi-frame image formats Signed-off-by: Peter Staar <taa@zurich.ibm.com> * fix: Proper usage of ImageDocumentBackend in the pipeline, deprecate old code. Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * fix: Adapt tests Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * fix: correct mets_gbs backend test Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * fix: Make ImagePageBackend.get_bitmap_rects() yield Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Peter Staar <taa@zurich.ibm.com> Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
2025-12-08 12:48:28 +00:00 · 2025-11-17 11:37:22 +01:00
parent ae30373ee7
commit 3495b73de8
12 changed files with 494 additions and 82 deletions
--- a/docling/backend/image_backend.py
+++ b/docling/backend/image_backend.py
@@ -0,0 +1,188 @@
 import logging
 from io import BytesIO
 from pathlib import Path
 from typing import Iterable, List, Optional, Union
 from docling_core.types.doc import BoundingBox, CoordOrigin
 from docling_core.types.doc.page import (
    BoundingRectangle,
    PdfPageBoundaryType,
    PdfPageGeometry,
    SegmentedPdfPage,
    TextCell,
 )
 from PIL import Image
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend
 from docling.datamodel.backend_options import PdfBackendOptions
 from docling.datamodel.base_models import InputFormat, Size
 from docling.datamodel.document import InputDocument
 _log = logging.getLogger(__name__)
 class _ImagePageBackend(PdfPageBackend):
    """Page backend that exposes one in-memory PIL image as a single page.

    A raw image carries no text layer, so the text accessors return empty
    results; only page geometry and bitmap access return real data.
    """

    def __init__(self, image: Image.Image):
        """Wrap ``image`` as a page.

        Args:
            image: The decoded frame that backs this page.
        """
        self._image: Optional[Image.Image] = image
        self.valid: bool = self._image is not None

    def is_valid(self) -> bool:
        """Return whether the page still holds an image (False after ``unload``)."""
        return self.valid

    def get_text_in_rect(self, bbox: BoundingBox) -> str:
        """Return an empty string; ``bbox`` is ignored."""
        # No text extraction from raw images without OCR
        return ""

    def get_segmented_page(self) -> SegmentedPdfPage:
        """Return a segmented page with full-image geometry and no text cells."""
        assert self._image is not None
        page_size = self.get_size()
        # NOTE(review): with a BOTTOMLEFT origin one would expect t >= b, yet
        # this box uses t=0 and b=height — confirm this is intended.
        bbox = BoundingBox(
            l=0.0,
            t=0.0,
            r=float(page_size.width),
            b=float(page_size.height),
            coord_origin=CoordOrigin.BOTTOMLEFT,
        )
        # Every page box (art/bleed/crop/media/trim) is the full image.
        dimension = PdfPageGeometry(
            angle=0.0,
            rect=BoundingRectangle.from_bounding_box(bbox),
            boundary_type=PdfPageBoundaryType.CROP_BOX,
            art_bbox=bbox,
            bleed_bbox=bbox,
            crop_bbox=bbox,
            media_bbox=bbox,
            trim_bbox=bbox,
        )
        return SegmentedPdfPage(
            dimension=dimension,
            char_cells=[],
            word_cells=[],
            textline_cells=[],
            has_chars=False,
            has_words=False,
            has_lines=False,
        )

    def get_text_cells(self) -> Iterable[TextCell]:
        """Return an empty list; raw images have no text cells."""
        return []

    def get_bitmap_rects(self, scale: float = 1) -> Iterable[BoundingBox]:
        """Yield one TOPLEFT box covering the whole page, scaled by ``scale``."""
        # For raw images, the entire page is a bitmap
        assert self._image is not None
        page_size = self.get_size()
        full_page_bbox = BoundingBox(
            l=0.0,
            t=0.0,
            r=float(page_size.width),
            b=float(page_size.height),
            coord_origin=CoordOrigin.TOPLEFT,
        )
        if scale != 1:
            full_page_bbox = full_page_bbox.scaled(scale=scale)
        yield full_page_bbox

    def get_page_image(
        self, scale: float = 1, cropbox: Optional[BoundingBox] = None
    ) -> Image.Image:
        """Return the page image, optionally cropped and then resized.

        Args:
            scale: Resize factor applied after cropping; 1 leaves the size as is.
            cropbox: Region to cut out, in unscaled image pixels. Boxes that are
                not TOPLEFT are converted using the image height.

        Returns:
            The stored image itself when no crop or scaling is requested,
            otherwise a new image.
        """
        assert self._image is not None
        img = self._image
        if cropbox is not None:
            # Expected cropbox comes in TOPLEFT coords in our pipeline
            if cropbox.coord_origin != CoordOrigin.TOPLEFT:
                # Convert to TOPLEFT relative to current image height
                cropbox = cropbox.to_top_left_origin(img.height)
            left, top, right, bottom = cropbox.as_tuple()
            # Round to whole pixels and clamp the box to the image bounds.
            left = max(0, round(left))
            top = max(0, round(top))
            right = min(img.width, round(right))
            bottom = min(img.height, round(bottom))
            img = img.crop((left, top, right, bottom))
        if scale != 1:
            # Never request a zero-sized target.
            new_w = max(1, round(img.width * scale))
            new_h = max(1, round(img.height * scale))
            img = img.resize((new_w, new_h))
        return img

    def get_size(self) -> Size:
        """Return the page size as the image's width and height."""
        assert self._image is not None
        return Size(width=self._image.width, height=self._image.height)

    def unload(self) -> None:
        """Drop the image reference and mark the page as no longer valid."""
        # Help GC and free memory
        self._image = None
        # Keep is_valid() truthful: every accessor needs the image.
        self.valid = False
 class ImageDocumentBackend(PdfDocumentBackend):
    """Image-native backend that bypasses pypdfium2.

    Notes:
        - Subclasses PdfDocumentBackend to satisfy pipeline type checks.
        - Intentionally avoids calling PdfDocumentBackend.__init__ to skip
          the image→PDF conversion and any pypdfium2 usage.
        - Handles multi-page TIFF by extracting frames eagerly to separate
          Image objects to keep thread-safety when pages process in parallel.
    """

    def __init__(
        self,
        in_doc: InputDocument,
        path_or_stream: Union[BytesIO, Path],
        options: PdfBackendOptions = PdfBackendOptions(),
    ):
        """Decode every frame of the input image into an RGB page.

        Args:
            in_doc: The input document this backend serves.
            path_or_stream: Image file path or in-memory stream.
            options: Backend options. The default instance is created once at
                definition time and shared between calls.

        Raises:
            RuntimeError: If the input format is not ``InputFormat.IMAGE`` or
                the image cannot be opened or decoded.
        """
        # Bypass PdfDocumentBackend.__init__ to avoid image→PDF conversion
        AbstractDocumentBackend.__init__(self, in_doc, path_or_stream, options)
        self.options: PdfBackendOptions = options
        if self.input_format not in {InputFormat.IMAGE}:
            raise RuntimeError(
                f"Incompatible file format {self.input_format} was passed to ImageDocumentBackend."
            )
        # Load frames eagerly for thread-safety across pages
        self._frames: List[Image.Image] = []
        try:
            # The context manager releases the file handle Pillow opens for a
            # path input; every frame is converted into its own image first.
            with Image.open(self.path_or_stream) as img:  # type: ignore[arg-type]
                # Handle multi-frame and single-frame images
                # - multiframe formats: TIFF, GIF, ICO
                # - singleframe formats: JPEG (.jpg, .jpeg), PNG (.png), BMP, WEBP (unless animated), HEIC
                frame_count = getattr(img, "n_frames", 1)
                if frame_count > 1:
                    for i in range(frame_count):
                        img.seek(i)
                        # convert() already returns a new image, so no
                        # separate copy() is needed.
                        self._frames.append(img.convert("RGB"))
                else:
                    self._frames.append(img.convert("RGB"))
        except Exception as e:
            raise RuntimeError(f"Could not load image for document {self.file}") from e

    def is_valid(self) -> bool:
        """Return True while at least one frame is loaded."""
        return len(self._frames) > 0

    def page_count(self) -> int:
        """Return the number of loaded frames; each frame is one page."""
        return len(self._frames)

    def load_page(self, page_no: int) -> _ImagePageBackend:
        """Return a page backend for the zero-based frame ``page_no``.

        Raises:
            IndexError: If ``page_no`` is outside the loaded frames.
        """
        if not (0 <= page_no < len(self._frames)):
            raise IndexError(f"Page index out of range: {page_no}")
        return _ImagePageBackend(self._frames[page_no])

    @classmethod
    def supported_formats(cls) -> set[InputFormat]:
        """Return the formats this backend accepts."""
        # Only IMAGE here; PDF handling remains in PDF-oriented backends
        return {InputFormat.IMAGE}

    @classmethod
    def supports_pagination(cls) -> bool:
        """Return True; multi-frame images map to multiple pages."""
        return True

    def unload(self):
        """Run the parent unload, then drop all decoded frames."""
        super().unload()
        self._frames = []
--- a/docling/backend/pdf_backend.py
+++ b/docling/backend/pdf_backend.py
@@ -60,38 +60,7 @@ class PdfDocumentBackend(PaginatedDocumentBackend):
        super().__init__(in_doc, path_or_stream, options)
        self.options: PdfBackendOptions
-        if self.input_format is not InputFormat.PDF:
+        if self.input_format not in self.supported_formats():
            if self.input_format is InputFormat.IMAGE:
                buf = BytesIO()
                img = Image.open(self.path_or_stream)
                # Handle multi-page TIFF images
                if hasattr(img, "n_frames") and img.n_frames > 1:
                    # Extract all frames from multi-page image
                    frames = []
                    try:
                        for i in range(img.n_frames):
                            img.seek(i)
                            frame = img.copy().convert("RGB")
                            frames.append(frame)
                    except EOFError:
                        pass
                    # Save as multi-page PDF
                    if frames:
                        frames[0].save(
                            buf, "PDF", save_all=True, append_images=frames[1:]
                        )
                    else:
                        # Fallback to single page if frame extraction fails
                        img.convert("RGB").save(buf, "PDF")
                else:
                    # Single page image - convert to RGB and save
                    img.convert("RGB").save(buf, "PDF")
                buf.seek(0)
                self.path_or_stream = buf
            elif self.input_format not in self.supported_formats():
            raise RuntimeError(
                f"Incompatible file format {self.input_format} was passed to a PdfDocumentBackend. Valid format are {','.join(self.supported_formats())}."
            )
@@ -106,7 +75,7 @@ class PdfDocumentBackend(PaginatedDocumentBackend):
    @classmethod
    def supported_formats(cls) -> Set[InputFormat]:
-        return {InputFormat.PDF, InputFormat.IMAGE}
+        return {InputFormat.PDF}
    @classmethod
    def supports_pagination(cls) -> bool:
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@@ -26,6 +26,7 @@ from rich.console import Console
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
 from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.backend.image_backend import ImageDocumentBackend
 from docling.backend.mets_gbs_backend import MetsGbsDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
@@ -698,9 +699,16 @@ def convert(  # noqa: C901
            if artifacts_path is not None:
                simple_format_option.artifacts_path = artifacts_path
            # Use image-native backend for IMAGE to avoid pypdfium2 locking
            image_format_option = PdfFormatOption(
                pipeline_options=pipeline_options,
                backend=ImageDocumentBackend,
                backend_options=pdf_backend_options,
            )
            format_options = {
                InputFormat.PDF: pdf_format_option,
-                InputFormat.IMAGE: pdf_format_option,
+                InputFormat.IMAGE: image_format_option,
                InputFormat.METS_GBS: mets_gbs_format_option,
                InputFormat.DOCX: WordFormatOption(
                    pipeline_options=simple_format_option
--- a/docling/datamodel/base_models.py
+++ b/docling/datamodel/base_models.py
@@ -164,6 +164,7 @@ class DoclingComponentType(str, Enum):
    MODEL = "model"
    DOC_ASSEMBLER = "doc_assembler"
    USER_INPUT = "user_input"
    PIPELINE = "pipeline"
 class VlmStopReason(str, Enum):
--- a/docling/document_converter.py
+++ b/docling/document_converter.py
@@ -3,6 +3,7 @@ import logging
 import sys
 import threading
 import time
 import warnings
 from collections.abc import Iterable, Iterator
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
@@ -21,6 +22,7 @@ from docling.backend.asciidoc_backend import AsciiDocBackend
 from docling.backend.csv_backend import CsvDocumentBackend
 from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.backend.html_backend import HTMLDocumentBackend
 from docling.backend.image_backend import ImageDocumentBackend
 from docling.backend.json.docling_json_backend import DoclingJSONBackend
 from docling.backend.md_backend import MarkdownDocumentBackend
 from docling.backend.mets_gbs_backend import MetsGbsDocumentBackend
@@ -129,7 +131,7 @@ class XMLJatsFormatOption(FormatOption):
 class ImageFormatOption(FormatOption):
    pipeline_cls: Type = StandardPdfPipeline
-    backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend
+    backend: Type[AbstractDocumentBackend] = ImageDocumentBackend
 class PdfFormatOption(FormatOption):
@@ -184,10 +186,35 @@ class DocumentConverter:
        self.allowed_formats = (
            allowed_formats if allowed_formats is not None else list(InputFormat)
        )
        # Normalize format options: ensure IMAGE format uses ImageDocumentBackend
        # for backward compatibility (old code might use PdfFormatOption or other backends for images)
        normalized_format_options: dict[InputFormat, FormatOption] = {}
        if format_options:
            for format, option in format_options.items():
                if (
                    format == InputFormat.IMAGE
                    and option.backend is not ImageDocumentBackend
                ):
                    warnings.warn(
                        f"Using {option.backend.__name__} for InputFormat.IMAGE is deprecated. "
                        "Images should use ImageDocumentBackend via ImageFormatOption. "
                        "Automatically correcting the backend, please update your code to avoid this warning.",
                        DeprecationWarning,
                        stacklevel=2,
                    )
                    # Convert to ImageFormatOption while preserving pipeline and backend options
                    normalized_format_options[format] = ImageFormatOption(
                        pipeline_options=option.pipeline_options,
                        backend_options=option.backend_options,
                    )
                else:
                    normalized_format_options[format] = option
        self.format_to_options: dict[InputFormat, FormatOption] = {
            format: (
                _get_default_option(format=format)
-                if (custom_option := (format_options or {}).get(format)) is None
+                if (custom_option := normalized_format_options.get(format)) is None
                else custom_option
            )
            for format in self.allowed_formats
@@ -263,8 +290,12 @@ class DocumentConverter:
                ConversionStatus.SUCCESS,
                ConversionStatus.PARTIAL_SUCCESS,
            }:
                error_details = ""
                if conv_res.errors:
                    error_messages = [err.error_message for err in conv_res.errors]
                    error_details = f" Errors: {'; '.join(error_messages)}"
                raise ConversionError(
-                    f"Conversion failed for: {conv_res.input.file} with status: {conv_res.status}"
+                    f"Conversion failed for: {conv_res.input.file} with status: {conv_res.status}.{error_details}"
                )
            else:
                yield conv_res
--- a/docling/document_extractor.py
+++ b/docling/document_extractor.py
@@ -14,6 +14,7 @@ from pydantic import ConfigDict, model_validator, validate_call
 from typing_extensions import Self
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.image_backend import ImageDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
 from docling.datamodel.base_models import (
    BaseFormatOption,
@@ -72,7 +73,7 @@ def _get_default_extraction_option(fmt: InputFormat) -> ExtractionFormatOption:
    the format registry between convert/extract.
    """
    format_to_default_backend: dict[InputFormat, Type[AbstractDocumentBackend]] = {
-        InputFormat.IMAGE: PyPdfiumDocumentBackend,
+        InputFormat.IMAGE: ImageDocumentBackend,
        InputFormat.PDF: PyPdfiumDocumentBackend,
    }
--- a/docling/pipeline/base_pipeline.py
+++ b/docling/pipeline/base_pipeline.py
@@ -76,8 +76,15 @@ class BasePipeline(ABC):
                conv_res.status = self._determine_status(conv_res)
        except Exception as e:
            conv_res.status = ConversionStatus.FAILURE
-            if raises_on_error:
+            if not raises_on_error:
-                raise e
+                error_item = ErrorItem(
                    component_type=DoclingComponentType.PIPELINE,
                    module_name=self.__class__.__name__,
                    error_message=str(e),
                )
                conv_res.errors.append(error_item)
            else:
                raise RuntimeError(f"Pipeline {self.__class__.__name__} failed") from e
        finally:
            self._unload(conv_res)
--- a/docling/pipeline/standard_pdf_pipeline.py
+++ b/docling/pipeline/standard_pdf_pipeline.py
@@ -30,7 +30,13 @@ from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
-from docling.datamodel.base_models import AssembledUnit, ConversionStatus, Page
+from docling.datamodel.base_models import (
    AssembledUnit,
    ConversionStatus,
    DoclingComponentType,
    ErrorItem,
    Page,
 )
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import ThreadedPdfPipelineOptions
 from docling.datamodel.settings import settings
@@ -265,7 +271,9 @@ class ThreadedPipelineStage:
                        )
                    )
            except Exception as exc:
-                _log.error("Stage %s failed for run %d: %s", self.name, rid, exc)
+                _log.error(
                    "Stage %s failed for run %d: %s", self.name, rid, exc, exc_info=True
                )
                for it in items:
                    it.is_failed = True
                    it.error = exc
@@ -598,6 +606,16 @@ class StandardPdfPipeline(ConvertPipeline):
            if p.page_no in page_map
            or not any(fp == p.page_no for fp, _ in proc.failed_pages)
        ]
        # Add error details from failed pages
        for page_no, error in proc.failed_pages:
            page_label = f"Page {page_no + 1}" if page_no >= 0 else "Unknown page"
            error_msg = str(error) if error else ""
            error_item = ErrorItem(
                component_type=DoclingComponentType.PIPELINE,
                module_name=self.__class__.__name__,
                error_message=f"{page_label}: {error_msg}" if error_msg else page_label,
            )
            conv_res.errors.append(error_item)
        if proc.is_complete_failure:
            conv_res.status = ConversionStatus.FAILURE
        elif proc.is_partial_success:
--- a/tests/test_backend_image_native.py
+++ b/tests/test_backend_image_native.py
@@ -0,0 +1,218 @@
 from io import BytesIO
 from pathlib import Path
 import pytest
 from docling_core.types.doc import BoundingBox, CoordOrigin
 from PIL import Image
 from docling.backend.image_backend import ImageDocumentBackend, _ImagePageBackend
 from docling.datamodel.base_models import DocumentStream, InputFormat
 from docling.datamodel.document import InputDocument, _DocumentConversionInput
 from docling.document_converter import DocumentConverter, ImageFormatOption
 from docling.document_extractor import DocumentExtractor
 def _make_png_stream(
    width: int = 64, height: int = 48, color=(123, 45, 67)
 ) -> DocumentStream:
    """Build an in-memory, solid-color PNG wrapped as a DocumentStream."""
    payload = BytesIO()
    Image.new("RGB", (width, height), color).save(payload, format="PNG")
    # Rewind so the consumer reads from the first byte.
    payload.seek(0)
    return DocumentStream(name="test.png", stream=payload)
 def _make_multipage_tiff_stream(num_pages: int = 3, size=(32, 32)) -> DocumentStream:
    """Build an in-memory multi-frame TIFF with one solid-color frame per page."""
    pages = []
    for idx in range(num_pages):
        # Vary the fill color with the frame index.
        shade = (idx * 10 % 255, idx * 20 % 255, idx * 30 % 255)
        pages.append(Image.new("RGB", size, shade))
    payload = BytesIO()
    pages[0].save(payload, format="TIFF", save_all=True, append_images=pages[1:])
    payload.seek(0)
    return DocumentStream(name="test.tiff", stream=payload)
 def test_docs_builder_uses_image_backend_for_image_stream():
    """A PNG stream built with ImageFormatOption gets an ImageDocumentBackend."""
    # IMAGE -> ImageFormatOption, which carries ImageDocumentBackend
    options_by_format = {InputFormat.IMAGE: ImageFormatOption()}
    png = _make_png_stream()
    source = _DocumentConversionInput(path_or_stream_iterator=[png])
    built = list(source.docs(options_by_format))
    assert len(built) == 1
    document = built[0]
    assert document.format == InputFormat.IMAGE
    assert isinstance(document._backend, ImageDocumentBackend)
    assert document.page_count == 1
 def test_docs_builder_multipage_tiff_counts_frames():
    """A four-frame TIFF stream is reported as a four-page document."""
    tiff = _make_multipage_tiff_stream(num_pages=4)
    source = _DocumentConversionInput(path_or_stream_iterator=[tiff])
    options_by_format = {InputFormat.IMAGE: ImageFormatOption()}
    document = next(source.docs(options_by_format))
    assert isinstance(document._backend, ImageDocumentBackend)
    assert document.page_count == 4
 def test_converter_default_maps_image_to_image_backend():
    """DocumentConverter's default IMAGE option uses ImageDocumentBackend."""
    image_option = DocumentConverter(
        allowed_formats=[InputFormat.IMAGE]
    ).format_to_options[InputFormat.IMAGE]
    assert image_option.backend is ImageDocumentBackend
 def test_extractor_default_maps_image_to_image_backend():
    """DocumentExtractor's default IMAGE option uses ImageDocumentBackend."""
    image_option = DocumentExtractor(
        allowed_formats=[InputFormat.IMAGE]
    ).extraction_format_to_options[InputFormat.IMAGE]
    assert image_option.backend is ImageDocumentBackend
 def _get_backend_from_stream(stream: DocumentStream):
    """Return the backend of an IMAGE InputDocument built from ``stream``."""
    document = InputDocument(
        path_or_stream=stream.stream,
        format=InputFormat.IMAGE,
        backend=ImageDocumentBackend,
        filename=stream.name,
    )
    backend = document._backend
    return backend
 def test_num_pages_single():
    """A single-frame PNG reports exactly one page."""
    backend = _get_backend_from_stream(_make_png_stream(width=100, height=80))
    assert backend.page_count() == 1
 def test_num_pages_multipage():
    """A five-frame TIFF reports five pages."""
    tiff = _make_multipage_tiff_stream(num_pages=5, size=(64, 64))
    backend = _get_backend_from_stream(tiff)
    assert backend.page_count() == 5
 def test_get_size():
    """The page size equals the pixel dimensions of the source image."""
    expected_w, expected_h = 120, 90
    backend = _get_backend_from_stream(
        _make_png_stream(width=expected_w, height=expected_h)
    )
    page: _ImagePageBackend = backend.load_page(0)
    reported = page.get_size()
    assert reported.width == expected_w
    assert reported.height == expected_h
 def test_get_page_image_full():
    """With no crop or scale, the page image keeps the source dimensions."""
    expected_w, expected_h = 100, 80
    backend = _get_backend_from_stream(
        _make_png_stream(width=expected_w, height=expected_h)
    )
    page: _ImagePageBackend = backend.load_page(0)
    rendered = page.get_page_image()
    assert rendered.width == expected_w
    assert rendered.height == expected_h
 def test_get_page_image_scaled():
    """Scaling by 2.0 doubles both dimensions of the page image."""
    factor = 2.0
    base_w, base_h = 100, 80
    backend = _get_backend_from_stream(_make_png_stream(width=base_w, height=base_h))
    page: _ImagePageBackend = backend.load_page(0)
    rendered = page.get_page_image(scale=factor)
    assert rendered.width == round(base_w * factor)
    assert rendered.height == round(base_h * factor)
 def test_crop_page_image():
    """Cropping with a TOPLEFT box returns an image the size of that box."""
    base_w, base_h = 200, 150
    backend = _get_backend_from_stream(_make_png_stream(width=base_w, height=base_h))
    page: _ImagePageBackend = backend.load_page(0)
    # A region taken from the center of the page
    region = BoundingBox(l=50, t=30, r=150, b=120, coord_origin=CoordOrigin.TOPLEFT)
    cropped = page.get_page_image(cropbox=region)
    assert cropped.width == 100  # 150 - 50
    assert cropped.height == 90  # 120 - 30
 def test_crop_page_image_scaled():
    """Crop and scale together: the cropped size is multiplied by the scale."""
    factor = 0.5
    base_w, base_h = 200, 150
    backend = _get_backend_from_stream(_make_png_stream(width=base_w, height=base_h))
    page: _ImagePageBackend = backend.load_page(0)
    region = BoundingBox(l=50, t=30, r=150, b=120, coord_origin=CoordOrigin.TOPLEFT)
    result = page.get_page_image(scale=factor, cropbox=region)
    assert result.width == round(100 * factor)  # cropped width * scale
    assert result.height == round(90 * factor)  # cropped height * scale
 def test_get_bitmap_rects():
    """An unscaled page yields exactly one bitmap rect covering the full page."""
    base_w, base_h = 100, 80
    backend = _get_backend_from_stream(_make_png_stream(width=base_w, height=base_h))
    page: _ImagePageBackend = backend.load_page(0)
    found = list(page.get_bitmap_rects())
    assert len(found) == 1
    rect = found[0]
    assert rect.l == 0.0
    assert rect.t == 0.0
    assert rect.r == float(base_w)
    assert rect.b == float(base_h)
    assert rect.coord_origin == CoordOrigin.TOPLEFT
 def test_get_bitmap_rects_scaled():
    """With scale=2.0 the single bitmap rect spans the scaled page size."""
    factor = 2.0
    base_w, base_h = 100, 80
    backend = _get_backend_from_stream(_make_png_stream(width=base_w, height=base_h))
    page: _ImagePageBackend = backend.load_page(0)
    found = list(page.get_bitmap_rects(scale=factor))
    assert len(found) == 1
    rect = found[0]
    assert rect.l == 0.0
    assert rect.t == 0.0
    assert rect.r == float(base_w * factor)
    assert rect.b == float(base_h * factor)
    assert rect.coord_origin == CoordOrigin.TOPLEFT
 def test_get_text_in_rect():
    """Text lookup on an image page returns an empty string (no OCR)."""
    backend = _get_backend_from_stream(_make_png_stream())
    page: _ImagePageBackend = backend.load_page(0)
    probe = BoundingBox(l=10, t=10, r=50, b=50, coord_origin=CoordOrigin.TOPLEFT)
    extracted = page.get_text_in_rect(probe)
    assert extracted == ""
 def test_multipage_access():
    """Every frame of a multi-page TIFF loads as a valid 64x64 page."""
    expected_pages = 4
    tiff = _make_multipage_tiff_stream(num_pages=expected_pages, size=(64, 64))
    backend = _get_backend_from_stream(tiff)
    assert backend.page_count() == expected_pages
    # Load each page in turn and check it
    for page_index in range(expected_pages):
        page = backend.load_page(page_index)
        assert page.is_valid()
        dims = page.get_size()
        assert dims.width == 64
        assert dims.height == 64
--- a/tests/test_backend_mets_gbs.py
+++ b/tests/test_backend_mets_gbs.py
@@ -15,7 +15,7 @@ def test_doc_path():
 def _get_backend(pdf_doc):
    in_doc = InputDocument(
        path_or_stream=pdf_doc,
-        format=InputFormat.PDF,
+        format=InputFormat.METS_GBS,
        backend=MetsGbsDocumentBackend,
    )
--- a/tests/test_backend_webp.py
+++ b/tests/test_backend_webp.py
@@ -2,6 +2,8 @@ import sys
 from pathlib import Path
 from typing import List
 from pydantic.type_adapter import R
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.document import ConversionResult, DoclingDocument
 from docling.datamodel.pipeline_options import (
@@ -72,7 +74,9 @@ def test_e2e_webp_conversions():
        for webp_path in webp_paths:
            print(f"converting {webp_path}")
-            doc_result: ConversionResult = converter.convert(webp_path)
+            doc_result: ConversionResult = converter.convert(
                webp_path, raises_on_error=True
            )
            verify_conversion_result_v2(
                input_path=webp_path,
--- a/tests/test_input_doc.py
+++ b/tests/test_input_doc.py
@@ -4,9 +4,6 @@ from pathlib import Path
 import pytest
 from pydantic import ValidationError
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
 from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.backend.html_backend import HTMLDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
 from docling.datamodel.backend_options import (
@@ -17,7 +14,7 @@ from docling.datamodel.backend_options import (
 from docling.datamodel.base_models import DocumentStream, InputFormat
 from docling.datamodel.document import InputDocument, _DocumentConversionInput
 from docling.datamodel.settings import DocumentLimits
-from docling.document_converter import PdfFormatOption
+from docling.document_converter import ImageFormatOption, PdfFormatOption
 def test_in_doc_from_valid_path():
@@ -51,36 +48,6 @@ def test_in_doc_from_invalid_buf():
    assert doc.valid is False
 def test_image_in_pdf_backend():
    in_doc = InputDocument(
        path_or_stream=Path("tests/data/2305.03393v1-pg9-img.png"),
        format=InputFormat.IMAGE,
        backend=PyPdfiumDocumentBackend,
    )
    assert in_doc.valid
    in_doc = InputDocument(
        path_or_stream=Path("tests/data/2305.03393v1-pg9-img.png"),
        format=InputFormat.IMAGE,
        backend=DoclingParseDocumentBackend,
    )
    assert in_doc.valid
    in_doc = InputDocument(
        path_or_stream=Path("tests/data/2305.03393v1-pg9-img.png"),
        format=InputFormat.IMAGE,
        backend=DoclingParseV2DocumentBackend,
    )
    assert in_doc.valid
    in_doc = InputDocument(
        path_or_stream=Path("tests/data/2305.03393v1-pg9-img.png"),
        format=InputFormat.IMAGE,
        backend=DoclingParseV4DocumentBackend,
    )
    assert in_doc.valid
 def test_in_doc_with_page_range():
    test_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
    limits = DocumentLimits()
@@ -297,7 +264,7 @@ def test_tiff_two_pages():
    doc = InputDocument(
        path_or_stream=tiff_path,
        format=InputFormat.IMAGE,
-        backend=PdfFormatOption().backend,  # use default backend
+        backend=ImageFormatOption().backend,  # use default backend
    )
    assert doc.valid is True
    assert doc.page_count == 2