diff --git a/docling/backend/mets_gbs_backend.py b/docling/backend/mets_gbs_backend.py index b3ca2fb2..09ac84ae 100644 --- a/docling/backend/mets_gbs_backend.py +++ b/docling/backend/mets_gbs_backend.py @@ -22,7 +22,7 @@ from PIL import Image from PIL.Image import Image as PILImage from docling.backend.abstract_backend import PaginatedDocumentBackend -from docling.backend.pdf_backend import PdfPageBackend +from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend from docling.datamodel.base_models import InputFormat if TYPE_CHECKING: @@ -194,7 +194,7 @@ def _extract_confidence(title_str) -> float: return 1 -class MetsGbsDocumentBackend(PaginatedDocumentBackend): +class MetsGbsDocumentBackend(PdfDocumentBackend): def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]): super().__init__(in_doc, path_or_stream) diff --git a/docling/backend/pdf_backend.py b/docling/backend/pdf_backend.py index 1b0d612e..e14b147a 100644 --- a/docling/backend/pdf_backend.py +++ b/docling/backend/pdf_backend.py @@ -84,9 +84,9 @@ class PdfDocumentBackend(PaginatedDocumentBackend): buf.seek(0) self.path_or_stream = buf - else: + elif self.input_format not in self.supported_formats(): raise RuntimeError( - f"Incompatible file format {self.input_format} was passed to a PdfDocumentBackend." + f"Incompatible file format {self.input_format} was passed to a PdfDocumentBackend. Valid formats are {','.join(self.supported_formats())}." ) @abstractmethod @@ -99,7 +99,7 @@ class PdfDocumentBackend(PaginatedDocumentBackend): @classmethod def supported_formats(cls) -> Set[InputFormat]: - return {InputFormat.PDF} + return {InputFormat.PDF, InputFormat.IMAGE} @classmethod def supports_pagination(cls) -> bool: