use PdfDocumentBackend

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2025-07-25 15:19:53 +02:00
parent 46b904e059
commit 9da610e95b
2 changed files with 5 additions and 5 deletions

View File

@ -22,7 +22,7 @@ from PIL import Image
from PIL.Image import Image as PILImage from PIL.Image import Image as PILImage
from docling.backend.abstract_backend import PaginatedDocumentBackend from docling.backend.abstract_backend import PaginatedDocumentBackend
from docling.backend.pdf_backend import PdfPageBackend from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend
from docling.datamodel.base_models import InputFormat from docling.datamodel.base_models import InputFormat
if TYPE_CHECKING: if TYPE_CHECKING:
@ -194,7 +194,7 @@ def _extract_confidence(title_str) -> float:
return 1 return 1
class MetsGbsDocumentBackend(PaginatedDocumentBackend): class MetsGbsDocumentBackend(PdfDocumentBackend):
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]): def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
super().__init__(in_doc, path_or_stream) super().__init__(in_doc, path_or_stream)

View File

@ -84,9 +84,9 @@ class PdfDocumentBackend(PaginatedDocumentBackend):
buf.seek(0) buf.seek(0)
self.path_or_stream = buf self.path_or_stream = buf
else: elif self.input_format not in self.supported_formats():
raise RuntimeError( raise RuntimeError(
f"Incompatible file format {self.input_format} was passed to a PdfDocumentBackend." f"Incompatible file format {self.input_format} was passed to a PdfDocumentBackend. Valid format are {','.join(self.supported_formats())}."
) )
@abstractmethod @abstractmethod
@ -99,7 +99,7 @@ class PdfDocumentBackend(PaginatedDocumentBackend):
@classmethod @classmethod
def supported_formats(cls) -> Set[InputFormat]: def supported_formats(cls) -> Set[InputFormat]:
return {InputFormat.PDF} return {InputFormat.PDF, InputFormat.IMAGE}
@classmethod @classmethod
def supports_pagination(cls) -> bool: def supports_pagination(cls) -> bool: