mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
use PdfDocumentBackend
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
46b904e059
commit
9da610e95b
@ -22,7 +22,7 @@ from PIL import Image
|
||||
from PIL.Image import Image as PILImage
|
||||
|
||||
from docling.backend.abstract_backend import PaginatedDocumentBackend
|
||||
from docling.backend.pdf_backend import PdfPageBackend
|
||||
from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@ -194,7 +194,7 @@ def _extract_confidence(title_str) -> float:
|
||||
return 1
|
||||
|
||||
|
||||
class MetsGbsDocumentBackend(PaginatedDocumentBackend):
|
||||
class MetsGbsDocumentBackend(PdfDocumentBackend):
|
||||
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
|
||||
super().__init__(in_doc, path_or_stream)
|
||||
|
||||
|
@ -84,9 +84,9 @@ class PdfDocumentBackend(PaginatedDocumentBackend):
|
||||
|
||||
buf.seek(0)
|
||||
self.path_or_stream = buf
|
||||
else:
|
||||
elif self.input_format not in self.supported_formats():
|
||||
raise RuntimeError(
|
||||
f"Incompatible file format {self.input_format} was passed to a PdfDocumentBackend."
|
||||
f"Incompatible file format {self.input_format} was passed to a PdfDocumentBackend. Valid format are {','.join(self.supported_formats())}."
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
@ -99,7 +99,7 @@ class PdfDocumentBackend(PaginatedDocumentBackend):
|
||||
|
||||
@classmethod
|
||||
def supported_formats(cls) -> Set[InputFormat]:
|
||||
return {InputFormat.PDF}
|
||||
return {InputFormat.PDF, InputFormat.IMAGE}
|
||||
|
||||
@classmethod
|
||||
def supports_pagination(cls) -> bool:
|
||||
|
Loading…
Reference in New Issue
Block a user