From 05f51b30d964ddaffaa399548dc066fbffbad6a4 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Fri, 11 Jul 2025 10:26:43 +0200 Subject: [PATCH] add RGB conversion Signed-off-by: Christoph Auer --- docling/backend/pdf_backend.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/docling/backend/pdf_backend.py b/docling/backend/pdf_backend.py index cd4a07a3..1b0d612e 100644 --- a/docling/backend/pdf_backend.py +++ b/docling/backend/pdf_backend.py @@ -57,28 +57,31 @@ class PdfDocumentBackend(PaginatedDocumentBackend): if self.input_format is InputFormat.IMAGE: buf = BytesIO() img = Image.open(self.path_or_stream) - + # Handle multi-page TIFF images - if hasattr(img, 'n_frames') and img.n_frames > 1: + if hasattr(img, "n_frames") and img.n_frames > 1: # Extract all frames from multi-page image frames = [] try: for i in range(img.n_frames): img.seek(i) - frames.append(img.copy()) + frame = img.copy().convert("RGB") + frames.append(frame) except EOFError: pass - + # Save as multi-page PDF if frames: - frames[0].save(buf, "PDF", save_all=True, append_images=frames[1:]) + frames[0].save( + buf, "PDF", save_all=True, append_images=frames[1:] + ) else: # Fallback to single page if frame extraction fails - img.save(buf, "PDF") + img.convert("RGB").save(buf, "PDF") else: - # Single page image - use existing behavior - img.save(buf, "PDF") - + # Single page image - convert to RGB and save + img.convert("RGB").save(buf, "PDF") + buf.seek(0) self.path_or_stream = buf else: