From cd8e3dce769d6deabd76ad67d4f71d7900846fd8 Mon Sep 17 00:00:00 2001 From: Michele Dolfi Date: Tue, 15 Oct 2024 17:43:47 +0200 Subject: [PATCH] fix generation of images and adapt examples Signed-off-by: Michele Dolfi --- docling/datamodel/document.py | 19 --------- docling/pipeline/standard_pdf_pipeline.py | 16 ++++---- docs/examples/develop_picture_enrichment.py | 2 +- docs/examples/export_figures.py | 44 +++++++++++++++------ docs/examples/export_multimodal.py | 1 + 5 files changed, 41 insertions(+), 41 deletions(-) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 1b39ff7d..239be3a6 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -437,25 +437,6 @@ class ConversionResult(BaseModel): return ds_doc - def render_element_images( - self, element_types: Tuple[Type[PageElement]] = (FigureElement,) - ): - for element in self.assembled.elements: - if isinstance(element, element_types): - page_ix = element.page_no - page = self.pages[page_ix] - - assert page.size is not None - - scale = page._default_image_scale - crop_bbox = element.cluster.bbox.scaled(scale=scale).to_top_left_origin( - page_height=page.size.height * scale - ) - page_img = page.image - if page_img is not None: - cropped_im = page_img.crop(crop_bbox.as_tuple()) - yield element, cropped_im - class _DocumentConversionInput(BaseModel): diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index acfe495f..ee67e6a5 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -152,8 +152,8 @@ class StandardPdfPipeline(PaginatedPipeline): if self.pipeline_options.generate_page_images: for page in conv_res.pages: assert page.image is not None - page_ix = page.page_no - 1 - conv_res.document.pages[page_ix].image = ImageRef.from_pil( + page_no = page.page_no + 1 + conv_res.document.pages[page_no].image = ImageRef.from_pil( page.image, dpi=int(72 * self.pipeline_options.images_scale) ) @@ -174,17 +174,17 @@ class StandardPdfPipeline(PaginatedPipeline): and self.pipeline_options.generate_table_images ): page_ix = element.prov[0].page_no - 1 + page = conv_res.pages[page_ix] + assert page.size is not None + assert page.image is not None + crop_bbox = ( element.prov[0] .bbox.scaled(scale=scale) - .to_top_left_origin( - page_height=conv_res.pages[page_ix].size.height * scale - ) + .to_top_left_origin(page_height=page.size.height * scale) ) - cropped_im = conv_res.pages[page_ix].image.crop( - crop_bbox.as_tuple() - ) + cropped_im = page.image.crop(crop_bbox.as_tuple()) element.image = ImageRef.from_pil(cropped_im, dpi=int(72 * scale)) return conv_res diff --git a/docs/examples/develop_picture_enrichment.py b/docs/examples/develop_picture_enrichment.py index aeec2f32..71286320 100644 --- a/docs/examples/develop_picture_enrichment.py +++ b/docs/examples/develop_picture_enrichment.py @@ -34,7 +34,7 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel): assert isinstance(element, PictureItem) # uncomment this to interactively visualize the image - element.image.pil_image.show() + # element.image.pil_image.show() element.data.classification = PictureClassificationData( provenance="example_classifier-0.0.1", diff --git a/docs/examples/export_figures.py b/docs/examples/export_figures.py index 90f465b4..d3ecc05c 100644 --- a/docs/examples/export_figures.py +++ b/docs/examples/export_figures.py @@ -1,7 +1,8 @@ import logging +import time from pathlib import Path -import time +from docling_core.types.doc.document import PictureItem, TableItem from docling.datamodel.base_models import FigureElement, InputFormat, Table from docling.datamodel.pipeline_options import PdfPipelineOptions @@ -20,10 +21,15 @@ def main(): # Important: For operating with page images, we must keep them, otherwise the DocumentConverter # will destroy them for cleaning up memory. - # This is done by setting AssembleOptions.images_scale, which also defines the scale of images. + # This is done by setting PdfPipelineOptions.images_scale, which also defines the scale of images. # scale=1 correspond of a standard 72 DPI image + # The PdfPipelineOptions.generate_* are the selectors for the document elements which will be enriched + # with the image field pipeline_options = PdfPipelineOptions() pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE + pipeline_options.generate_page_images = True + pipeline_options.generate_table_images = True + pipeline_options.generate_picture_images = True doc_converter = DocumentConverter( format_options={ @@ -38,20 +44,32 @@ def main(): output_dir.mkdir(parents=True, exist_ok=True) doc_filename = conv_res.input.file.stem - # Export page images - for page in conv_res.pages: - page_no = page.page_no + 1 + # Save page images + for page_no, page in conv_res.document.pages.items(): + page_no = page.page_no page_image_filename = output_dir / f"{doc_filename}-{page_no}.png" with page_image_filename.open("wb") as fp: - page.image.save(fp, format="PNG") + page.image.pil_image.save(fp, format="PNG") - # Export figures and tables - for element, image in conv_res.render_element_images( - element_types=(FigureElement, Table) - ): - element_image_filename = output_dir / f"{doc_filename}-element-{element.id}.png" - with element_image_filename.open("wb") as fp: - image.save(fp, "PNG") + # Save images of figures and tables + table_counter = 0 + picture_counter = 0 + for element, _level in conv_res.document.iterate_items(): + if isinstance(element, TableItem): + table_counter += 1 + element_image_filename = ( + output_dir / f"{doc_filename}-table-{table_counter}.png" + ) + with element_image_filename.open("wb") as fp: + element.image.pil_image.save(fp, "PNG") + + if isinstance(element, PictureItem): + picture_counter += 1 + element_image_filename = ( + output_dir / f"{doc_filename}-picture-{picture_counter}.png" + ) + with element_image_filename.open("wb") as fp: + element.image.pil_image.save(fp, "PNG") end_time = time.time() - start_time diff --git a/docs/examples/export_multimodal.py b/docs/examples/export_multimodal.py index 5ead9a0a..1de714ed 100644 --- a/docs/examples/export_multimodal.py +++ b/docs/examples/export_multimodal.py @@ -28,6 +28,7 @@ def main(): # scale=1 correspond of a standard 72 DPI image pipeline_options = PdfPipelineOptions() pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE + pipeline_options.generate_page_images = True doc_converter = DocumentConverter( format_options={