fix generation of images and adapt examples

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-10-15 17:43:47 +02:00
parent 75feef259d
commit cd8e3dce76
5 changed files with 41 additions and 41 deletions

View File

@ -437,25 +437,6 @@ class ConversionResult(BaseModel):
return ds_doc
def render_element_images(
    self, element_types: Tuple[Type[PageElement], ...] = (FigureElement,)
):
    """Yield each matching assembled element with its cropped page image.

    Walks ``self.assembled.elements`` and, for every element that is an
    instance of one of ``element_types``, crops the element's cluster
    bounding box out of the image of the page it belongs to.

    Args:
        element_types: Tuple of element classes to render. Any number of
            classes may be given (e.g. ``(FigureElement, Table)``); the
            tuple is passed straight to ``isinstance``.

    Yields:
        ``(element, cropped_image)`` pairs, where ``cropped_image`` is the
        result of calling ``crop`` on the page image. Elements whose page
        has no image (``page.image is None``) are skipped.

    Raises:
        AssertionError: If the page of a matching element has no size.
    """
    for element in self.assembled.elements:
        if not isinstance(element, element_types):
            continue

        # The element's page_no is used directly as the index into self.pages.
        page = self.pages[element.page_no]
        assert page.size is not None

        # Bring the cluster bbox to the resolution of the page image, then
        # convert it with to_top_left_origin using the scaled page height.
        scale = page._default_image_scale
        crop_bbox = element.cluster.bbox.scaled(scale=scale).to_top_left_origin(
            page_height=page.size.height * scale
        )

        page_img = page.image
        if page_img is None:
            # No image was kept for this page; nothing to crop.
            continue

        yield element, page_img.crop(crop_bbox.as_tuple())
class _DocumentConversionInput(BaseModel):

View File

@ -152,8 +152,8 @@ class StandardPdfPipeline(PaginatedPipeline):
if self.pipeline_options.generate_page_images:
for page in conv_res.pages:
assert page.image is not None
page_ix = page.page_no - 1
conv_res.document.pages[page_ix].image = ImageRef.from_pil(
page_no = page.page_no + 1
conv_res.document.pages[page_no].image = ImageRef.from_pil(
page.image, dpi=int(72 * self.pipeline_options.images_scale)
)
@ -174,17 +174,17 @@ class StandardPdfPipeline(PaginatedPipeline):
and self.pipeline_options.generate_table_images
):
page_ix = element.prov[0].page_no - 1
page = conv_res.pages[page_ix]
assert page.size is not None
assert page.image is not None
crop_bbox = (
element.prov[0]
.bbox.scaled(scale=scale)
.to_top_left_origin(
page_height=conv_res.pages[page_ix].size.height * scale
)
.to_top_left_origin(page_height=page.size.height * scale)
)
cropped_im = conv_res.pages[page_ix].image.crop(
crop_bbox.as_tuple()
)
cropped_im = page.image.crop(crop_bbox.as_tuple())
element.image = ImageRef.from_pil(cropped_im, dpi=int(72 * scale))
return conv_res

View File

@ -34,7 +34,7 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
assert isinstance(element, PictureItem)
# uncomment this to interactively visualize the image
element.image.pil_image.show()
# element.image.pil_image.show()
element.data.classification = PictureClassificationData(
provenance="example_classifier-0.0.1",

View File

@ -1,7 +1,8 @@
import logging
import time
from pathlib import Path
import time
from docling_core.types.doc.document import PictureItem, TableItem
from docling.datamodel.base_models import FigureElement, InputFormat, Table
from docling.datamodel.pipeline_options import PdfPipelineOptions
@ -20,10 +21,15 @@ def main():
# Important: For operating with page images, we must keep them, otherwise the DocumentConverter
# will destroy them for cleaning up memory.
# This is done by setting AssembleOptions.images_scale, which also defines the scale of images.
# This is done by setting PdfPipelineOptions.images_scale, which also defines the scale of images.
# scale=1 corresponds to a standard 72 DPI image
# The PdfPipelineOptions.generate_* are the selectors for the document elements which will be enriched
# with the image field
pipeline_options = PdfPipelineOptions()
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
pipeline_options.generate_page_images = True
pipeline_options.generate_table_images = True
pipeline_options.generate_picture_images = True
doc_converter = DocumentConverter(
format_options={
@ -38,20 +44,32 @@ def main():
output_dir.mkdir(parents=True, exist_ok=True)
doc_filename = conv_res.input.file.stem
# Export page images
for page in conv_res.pages:
page_no = page.page_no + 1
# Save page images
for page_no, page in conv_res.document.pages.items():
page_no = page.page_no
page_image_filename = output_dir / f"{doc_filename}-{page_no}.png"
with page_image_filename.open("wb") as fp:
page.image.save(fp, format="PNG")
page.image.pil_image.save(fp, format="PNG")
# Export figures and tables
for element, image in conv_res.render_element_images(
element_types=(FigureElement, Table)
):
element_image_filename = output_dir / f"{doc_filename}-element-{element.id}.png"
with element_image_filename.open("wb") as fp:
image.save(fp, "PNG")
# Save images of figures and tables
table_counter = 0
picture_counter = 0
for element, _level in conv_res.document.iterate_items():
if isinstance(element, TableItem):
table_counter += 1
element_image_filename = (
output_dir / f"{doc_filename}-table-{table_counter}.png"
)
with element_image_filename.open("wb") as fp:
element.image.pil_image.save(fp, "PNG")
if isinstance(element, PictureItem):
picture_counter += 1
element_image_filename = (
output_dir / f"{doc_filename}-picture-{picture_counter}.png"
)
with element_image_filename.open("wb") as fp:
element.image.pil_image.save(fp, "PNG")
end_time = time.time() - start_time

View File

@ -28,6 +28,7 @@ def main():
# scale=1 corresponds to a standard 72 DPI image
pipeline_options = PdfPipelineOptions()
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
pipeline_options.generate_page_images = True
doc_converter = DocumentConverter(
format_options={