mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
fix generation of images and adapt examples
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
75feef259d
commit
cd8e3dce76
@ -437,25 +437,6 @@ class ConversionResult(BaseModel):
|
||||
|
||||
return ds_doc
|
||||
|
||||
def render_element_images(
|
||||
self, element_types: Tuple[Type[PageElement]] = (FigureElement,)
|
||||
):
|
||||
for element in self.assembled.elements:
|
||||
if isinstance(element, element_types):
|
||||
page_ix = element.page_no
|
||||
page = self.pages[page_ix]
|
||||
|
||||
assert page.size is not None
|
||||
|
||||
scale = page._default_image_scale
|
||||
crop_bbox = element.cluster.bbox.scaled(scale=scale).to_top_left_origin(
|
||||
page_height=page.size.height * scale
|
||||
)
|
||||
page_img = page.image
|
||||
if page_img is not None:
|
||||
cropped_im = page_img.crop(crop_bbox.as_tuple())
|
||||
yield element, cropped_im
|
||||
|
||||
|
||||
class _DocumentConversionInput(BaseModel):
|
||||
|
||||
|
@ -152,8 +152,8 @@ class StandardPdfPipeline(PaginatedPipeline):
|
||||
if self.pipeline_options.generate_page_images:
|
||||
for page in conv_res.pages:
|
||||
assert page.image is not None
|
||||
page_ix = page.page_no - 1
|
||||
conv_res.document.pages[page_ix].image = ImageRef.from_pil(
|
||||
page_no = page.page_no + 1
|
||||
conv_res.document.pages[page_no].image = ImageRef.from_pil(
|
||||
page.image, dpi=int(72 * self.pipeline_options.images_scale)
|
||||
)
|
||||
|
||||
@ -174,17 +174,17 @@ class StandardPdfPipeline(PaginatedPipeline):
|
||||
and self.pipeline_options.generate_table_images
|
||||
):
|
||||
page_ix = element.prov[0].page_no - 1
|
||||
page = conv_res.pages[page_ix]
|
||||
assert page.size is not None
|
||||
assert page.image is not None
|
||||
|
||||
crop_bbox = (
|
||||
element.prov[0]
|
||||
.bbox.scaled(scale=scale)
|
||||
.to_top_left_origin(
|
||||
page_height=conv_res.pages[page_ix].size.height * scale
|
||||
)
|
||||
.to_top_left_origin(page_height=page.size.height * scale)
|
||||
)
|
||||
|
||||
cropped_im = conv_res.pages[page_ix].image.crop(
|
||||
crop_bbox.as_tuple()
|
||||
)
|
||||
cropped_im = page.image.crop(crop_bbox.as_tuple())
|
||||
element.image = ImageRef.from_pil(cropped_im, dpi=int(72 * scale))
|
||||
|
||||
return conv_res
|
||||
|
@ -34,7 +34,7 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
|
||||
assert isinstance(element, PictureItem)
|
||||
|
||||
# uncomment this to interactively visualize the image
|
||||
element.image.pil_image.show()
|
||||
# element.image.pil_image.show()
|
||||
|
||||
element.data.classification = PictureClassificationData(
|
||||
provenance="example_classifier-0.0.1",
|
||||
|
@ -1,7 +1,8 @@
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import time
|
||||
from docling_core.types.doc.document import PictureItem, TableItem
|
||||
|
||||
from docling.datamodel.base_models import FigureElement, InputFormat, Table
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||
@ -20,10 +21,15 @@ def main():
|
||||
|
||||
# Important: For operating with page images, we must keep them, otherwise the DocumentConverter
|
||||
# will destroy them for cleaning up memory.
|
||||
# This is done by setting AssembleOptions.images_scale, which also defines the scale of images.
|
||||
# This is done by setting PdfPipelineOptions.images_scale, which also defines the scale of images.
|
||||
# scale=1 corresponds to a standard 72 DPI image
|
||||
# The PdfPipelineOptions.generate_* are the selectors for the document elements which will be enriched
|
||||
# with the image field
|
||||
pipeline_options = PdfPipelineOptions()
|
||||
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
|
||||
pipeline_options.generate_page_images = True
|
||||
pipeline_options.generate_table_images = True
|
||||
pipeline_options.generate_picture_images = True
|
||||
|
||||
doc_converter = DocumentConverter(
|
||||
format_options={
|
||||
@ -38,20 +44,32 @@ def main():
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
doc_filename = conv_res.input.file.stem
|
||||
|
||||
# Export page images
|
||||
for page in conv_res.pages:
|
||||
page_no = page.page_no + 1
|
||||
# Save page images
|
||||
for page_no, page in conv_res.document.pages.items():
|
||||
page_no = page.page_no
|
||||
page_image_filename = output_dir / f"{doc_filename}-{page_no}.png"
|
||||
with page_image_filename.open("wb") as fp:
|
||||
page.image.save(fp, format="PNG")
|
||||
page.image.pil_image.save(fp, format="PNG")
|
||||
|
||||
# Export figures and tables
|
||||
for element, image in conv_res.render_element_images(
|
||||
element_types=(FigureElement, Table)
|
||||
):
|
||||
element_image_filename = output_dir / f"{doc_filename}-element-{element.id}.png"
|
||||
# Save images of figures and tables
|
||||
table_counter = 0
|
||||
picture_counter = 0
|
||||
for element, _level in conv_res.document.iterate_items():
|
||||
if isinstance(element, TableItem):
|
||||
table_counter += 1
|
||||
element_image_filename = (
|
||||
output_dir / f"{doc_filename}-table-{table_counter}.png"
|
||||
)
|
||||
with element_image_filename.open("wb") as fp:
|
||||
image.save(fp, "PNG")
|
||||
element.image.pil_image.save(fp, "PNG")
|
||||
|
||||
if isinstance(element, PictureItem):
|
||||
picture_counter += 1
|
||||
element_image_filename = (
|
||||
output_dir / f"{doc_filename}-picture-{picture_counter}.png"
|
||||
)
|
||||
with element_image_filename.open("wb") as fp:
|
||||
element.image.pil_image.save(fp, "PNG")
|
||||
|
||||
end_time = time.time() - start_time
|
||||
|
||||
|
@ -28,6 +28,7 @@ def main():
|
||||
# scale=1 corresponds to a standard 72 DPI image
|
||||
pipeline_options = PdfPipelineOptions()
|
||||
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
|
||||
pipeline_options.generate_page_images = True
|
||||
|
||||
doc_converter = DocumentConverter(
|
||||
format_options={
|
||||
|
Loading…
Reference in New Issue
Block a user