feat: added support for exporting DocItem to an image when page image is available (#379)

* Updated minimum docling-core version to 2.4.0

Signed-off-by: Shubham Gupta <26436285+sh-gupta@users.noreply.github.com>

* Deprecated the generate_table_images option

Signed-off-by: Shubham Gupta <26436285+sh-gupta@users.noreply.github.com>

* Updated examples to use get_image instead of element.image

Signed-off-by: Shubham Gupta <26436285+sh-gupta@users.noreply.github.com>

---------

Signed-off-by: Shubham Gupta <26436285+sh-gupta@users.noreply.github.com>
This commit is contained in:
Shubham Gupta
2024-11-19 16:28:52 +01:00
committed by GitHub
parent 911c3bda27
commit 3f91e7d3f1
5 changed files with 31 additions and 8 deletions

View File

@@ -39,7 +39,7 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
assert isinstance(element, PictureItem)
# uncomment this to interactively visualize the image
- # element.image.pil_image.show()
+ # element.get_image(doc).show()
element.annotations.append(
PictureClassificationData(

View File

@@ -28,7 +28,6 @@ def main():
pipeline_options = PdfPipelineOptions()
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
pipeline_options.generate_page_images = True
- pipeline_options.generate_table_images = True
pipeline_options.generate_picture_images = True
doc_converter = DocumentConverter(
@@ -61,7 +60,7 @@ def main():
output_dir / f"{doc_filename}-table-{table_counter}.png"
)
with element_image_filename.open("wb") as fp:
- element.image.pil_image.save(fp, "PNG")
+ element.get_image(conv_res.document).save(fp, "PNG")
if isinstance(element, PictureItem):
picture_counter += 1
@@ -69,7 +68,7 @@ def main():
output_dir / f"{doc_filename}-picture-{picture_counter}.png"
)
with element_image_filename.open("wb") as fp:
- element.image.pil_image.save(fp, "PNG")
+ element.get_image(conv_res.document).save(fp, "PNG")
# Save markdown with embedded pictures
content_md = conv_res.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)