mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
fix generation of images and adapt examples
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
75feef259d
commit
cd8e3dce76
@ -437,25 +437,6 @@ class ConversionResult(BaseModel):
|
|||||||
|
|
||||||
return ds_doc
|
return ds_doc
|
||||||
|
|
||||||
def render_element_images(
|
|
||||||
self, element_types: Tuple[Type[PageElement]] = (FigureElement,)
|
|
||||||
):
|
|
||||||
for element in self.assembled.elements:
|
|
||||||
if isinstance(element, element_types):
|
|
||||||
page_ix = element.page_no
|
|
||||||
page = self.pages[page_ix]
|
|
||||||
|
|
||||||
assert page.size is not None
|
|
||||||
|
|
||||||
scale = page._default_image_scale
|
|
||||||
crop_bbox = element.cluster.bbox.scaled(scale=scale).to_top_left_origin(
|
|
||||||
page_height=page.size.height * scale
|
|
||||||
)
|
|
||||||
page_img = page.image
|
|
||||||
if page_img is not None:
|
|
||||||
cropped_im = page_img.crop(crop_bbox.as_tuple())
|
|
||||||
yield element, cropped_im
|
|
||||||
|
|
||||||
|
|
||||||
class _DocumentConversionInput(BaseModel):
|
class _DocumentConversionInput(BaseModel):
|
||||||
|
|
||||||
|
@ -152,8 +152,8 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
if self.pipeline_options.generate_page_images:
|
if self.pipeline_options.generate_page_images:
|
||||||
for page in conv_res.pages:
|
for page in conv_res.pages:
|
||||||
assert page.image is not None
|
assert page.image is not None
|
||||||
page_ix = page.page_no - 1
|
page_no = page.page_no + 1
|
||||||
conv_res.document.pages[page_ix].image = ImageRef.from_pil(
|
conv_res.document.pages[page_no].image = ImageRef.from_pil(
|
||||||
page.image, dpi=int(72 * self.pipeline_options.images_scale)
|
page.image, dpi=int(72 * self.pipeline_options.images_scale)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -174,17 +174,17 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
and self.pipeline_options.generate_table_images
|
and self.pipeline_options.generate_table_images
|
||||||
):
|
):
|
||||||
page_ix = element.prov[0].page_no - 1
|
page_ix = element.prov[0].page_no - 1
|
||||||
|
page = conv_res.pages[page_ix]
|
||||||
|
assert page.size is not None
|
||||||
|
assert page.image is not None
|
||||||
|
|
||||||
crop_bbox = (
|
crop_bbox = (
|
||||||
element.prov[0]
|
element.prov[0]
|
||||||
.bbox.scaled(scale=scale)
|
.bbox.scaled(scale=scale)
|
||||||
.to_top_left_origin(
|
.to_top_left_origin(page_height=page.size.height * scale)
|
||||||
page_height=conv_res.pages[page_ix].size.height * scale
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
cropped_im = conv_res.pages[page_ix].image.crop(
|
cropped_im = page.image.crop(crop_bbox.as_tuple())
|
||||||
crop_bbox.as_tuple()
|
|
||||||
)
|
|
||||||
element.image = ImageRef.from_pil(cropped_im, dpi=int(72 * scale))
|
element.image = ImageRef.from_pil(cropped_im, dpi=int(72 * scale))
|
||||||
|
|
||||||
return conv_res
|
return conv_res
|
||||||
|
@ -34,7 +34,7 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
|
|||||||
assert isinstance(element, PictureItem)
|
assert isinstance(element, PictureItem)
|
||||||
|
|
||||||
# uncomment this to interactively visualize the image
|
# uncomment this to interactively visualize the image
|
||||||
element.image.pil_image.show()
|
# element.image.pil_image.show()
|
||||||
|
|
||||||
element.data.classification = PictureClassificationData(
|
element.data.classification = PictureClassificationData(
|
||||||
provenance="example_classifier-0.0.1",
|
provenance="example_classifier-0.0.1",
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import time
|
from docling_core.types.doc.document import PictureItem, TableItem
|
||||||
|
|
||||||
from docling.datamodel.base_models import FigureElement, InputFormat, Table
|
from docling.datamodel.base_models import FigureElement, InputFormat, Table
|
||||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
@ -20,10 +21,15 @@ def main():
|
|||||||
|
|
||||||
# Important: For operating with page images, we must keep them, otherwise the DocumentConverter
|
# Important: For operating with page images, we must keep them, otherwise the DocumentConverter
|
||||||
# will destroy them for cleaning up memory.
|
# will destroy them for cleaning up memory.
|
||||||
# This is done by setting AssembleOptions.images_scale, which also defines the scale of images.
|
# This is done by setting PdfPipelineOptions.images_scale, which also defines the scale of images.
|
||||||
# scale=1 corresponds to a standard 72 DPI image
|
# scale=1 corresponds to a standard 72 DPI image
|
||||||
|
# The PdfPipelineOptions.generate_* flags select which document elements will be enriched
|
||||||
|
# with an image field.
|
||||||
pipeline_options = PdfPipelineOptions()
|
pipeline_options = PdfPipelineOptions()
|
||||||
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
|
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
|
||||||
|
pipeline_options.generate_page_images = True
|
||||||
|
pipeline_options.generate_table_images = True
|
||||||
|
pipeline_options.generate_picture_images = True
|
||||||
|
|
||||||
doc_converter = DocumentConverter(
|
doc_converter = DocumentConverter(
|
||||||
format_options={
|
format_options={
|
||||||
@ -38,20 +44,32 @@ def main():
|
|||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
doc_filename = conv_res.input.file.stem
|
doc_filename = conv_res.input.file.stem
|
||||||
|
|
||||||
# Export page images
|
# Save page images
|
||||||
for page in conv_res.pages:
|
for page_no, page in conv_res.document.pages.items():
|
||||||
page_no = page.page_no + 1
|
page_no = page.page_no
|
||||||
page_image_filename = output_dir / f"{doc_filename}-{page_no}.png"
|
page_image_filename = output_dir / f"{doc_filename}-{page_no}.png"
|
||||||
with page_image_filename.open("wb") as fp:
|
with page_image_filename.open("wb") as fp:
|
||||||
page.image.save(fp, format="PNG")
|
page.image.pil_image.save(fp, format="PNG")
|
||||||
|
|
||||||
# Export figures and tables
|
# Save images of figures and tables
|
||||||
for element, image in conv_res.render_element_images(
|
table_counter = 0
|
||||||
element_types=(FigureElement, Table)
|
picture_counter = 0
|
||||||
):
|
for element, _level in conv_res.document.iterate_items():
|
||||||
element_image_filename = output_dir / f"{doc_filename}-element-{element.id}.png"
|
if isinstance(element, TableItem):
|
||||||
with element_image_filename.open("wb") as fp:
|
table_counter += 1
|
||||||
image.save(fp, "PNG")
|
element_image_filename = (
|
||||||
|
output_dir / f"{doc_filename}-table-{table_counter}.png"
|
||||||
|
)
|
||||||
|
with element_image_filename.open("wb") as fp:
|
||||||
|
element.image.pil_image.save(fp, "PNG")
|
||||||
|
|
||||||
|
if isinstance(element, PictureItem):
|
||||||
|
picture_counter += 1
|
||||||
|
element_image_filename = (
|
||||||
|
output_dir / f"{doc_filename}-picture-{picture_counter}.png"
|
||||||
|
)
|
||||||
|
with element_image_filename.open("wb") as fp:
|
||||||
|
element.image.pil_image.save(fp, "PNG")
|
||||||
|
|
||||||
end_time = time.time() - start_time
|
end_time = time.time() - start_time
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@ def main():
|
|||||||
# scale=1 corresponds to a standard 72 DPI image
|
# scale=1 corresponds to a standard 72 DPI image
|
||||||
pipeline_options = PdfPipelineOptions()
|
pipeline_options = PdfPipelineOptions()
|
||||||
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
|
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
|
||||||
|
pipeline_options.generate_page_images = True
|
||||||
|
|
||||||
doc_converter = DocumentConverter(
|
doc_converter = DocumentConverter(
|
||||||
format_options={
|
format_options={
|
||||||
|
Loading…
Reference in New Issue
Block a user