From aa6d1bae7cd3d9d20544084636681fb82f47c4b2 Mon Sep 17 00:00:00 2001 From: Rowan Skewes Date: Thu, 27 Mar 2025 17:08:55 +1100 Subject: [PATCH] fix: Implement PictureDescriptionApiOptions.picture_area_threshold Signed-off-by: Rowan Skewes --- docling/datamodel/pipeline_options.py | 4 ++-- docling/models/picture_description_base_model.py | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 654e04df..c8f9ccfe 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -213,8 +213,8 @@ class PictureDescriptionBaseOptions(BaseOptions): batch_size: int = 8 scale: float = 2 - bitmap_area_threshold: float = ( - 0.2 # percentage of the area for a bitmap to processed with the models + picture_area_threshold: float = ( + 0.05 # minimum fraction of the page area a picture must cover to be processed with the models ) diff --git a/docling/models/picture_description_base_model.py b/docling/models/picture_description_base_model.py index 129387b3..96169227 100644 --- a/docling/models/picture_description_base_model.py +++ b/docling/models/picture_description_base_model.py @@ -63,8 +63,20 @@ class PictureDescriptionBaseModel( elements: List[PictureItem] = [] for el in element_batch: assert isinstance(el.item, PictureItem) - elements.append(el.item) - images.append(el.image) + describe_image = True + # Don't describe the image if it's smaller than the threshold + if len(el.item.prov) > 0: + prov = el.item.prov[0] # PictureItems have at most a single provenance + page = doc.pages.get(prov.page_no) + if page is not None: + page_area = page.size.width * page.size.height + if page_area > 0: + area_fraction = prov.bbox.area() / page_area + if area_fraction < self.options.picture_area_threshold: + describe_image = False + if describe_image: + elements.append(el.item) + images.append(el.image) outputs = self._annotate_images(images)