diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py
index a24df89d..1d35227c 100644
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -225,6 +225,7 @@ class PictureDescriptionApiOptions(PictureDescriptionBaseOptions):
     headers: Dict[str, str] = {}
     params: Dict[str, Any] = {}
     timeout: float = 20
+    concurrency: int = 1
 
     prompt: str = "Describe this image in a few sentences."
     provenance: str = ""
@@ -295,6 +296,7 @@ class ApiVlmOptions(BaseVlmOptions):
     params: Dict[str, Any] = {}
     scale: float = 2.0
     timeout: float = 60
+    concurrency: int = 1
     response_format: ResponseFormat
 
 
diff --git a/docling/models/api_vlm_model.py b/docling/models/api_vlm_model.py
index acad8a70..60bc6fce 100644
--- a/docling/models/api_vlm_model.py
+++ b/docling/models/api_vlm_model.py
@@ -28,6 +28,7 @@ class ApiVlmModel(BasePageModel):
         )
 
         self.timeout = self.vlm_options.timeout
+        self.concurrency = self.vlm_options.concurrency
         self.prompt_content = (
             f"This is a page from a document.\n{self.vlm_options.prompt}"
         )
@@ -37,10 +38,7 @@ class ApiVlmModel(BasePageModel):
         }
 
     def __call__(
-        self,
-        conv_res: ConversionResult,
-        page_batch: Iterable[Page],
-        concurrency: int = 1,
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
     ) -> Iterable[Page]:
         def _vlm_request(page):
             assert page._backend is not None
@@ -69,5 +67,5 @@ class ApiVlmModel(BasePageModel):
 
             return page
 
-        with ThreadPoolExecutor(max_workers=concurrency) as executor:
+        with ThreadPoolExecutor(max_workers=self.concurrency) as executor:
             yield from executor.map(_vlm_request, page_batch)
diff --git a/docling/models/picture_description_api_model.py b/docling/models/picture_description_api_model.py
index 2debc839..eb331b29 100644
--- a/docling/models/picture_description_api_model.py
+++ b/docling/models/picture_description_api_model.py
@@ -38,6 +38,7 @@ class PictureDescriptionApiModel(PictureDescriptionBaseModel):
             accelerator_options=accelerator_options,
         )
         self.options: PictureDescriptionApiOptions
+        self.concurrency = self.options.concurrency
 
         if self.enabled:
             if not enable_remote_services:
@@ -46,9 +47,7 @@ class PictureDescriptionApiModel(PictureDescriptionBaseModel):
                     "pipeline_options.enable_remote_services=True."
                 )
 
-    def _annotate_images(
-        self, images: Iterable[Image.Image], concurrency: int = 1
-    ) -> Iterable[str]:
+    def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
         # Note: technically we could make a batch request here,
         # but not all APIs will allow for it. For example, vllm won't allow more than 1.
         def _api_request(image):
@@ -61,5 +60,5 @@ class PictureDescriptionApiModel(PictureDescriptionBaseModel):
                 **self.options.params,
             )
 
-        with ThreadPoolExecutor(max_workers=concurrency) as executor:
+        with ThreadPoolExecutor(max_workers=self.concurrency) as executor:
             yield from executor.map(_api_request, images)