diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py
index a24df89d..1d35227c 100644
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -225,6 +225,7 @@ class PictureDescriptionApiOptions(PictureDescriptionBaseOptions):
     headers: Dict[str, str] = {}
     params: Dict[str, Any] = {}
     timeout: float = 20
+    concurrency: int = 1
 
     prompt: str = "Describe this image in a few sentences."
     provenance: str = ""
@@ -295,6 +296,7 @@ class ApiVlmOptions(BaseVlmOptions):
     params: Dict[str, Any] = {}
     scale: float = 2.0
     timeout: float = 60
+    concurrency: int = 1
     response_format: ResponseFormat
 
 
diff --git a/docling/models/api_vlm_model.py b/docling/models/api_vlm_model.py
index acad8a70..60bc6fce 100644
--- a/docling/models/api_vlm_model.py
+++ b/docling/models/api_vlm_model.py
@@ -28,6 +28,7 @@ class ApiVlmModel(BasePageModel):
         )
 
         self.timeout = self.vlm_options.timeout
+        self.concurrency = self.vlm_options.concurrency
         self.prompt_content = (
             f"This is a page from a document.\n{self.vlm_options.prompt}"
         )
@@ -37,10 +38,7 @@ class ApiVlmModel(BasePageModel):
         }
 
     def __call__(
-        self,
-        conv_res: ConversionResult,
-        page_batch: Iterable[Page],
-        concurrency: int = 1,
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
     ) -> Iterable[Page]:
         def _vlm_request(page):
             assert page._backend is not None
@@ -69,5 +67,5 @@ class ApiVlmModel(BasePageModel):
 
             return page
 
-        with ThreadPoolExecutor(max_workers=concurrency) as executor:
+        with ThreadPoolExecutor(max_workers=self.concurrency) as executor:
             yield from executor.map(_vlm_request, page_batch)
diff --git a/docling/models/picture_description_api_model.py b/docling/models/picture_description_api_model.py
index 2debc839..eb331b29 100644
--- a/docling/models/picture_description_api_model.py
+++ b/docling/models/picture_description_api_model.py
@@ -38,6 +38,7 @@ class PictureDescriptionApiModel(PictureDescriptionBaseModel):
             accelerator_options=accelerator_options,
         )
         self.options: PictureDescriptionApiOptions
+        self.concurrency = self.options.concurrency
 
         if self.enabled:
             if not enable_remote_services:
@@ -46,9 +47,7 @@ class PictureDescriptionApiModel(PictureDescriptionBaseModel):
                     "pipeline_options.enable_remote_services=True."
                 )
 
-    def _annotate_images(
-        self, images: Iterable[Image.Image], concurrency: int = 1
-    ) -> Iterable[str]:
+    def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
         # Note: technically we could make a batch request here,
         # but not all APIs will allow for it. For example, vllm won't allow more than 1.
         def _api_request(image):
@@ -61,5 +60,5 @@ class PictureDescriptionApiModel(PictureDescriptionBaseModel):
                 **self.options.params,
             )
 
-        with ThreadPoolExecutor(max_workers=concurrency) as executor:
+        with ThreadPoolExecutor(max_workers=self.concurrency) as executor:
             yield from executor.map(_api_request, images)