mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat: Introduce the enable_remote_services option to allow remote connections while processing (#941)
* feat: Introduce the allow_remote_services option to allow remote connections while processing
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* add option in the example
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* enhance docs
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* rename to enable_remote_services
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
---------
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -234,6 +234,12 @@ def convert(
|
||||
Optional[Path],
|
||||
typer.Option(..., help="If provided, the location of the model artifacts."),
|
||||
] = None,
|
||||
enable_remote_services: Annotated[
|
||||
bool,
|
||||
typer.Option(
|
||||
..., help="Must be enabled when using models connecting to remote services."
|
||||
),
|
||||
] = False,
|
||||
abort_on_error: Annotated[
|
||||
bool,
|
||||
typer.Option(
|
||||
@@ -380,6 +386,7 @@ def convert(
|
||||
|
||||
accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device)
|
||||
pipeline_options = PdfPipelineOptions(
|
||||
enable_remote_services=enable_remote_services,
|
||||
accelerator_options=accelerator_options,
|
||||
do_ocr=ocr,
|
||||
ocr_options=ocr_options,
|
||||
|
||||
@@ -257,6 +257,7 @@ class PipelineOptions(BaseModel):
|
||||
)
|
||||
document_timeout: Optional[float] = None
|
||||
accelerator_options: AcceleratorOptions = AcceleratorOptions()
|
||||
enable_remote_services: bool = False
|
||||
|
||||
|
||||
class PdfPipelineOptions(PipelineOptions):
|
||||
|
||||
@@ -4,3 +4,7 @@ class BaseError(RuntimeError):
|
||||
|
||||
class ConversionError(BaseError):
|
||||
pass
|
||||
|
||||
|
||||
class OperationNotAllowed(BaseError):
|
||||
pass
|
||||
|
||||
@@ -8,6 +8,7 @@ from PIL import Image
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from docling.datamodel.pipeline_options import PictureDescriptionApiOptions
|
||||
from docling.exceptions import OperationNotAllowed
|
||||
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
@@ -45,14 +46,20 @@ class ApiResponse(BaseModel):
|
||||
class PictureDescriptionApiModel(PictureDescriptionBaseModel):
|
||||
# elements_batch_size = 4
|
||||
|
||||
def __init__(self, enabled: bool, options: PictureDescriptionApiOptions):
|
||||
def __init__(
|
||||
self,
|
||||
enabled: bool,
|
||||
enable_remote_services: bool,
|
||||
options: PictureDescriptionApiOptions,
|
||||
):
|
||||
super().__init__(enabled=enabled, options=options)
|
||||
self.options: PictureDescriptionApiOptions
|
||||
|
||||
if self.enabled:
|
||||
if options.url.host != "localhost":
|
||||
raise NotImplementedError(
|
||||
"The options try to connect to remote APIs which are not yet allowed."
|
||||
if not enable_remote_services:
|
||||
raise OperationNotAllowed(
|
||||
"Connections to remote services is only allowed when set explicitly. "
|
||||
"pipeline_options.enable_remote_services=True."
|
||||
)
|
||||
|
||||
def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
|
||||
|
||||
@@ -209,6 +209,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
||||
):
|
||||
return PictureDescriptionApiModel(
|
||||
enabled=self.pipeline_options.do_picture_description,
|
||||
enable_remote_services=self.pipeline_options.enable_remote_services,
|
||||
options=self.pipeline_options.picture_description_options,
|
||||
)
|
||||
elif isinstance(
|
||||
|
||||
Reference in New Issue
Block a user