diff --git a/docling/cli/main.py b/docling/cli/main.py index 79cc04c6..32d57325 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -165,7 +165,9 @@ def convert( None, "--to", help="Specify output formats. Defaults to Markdown." ), headers: str = typer.Option( - None, "--headers", help="Specify http request headers used when fetching url input sources in the form of a JSON string" + None, + "--headers", + help="Specify http request headers used when fetching url input sources in the form of a JSON string", ), image_export_mode: Annotated[ ImageRefMode, @@ -265,7 +267,7 @@ def convert( num_threads: Annotated[int, typer.Option(..., help="Number of threads")] = 4, device: Annotated[ AcceleratorDevice, typer.Option(..., help="Accelerator device") - ] = AcceleratorDevice.AUTO + ] = AcceleratorDevice.AUTO, ): if verbose == 0: logging.basicConfig(level=logging.WARNING) @@ -282,15 +284,18 @@ def convert( if from_formats is None: from_formats = [e for e in InputFormat] + parsed_headers: Optional[Dict[str, str]] = None if headers is not None: - headers = json.loads(headers) + parsed_headers = json.loads(headers) with tempfile.TemporaryDirectory() as tempdir: input_doc_paths: List[Path] = [] for src in input_sources: try: # check if we can fetch some remote url - source = resolve_source_to_path(source=src, headers=headers, workdir=Path(tempdir)) + source = resolve_source_to_path( + source=src, headers=parsed_headers, workdir=Path(tempdir) + ) input_doc_paths.append(source) except FileNotFoundError: err_console.print( @@ -396,7 +401,7 @@ def convert( start_time = time.time() conv_results = doc_converter.convert_all( - input_doc_paths, headers=headers, raises_on_error=abort_on_error + input_doc_paths, headers=parsed_headers, raises_on_error=abort_on_error ) output.mkdir(parents=True, exist_ok=True) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 64b30467..136428e8 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -234,7 +234,11 @@ class _DocumentConversionInput(BaseModel): self, format_options: Dict[InputFormat, "FormatOption"] ) -> Iterable[InputDocument]: for item in self.path_or_stream_iterator: - obj = resolve_source_to_stream(item, self.headers) if isinstance(item, str) else item + obj = ( + resolve_source_to_stream(item, self.headers) + if isinstance(item, str) + else item + ) format = self._guess_format(obj) backend: Type[AbstractDocumentBackend] if format not in format_options.keys(): diff --git a/docling/document_converter.py b/docling/document_converter.py index 569f058b..cb073949 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -186,7 +186,7 @@ class DocumentConverter: raises_on_error=raises_on_error, max_num_pages=max_num_pages, max_file_size=max_file_size, - headers=headers + headers=headers, ) return next(all_res) @@ -204,9 +204,7 @@ class DocumentConverter: max_file_size=max_file_size, ) conv_input = _DocumentConversionInput( - path_or_stream_iterator=source, - limits=limits, - headers=headers + path_or_stream_iterator=source, limits=limits, headers=headers ) conv_res_iter = self._convert(conv_input, raises_on_error=raises_on_error)