fixed formatting and typing issues

Signed-off-by: Luke Harrison <Luke.Harrison1@ibm.com>
This commit is contained in:
Luke Harrison 2024-12-21 06:48:35 -05:00
parent ea4e92527d
commit f3d9c3bfc9
No known key found for this signature in database
GPG Key ID: 012ADEEE52EF0444
3 changed files with 17 additions and 10 deletions

View File

@@ -165,7 +165,9 @@ def convert(
None, "--to", help="Specify output formats. Defaults to Markdown." None, "--to", help="Specify output formats. Defaults to Markdown."
), ),
headers: str = typer.Option( headers: str = typer.Option(
None, "--headers", help="Specify http request headers used when fetching url input sources in the form of a JSON string" None,
"--headers",
help="Specify http request headers used when fetching url input sources in the form of a JSON string",
), ),
image_export_mode: Annotated[ image_export_mode: Annotated[
ImageRefMode, ImageRefMode,
@@ -265,7 +267,7 @@ def convert(
num_threads: Annotated[int, typer.Option(..., help="Number of threads")] = 4, num_threads: Annotated[int, typer.Option(..., help="Number of threads")] = 4,
device: Annotated[ device: Annotated[
AcceleratorDevice, typer.Option(..., help="Accelerator device") AcceleratorDevice, typer.Option(..., help="Accelerator device")
] = AcceleratorDevice.AUTO ] = AcceleratorDevice.AUTO,
): ):
if verbose == 0: if verbose == 0:
logging.basicConfig(level=logging.WARNING) logging.basicConfig(level=logging.WARNING)
@@ -282,15 +284,18 @@ def convert(
if from_formats is None: if from_formats is None:
from_formats = [e for e in InputFormat] from_formats = [e for e in InputFormat]
parsed_headers: Optional[Dict[str, str]] = None
if headers is not None: if headers is not None:
headers = json.loads(headers) parsed_headers = json.loads(headers)
with tempfile.TemporaryDirectory() as tempdir: with tempfile.TemporaryDirectory() as tempdir:
input_doc_paths: List[Path] = [] input_doc_paths: List[Path] = []
for src in input_sources: for src in input_sources:
try: try:
# check if we can fetch some remote url # check if we can fetch some remote url
source = resolve_source_to_path(source=src, headers=headers, workdir=Path(tempdir)) source = resolve_source_to_path(
source=src, headers=parsed_headers, workdir=Path(tempdir)
)
input_doc_paths.append(source) input_doc_paths.append(source)
except FileNotFoundError: except FileNotFoundError:
err_console.print( err_console.print(
@@ -396,7 +401,7 @@ def convert(
start_time = time.time() start_time = time.time()
conv_results = doc_converter.convert_all( conv_results = doc_converter.convert_all(
input_doc_paths, headers=headers, raises_on_error=abort_on_error input_doc_paths, headers=parsed_headers, raises_on_error=abort_on_error
) )
output.mkdir(parents=True, exist_ok=True) output.mkdir(parents=True, exist_ok=True)

View File

@@ -234,7 +234,11 @@ class _DocumentConversionInput(BaseModel):
self, format_options: Dict[InputFormat, "FormatOption"] self, format_options: Dict[InputFormat, "FormatOption"]
) -> Iterable[InputDocument]: ) -> Iterable[InputDocument]:
for item in self.path_or_stream_iterator: for item in self.path_or_stream_iterator:
obj = resolve_source_to_stream(item, self.headers) if isinstance(item, str) else item obj = (
resolve_source_to_stream(item, self.headers)
if isinstance(item, str)
else item
)
format = self._guess_format(obj) format = self._guess_format(obj)
backend: Type[AbstractDocumentBackend] backend: Type[AbstractDocumentBackend]
if format not in format_options.keys(): if format not in format_options.keys():

View File

@@ -186,7 +186,7 @@ class DocumentConverter:
raises_on_error=raises_on_error, raises_on_error=raises_on_error,
max_num_pages=max_num_pages, max_num_pages=max_num_pages,
max_file_size=max_file_size, max_file_size=max_file_size,
headers=headers headers=headers,
) )
return next(all_res) return next(all_res)
@@ -204,9 +204,7 @@ class DocumentConverter:
max_file_size=max_file_size, max_file_size=max_file_size,
) )
conv_input = _DocumentConversionInput( conv_input = _DocumentConversionInput(
path_or_stream_iterator=source, path_or_stream_iterator=source, limits=limits, headers=headers
limits=limits,
headers=headers
) )
conv_res_iter = self._convert(conv_input, raises_on_error=raises_on_error) conv_res_iter = self._convert(conv_input, raises_on_error=raises_on_error)