mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
fixed formatting and typing issues
Signed-off-by: Luke Harrison <Luke.Harrison1@ibm.com>
This commit is contained in:
parent
ea4e92527d
commit
f3d9c3bfc9
@ -165,7 +165,9 @@ def convert(
|
||||
None, "--to", help="Specify output formats. Defaults to Markdown."
|
||||
),
|
||||
headers: str = typer.Option(
|
||||
None, "--headers", help="Specify http request headers used when fetching url input sources in the form of a JSON string"
|
||||
None,
|
||||
"--headers",
|
||||
help="Specify http request headers used when fetching url input sources in the form of a JSON string",
|
||||
),
|
||||
image_export_mode: Annotated[
|
||||
ImageRefMode,
|
||||
@ -265,7 +267,7 @@ def convert(
|
||||
num_threads: Annotated[int, typer.Option(..., help="Number of threads")] = 4,
|
||||
device: Annotated[
|
||||
AcceleratorDevice, typer.Option(..., help="Accelerator device")
|
||||
] = AcceleratorDevice.AUTO
|
||||
] = AcceleratorDevice.AUTO,
|
||||
):
|
||||
if verbose == 0:
|
||||
logging.basicConfig(level=logging.WARNING)
|
||||
@ -282,15 +284,18 @@ def convert(
|
||||
if from_formats is None:
|
||||
from_formats = [e for e in InputFormat]
|
||||
|
||||
parsed_headers: Optional[Dict[str, str]] = None
|
||||
if headers is not None:
|
||||
headers = json.loads(headers)
|
||||
parsed_headers = json.loads(headers)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
input_doc_paths: List[Path] = []
|
||||
for src in input_sources:
|
||||
try:
|
||||
# check if we can fetch some remote url
|
||||
source = resolve_source_to_path(source=src, headers=headers, workdir=Path(tempdir))
|
||||
source = resolve_source_to_path(
|
||||
source=src, headers=parsed_headers, workdir=Path(tempdir)
|
||||
)
|
||||
input_doc_paths.append(source)
|
||||
except FileNotFoundError:
|
||||
err_console.print(
|
||||
@ -396,7 +401,7 @@ def convert(
|
||||
start_time = time.time()
|
||||
|
||||
conv_results = doc_converter.convert_all(
|
||||
input_doc_paths, headers=headers, raises_on_error=abort_on_error
|
||||
input_doc_paths, headers=parsed_headers, raises_on_error=abort_on_error
|
||||
)
|
||||
|
||||
output.mkdir(parents=True, exist_ok=True)
|
||||
|
@ -234,7 +234,11 @@ class _DocumentConversionInput(BaseModel):
|
||||
self, format_options: Dict[InputFormat, "FormatOption"]
|
||||
) -> Iterable[InputDocument]:
|
||||
for item in self.path_or_stream_iterator:
|
||||
obj = resolve_source_to_stream(item, self.headers) if isinstance(item, str) else item
|
||||
obj = (
|
||||
resolve_source_to_stream(item, self.headers)
|
||||
if isinstance(item, str)
|
||||
else item
|
||||
)
|
||||
format = self._guess_format(obj)
|
||||
backend: Type[AbstractDocumentBackend]
|
||||
if format not in format_options.keys():
|
||||
|
@ -186,7 +186,7 @@ class DocumentConverter:
|
||||
raises_on_error=raises_on_error,
|
||||
max_num_pages=max_num_pages,
|
||||
max_file_size=max_file_size,
|
||||
headers=headers
|
||||
headers=headers,
|
||||
)
|
||||
return next(all_res)
|
||||
|
||||
@ -204,9 +204,7 @@ class DocumentConverter:
|
||||
max_file_size=max_file_size,
|
||||
)
|
||||
conv_input = _DocumentConversionInput(
|
||||
path_or_stream_iterator=source,
|
||||
limits=limits,
|
||||
headers=headers
|
||||
path_or_stream_iterator=source, limits=limits, headers=headers
|
||||
)
|
||||
conv_res_iter = self._convert(conv_input, raises_on_error=raises_on_error)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user