Switch convert_all return type from Iterable to Iterator

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
Panos Vagenas 2024-10-15 10:11:29 +02:00 committed by GitHub
parent 8710506072
commit 4fe98f1a0a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -3,7 +3,7 @@ import sys
import time import time
from functools import partial from functools import partial
from pathlib import Path from pathlib import Path
from typing import Dict, Iterable, List, Optional, Type from typing import Dict, Iterable, Iterator, List, Optional, Type
from pydantic import BaseModel, ConfigDict, model_validator, validate_call from pydantic import BaseModel, ConfigDict, model_validator, validate_call
@@ -137,7 +137,7 @@ class DocumentConverter:
raises_on_error: bool = True, # True: raises on first conversion error; False: does not raise on conv error raises_on_error: bool = True, # True: raises on first conversion error; False: does not raise on conv error
max_num_pages: int = sys.maxsize, max_num_pages: int = sys.maxsize,
max_file_size: int = sys.maxsize, max_file_size: int = sys.maxsize,
) -> Iterable[ConversionResult]: ) -> Iterator[ConversionResult]:
limits = DocumentLimits( limits = DocumentLimits(
max_num_pages=max_num_pages, max_num_pages=max_num_pages,
max_file_size=max_file_size, max_file_size=max_file_size,
@@ -160,7 +160,7 @@ class DocumentConverter:
def _convert( def _convert(
self, conv_input: _DocumentConversionInput, raises_on_error: bool self, conv_input: _DocumentConversionInput, raises_on_error: bool
) -> Iterable[ConversionResult]: ) -> Iterator[ConversionResult]:
for input_batch in chunkify( for input_batch in chunkify(
conv_input.docs(self.format_to_options), conv_input.docs(self.format_to_options),
settings.perf.doc_batch_size, # pass format_options settings.perf.doc_batch_size, # pass format_options