feat: Add option to define page range (#852)

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-01-31 15:23:00 +01:00
committed by GitHub
parent d727b04ad0
commit 70d68b6164
6 changed files with 82 additions and 4 deletions

View File

@@ -105,6 +105,20 @@ def test_e2e_conversions(test_doc_path):
assert doc_result.status == ConversionStatus.SUCCESS
def test_page_range(test_doc_path):
converter = DocumentConverter()
doc_result: ConversionResult = converter.convert(test_doc_path, page_range=(9, 9))
assert doc_result.status == ConversionStatus.SUCCESS
assert doc_result.input.page_count == 9
assert doc_result.document.num_pages() == 1
doc_result: ConversionResult = converter.convert(
test_doc_path, page_range=(10, 10), raises_on_error=False
)
assert doc_result.status == ConversionStatus.FAILURE
def test_ocr_coverage_threshold(test_doc_path):
pipeline_options = PdfPipelineOptions()
pipeline_options.do_ocr = True