mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
fix(pdf): threadsafe for pypdfium2 backend (#2527)
* add threadsafe test Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * test backend Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * test threaded pipeline Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add test_pypdfium_threaded_pipeline Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add more threadsafe blocks Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix threadsafe in pypdfium backend Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * remove unnecessary tests Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * restore clean test Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -5,6 +5,7 @@ from typing import List
|
||||
|
||||
import pytest
|
||||
|
||||
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
||||
from docling.datamodel.base_models import ConversionStatus, InputFormat
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import (
|
||||
@@ -171,6 +172,27 @@ def test_pipeline_comparison():
|
||||
assert len(sync_doc.texts) == len(threaded_doc.texts)
|
||||
|
||||
|
||||
def test_pypdfium_threaded_pipeline():
    """Convert one PDF several times with the threaded pipeline on pypdfium2.

    Builds a ``DocumentConverter`` whose PDF format option selects
    ``ThreadedStandardPdfPipeline`` with ``PyPdfiumDocumentBackend``, then
    converts the same sample document six times in a row and asserts that
    every conversion reports ``ConversionStatus.SUCCESS``.
    """
    # Passing format_options is optional (the converter has internal
    # defaults); here only the PDF entry is overridden.
    pdf_option = PdfFormatOption(
        pipeline_cls=ThreadedStandardPdfPipeline,
        backend=PyPdfiumDocumentBackend,
    )
    converter = DocumentConverter(format_options={InputFormat.PDF: pdf_option})

    pdf_path = "tests/data/pdf/2206.01062.pdf"
    rounds = 6
    # The loop variable must stay named `i`: the `{i=}` f-strings below emit
    # the variable name as part of the printed text.
    for i in range(rounds):
        print(f"iteration {i=}")
        outcome = converter.convert(pdf_path)
        assert outcome.status == ConversionStatus.SUCCESS
        print(f"[{i=}] Success")
    print("All done!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # When executed directly instead of through pytest, run only the basic
    # performance comparison test.
    test_pipeline_comparison()
|
||||
|
||||
Reference in New Issue
Block a user