From 01066f0b6eed5c43552ca58f7292e283db66eece Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Fri, 18 Jul 2025 16:48:35 +0200 Subject: [PATCH 1/3] Unload doc backend Signed-off-by: Christoph Auer --- docling/pipeline/threaded_standard_pdf_pipeline.py | 3 +++ tests/test_threaded_pipeline.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docling/pipeline/threaded_standard_pdf_pipeline.py b/docling/pipeline/threaded_standard_pdf_pipeline.py index d3ea2f82..6c734bd9 100644 --- a/docling/pipeline/threaded_standard_pdf_pipeline.py +++ b/docling/pipeline/threaded_standard_pdf_pipeline.py @@ -527,6 +527,9 @@ class ThreadedStandardPdfPipeline(BasePipeline): for st in ctx.stages: st.stop() ctx.output_queue.close() + + if conv_res.input._backend is not None: + conv_res.input._backend.unload() return conv_res # -------------------------------------------------------------- feed_pages diff --git a/tests/test_threaded_pipeline.py b/tests/test_threaded_pipeline.py index b431c508..bc6fb4e5 100644 --- a/tests/test_threaded_pipeline.py +++ b/tests/test_threaded_pipeline.py @@ -18,7 +18,7 @@ from docling.pipeline.threaded_standard_pdf_pipeline import ThreadedStandardPdfP def test_threaded_pipeline_multiple_documents(): """Test threaded pipeline with multiple documents and compare with standard pipeline""" - test_files = [ + test_files = [str(f) for f in Path("tests/data/pdf").rglob("*.pdf")] or [ "tests/data/pdf/2203.01017v2.pdf", "tests/data/pdf/2206.01062.pdf", "tests/data/pdf/2305.03393v1.pdf", From fa71cde950fd3dec08dd39d7be6a9de711a9dfc9 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Fri, 18 Jul 2025 16:54:27 +0200 Subject: [PATCH 2/3] Revert "Unload doc backend" This reverts commit 01066f0b6eed5c43552ca58f7292e283db66eece. --- docling/pipeline/threaded_standard_pdf_pipeline.py | 3 --- tests/test_threaded_pipeline.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/docling/pipeline/threaded_standard_pdf_pipeline.py b/docling/pipeline/threaded_standard_pdf_pipeline.py index 6c734bd9..d3ea2f82 100644 --- a/docling/pipeline/threaded_standard_pdf_pipeline.py +++ b/docling/pipeline/threaded_standard_pdf_pipeline.py @@ -527,9 +527,6 @@ class ThreadedStandardPdfPipeline(BasePipeline): for st in ctx.stages: st.stop() ctx.output_queue.close() - - if conv_res.input._backend is not None: - conv_res.input._backend.unload() return conv_res # -------------------------------------------------------------- feed_pages diff --git a/tests/test_threaded_pipeline.py b/tests/test_threaded_pipeline.py index bc6fb4e5..b431c508 100644 --- a/tests/test_threaded_pipeline.py +++ b/tests/test_threaded_pipeline.py @@ -18,7 +18,7 @@ from docling.pipeline.threaded_standard_pdf_pipeline import ThreadedStandardPdfP def test_threaded_pipeline_multiple_documents(): """Test threaded pipeline with multiple documents and compare with standard pipeline""" - test_files = [str(f) for f in Path("tests/data/pdf").rglob("*.pdf")] or [ + test_files = [ "tests/data/pdf/2203.01017v2.pdf", "tests/data/pdf/2206.01062.pdf", "tests/data/pdf/2305.03393v1.pdf", From f6015bf8ae25942861a373cef343a286bc6c7b2d Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Fri, 18 Jul 2025 17:17:24 +0200 Subject: [PATCH 3/3] Remove redundant method Signed-off-by: Christoph Auer --- .../threaded_standard_pdf_pipeline.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/docling/pipeline/threaded_standard_pdf_pipeline.py b/docling/pipeline/threaded_standard_pdf_pipeline.py index d3ea2f82..cafca746 100644 --- a/docling/pipeline/threaded_standard_pdf_pipeline.py +++ b/docling/pipeline/threaded_standard_pdf_pipeline.py @@ -308,25 +308,6 @@ class ThreadedStandardPdfPipeline(BasePipeline): # initialise heavy models once self._init_models() - # ---------------------------------------------------------------- execute - def execute( - self, in_doc: InputDocument, raises_on_error: bool = True - ) -> ConversionResult: # type: ignore[override] - conv_res = ConversionResult(input=in_doc) - - if not isinstance(in_doc._backend, PdfDocumentBackend): - conv_res.status = ConversionStatus.FAILURE - return conv_res - - with TimeRecorder(conv_res, "pipeline_total", scope=ProfilingScope.DOCUMENT): - try: - conv_res = self._build_document(conv_res) - conv_res = self._assemble_document(conv_res) - conv_res.status = self._determine_status(conv_res) - finally: - self._unload(conv_res) - return conv_res - # ──────────────────────────────────────────────────────────────────────── # Heavy-model initialisation & helpers # ────────────────────────────────────────────────────────────────────────