Mirror of https://github.com/DS4SD/docling.git (synced 2025-07-25 19:44:34 +00:00)
Commit 425f38a5aa: Clean up unused code
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Parent: de0d9b50a2
@@ -347,14 +347,3 @@ class ThreadedPdfPipelineOptions(PdfPipelineOptions):
     # Backpressure and queue control
     queue_max_size: int = 100
 
-    # Pipeline coordination - batch_timeout_seconds is the only safe timeout
-    # stage_timeout_seconds and collection_timeout_seconds removed to prevent data loss
-
-    @classmethod
-    def from_sync_options(
-        cls, sync_options: PdfPipelineOptions
-    ) -> "ThreadedPdfPipelineOptions":
-        """Convert sync options to threaded options"""
-        data = sync_options.model_dump()
-        return cls(**data)
-
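The removed from_sync_options helper is a small pydantic idiom: dump the source model to a plain dict and re-validate it as the subclass, so shared fields carry over and subclass-only fields fall back to their defaults. A minimal sketch of that pattern, assuming pydantic v2; Base, Threaded, and from_sync are hypothetical stand-ins, not docling's actual option classes:

    from pydantic import BaseModel

    class Base(BaseModel):
        images_scale: float = 1.0
        generate_page_images: bool = False

    class Threaded(Base):
        queue_max_size: int = 100  # subclass-only field keeps its default

    def from_sync(opts: Base) -> Threaded:
        # model_dump() yields a plain dict of the source's fields; unpacking it
        # into the subclass copies the shared values, while fields that exist
        # only on the subclass fall back to their declared defaults.
        return Threaded(**opts.model_dump())

    print(from_sync(Base(images_scale=2.0)))
    # images_scale=2.0 generate_page_images=False queue_max_size=100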
@@ -21,18 +21,15 @@ import itertools
 import logging
 import threading
 import time
-import warnings
 from collections import defaultdict, deque
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Iterable, List, Optional, Sequence, Tuple
 
 from docling_core.types.doc import NodeItem
 
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.datamodel.base_models import AssembledUnit, ConversionStatus, Page
-from docling.datamodel.document import ConversionResult, InputDocument
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import ThreadedPdfPipelineOptions
 from docling.datamodel.settings import settings
 from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions
@@ -549,47 +546,6 @@ class ThreadedStandardPdfPipeline(BasePipeline):
         self._integrate_results(conv_res, proc)
         return conv_res
 
-    # -------------------------------------------------------------- feed_pages
-    def _feed_pages(
-        self,
-        stage: ThreadedPipelineStage,
-        pages: Sequence[Page],
-        conv_res: ConversionResult,
-        run_id: int,
-    ) -> None:
-        for pg in pages:
-            ok = stage.input_queue.put(
-                ThreadedItem(
-                    payload=pg, run_id=run_id, page_no=pg.page_no, conv_res=conv_res
-                )
-            )
-            if not ok:
-                raise RuntimeError("Input queue closed while feeding pages")
-        stage.input_queue.close()
-
-    # ------------------------------------------------------------- _collect()
-    def _collect(
-        self, ctx: RunContext, conv_res: ConversionResult, run_id: int, expected: int
-    ) -> ProcessingResult:
-        res = ProcessingResult(total_expected=expected)
-        while True:
-            batch = ctx.output_queue.get_batch(expected, timeout=None)
-            if not batch and ctx.output_queue.closed:
-                break
-            for itm in batch:
-                if itm.run_id != run_id:
-                    continue  # not our run (should not happen due to isolation)
-                if itm.is_failed or itm.error:
-                    res.failed_pages.append(
-                        (itm.page_no, itm.error or Exception("unknown error"))
-                    )
-                else:
-                    assert itm.payload is not None
-                    res.pages.append(itm.payload)
-                if res.success_count + res.failure_count >= expected:
-                    break
-        return res
-
     # ---------------------------------------------------- integrate_results()
     def _integrate_results(
         self, conv_res: ConversionResult, proc: ProcessingResult
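The deleted _feed_pages/_collect pair is a classic bounded-queue producer/collector: put() blocks while the queue is full (the backpressure governed by queue_max_size above) and returns False once the queue has been closed, while the collector drains batches until the expected page count arrives or the queue is closed and empty. A self-contained sketch of that pattern; BoundedQueue, Item, feed, and collect are hypothetical stand-ins, not docling's actual ThreadedQueue/ThreadedItem API:

    import threading
    from collections import deque
    from dataclasses import dataclass
    from typing import Any, List, Optional, Tuple

    @dataclass
    class Item:
        payload: Any
        run_id: int
        page_no: int
        error: Optional[Exception] = None

    class BoundedQueue:
        # Closable bounded queue: put() blocks while full (backpressure) and
        # returns False once closed; get_batch() drains up to max_items.
        def __init__(self, max_size: int = 100) -> None:
            self._buf: deque = deque()
            self._max = max_size
            self._closed = False
            self._cv = threading.Condition()

        def put(self, item: Item) -> bool:
            with self._cv:
                while len(self._buf) >= self._max and not self._closed:
                    self._cv.wait()  # backpressure: block the producer
                if self._closed:
                    return False
                self._buf.append(item)
                self._cv.notify_all()
                return True

        def close(self) -> None:
            with self._cv:
                self._closed = True
                self._cv.notify_all()

        @property
        def closed(self) -> bool:
            # "closed" for the collector means: no more input AND fully drained
            with self._cv:
                return self._closed and not self._buf

        def get_batch(self, max_items: int) -> List[Item]:
            with self._cv:
                while not self._buf and not self._closed:
                    self._cv.wait()  # wait for data or close()
                batch: List[Item] = []
                while self._buf and len(batch) < max_items:
                    batch.append(self._buf.popleft())
                self._cv.notify_all()  # wake producers blocked on a full queue
                return batch

    def feed(q: BoundedQueue, pages: List[Any], run_id: int) -> None:
        for no, payload in enumerate(pages):
            if not q.put(Item(payload, run_id, no)):
                raise RuntimeError("Input queue closed while feeding pages")
        q.close()  # signal end-of-input to the collector

    def collect(
        q: BoundedQueue, run_id: int, expected: int
    ) -> Tuple[List[Item], List[Item]]:
        done: List[Item] = []
        failed: List[Item] = []
        while len(done) + len(failed) < expected:
            batch = q.get_batch(expected)
            if not batch and q.closed:
                break  # closed and drained before all pages were seen
            for itm in batch:
                if itm.run_id != run_id:
                    continue  # another run's item; isolation should prevent this
                (failed if itm.error else done).append(itm)
        return done, failed

    q = BoundedQueue(max_size=4)
    producer = threading.Thread(target=feed, args=(q, [f"page-{i}" for i in range(10)], 0))
    producer.start()
    done, failed = collect(q, run_id=0, expected=10)
    producer.join()
    print(f"{len(done)} ok, {len(failed)} failed")  # 10 ok, 0 failed

Closing the queue after the last put, rather than relying on a timeout, is what the surviving comment in the options hunk alludes to: end-of-input is signaled explicitly, so no page can be silently dropped by an expiring stage or collection timeout.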