From 9ba627b40a05376bbfa791adf0331ce89e40e5a7 Mon Sep 17 00:00:00 2001
From: mirza-samad-ahmed-baig <mirzasamadahmedbaig@gmail.com>
Date: Tue, 1 Jul 2025 14:13:36 +0500
Subject: [PATCH] Refactor: Address minor code quality issues and remove
 deprecated features

---
 docling/cli/main.py                   | 215 +++++++-------------------
 docling/datamodel/pipeline_options.py | 109 ++-----------
 docling/datamodel/settings.py         |  18 +--
 docling/document_converter.py         |  98 ++----------
 4 files changed, 75 insertions(+), 365 deletions(-)

diff --git a/docling/cli/main.py b/docling/cli/main.py
index ae275ea9..6f55424f 100644
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@@ -23,21 +23,13 @@ from docling_core.utils.file import resolve_source_to_path
 from pydantic import TypeAdapter
 from rich.console import Console
 
-from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
-from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
-from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
-from docling.backend.pdf_backend import PdfDocumentBackend
-from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
+
+
+
+
+
 from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
-from docling.datamodel.asr_model_specs import (
-    WHISPER_BASE,
-    WHISPER_LARGE,
-    WHISPER_MEDIUM,
-    WHISPER_SMALL,
-    WHISPER_TINY,
-    WHISPER_TURBO,
-    AsrModelType,
-)
+
 from docling.datamodel.base_models import (
     ConversionStatus,
     FormatToExtensions,
@@ -45,35 +37,13 @@ from docling.datamodel.base_models import (
     OutputFormat,
 )
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import (
-    AsrPipelineOptions,
-    EasyOcrOptions,
-    OcrOptions,
-    PaginatedPipelineOptions,
-    PdfBackend,
-    PdfPipelineOptions,
-    PipelineOptions,
-    ProcessingPipeline,
-    TableFormerMode,
-    VlmPipelineOptions,
-)
-from docling.datamodel.settings import settings
-from docling.datamodel.vlm_model_specs import (
-    GRANITE_VISION_OLLAMA,
-    GRANITE_VISION_TRANSFORMERS,
-    SMOLDOCLING_MLX,
-    SMOLDOCLING_TRANSFORMERS,
-    VlmModelType,
-)
-from docling.document_converter import (
-    AudioFormatOption,
-    DocumentConverter,
-    FormatOption,
-    PdfFormatOption,
-)
+
+
+
+
 from docling.models.factories import get_ocr_factory
-from docling.pipeline.asr_pipeline import AsrPipeline
-from docling.pipeline.vlm_pipeline import VlmPipeline
+
+
 
 warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
 warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
@@ -190,79 +160,10 @@ def export_documents(
     failure_count = 0
 
     for conv_res in conv_results:
-        if conv_res.status == ConversionStatus.SUCCESS:
-            success_count += 1
-            doc_filename = conv_res.input.file.stem
-
-            # Export JSON format:
-            if export_json:
-                fname = output_dir / f"{doc_filename}.json"
-                _log.info(f"writing JSON output to {fname}")
-                conv_res.document.save_as_json(
-                    filename=fname, image_mode=image_export_mode
-                )
-
-            # Export HTML format:
-            if export_html:
-                fname = output_dir / f"{doc_filename}.html"
-                _log.info(f"writing HTML output to {fname}")
-                conv_res.document.save_as_html(
-                    filename=fname, image_mode=image_export_mode, split_page_view=False
-                )
-
-            # Export HTML format:
-            if export_html_split_page:
-                fname = output_dir / f"{doc_filename}.html"
-                _log.info(f"writing HTML output to {fname}")
-                if show_layout:
-                    ser = HTMLDocSerializer(
-                        doc=conv_res.document,
-                        params=HTMLParams(
-                            image_mode=image_export_mode,
-                            output_style=HTMLOutputStyle.SPLIT_PAGE,
-                        ),
-                    )
-                    visualizer = LayoutVisualizer()
-                    visualizer.params.show_label = False
-                    ser_res = ser.serialize(
-                        visualizer=visualizer,
-                    )
-                    with open(fname, "w") as fw:
-                        fw.write(ser_res.text)
-                else:
-                    conv_res.document.save_as_html(
-                        filename=fname,
-                        image_mode=image_export_mode,
-                        split_page_view=True,
-                    )
-
-            # Export Text format:
-            if export_txt:
-                fname = output_dir / f"{doc_filename}.txt"
-                _log.info(f"writing TXT output to {fname}")
-                conv_res.document.save_as_markdown(
-                    filename=fname,
-                    strict_text=True,
-                    image_mode=ImageRefMode.PLACEHOLDER,
-                )
-
-            # Export Markdown format:
-            if export_md:
-                fname = output_dir / f"{doc_filename}.md"
-                _log.info(f"writing Markdown output to {fname}")
-                conv_res.document.save_as_markdown(
-                    filename=fname, image_mode=image_export_mode
-                )
-
-            # Export Document Tags format:
-            if export_doctags:
-                fname = output_dir / f"{doc_filename}.doctags"
-                _log.info(f"writing Doc Tags output to {fname}")
-                conv_res.document.save_as_document_tokens(filename=fname)
-
-        else:
+        if conv_res.status != ConversionStatus.SUCCESS:
             _log.warning(f"Document {conv_res.input.file} failed to convert.")
             failure_count += 1
+            continue
 
     _log.info(
         f"Processed {success_count + failure_count} docs, of which {failure_count} failed"
@@ -270,9 +171,7 @@ def export_documents(
 
 
 def _split_list(raw: Optional[str]) -> Optional[List[str]]:
-    if raw is None:
-        return None
-    return re.split(r"[;,]", raw)
+    return re.split(r"[;,]", raw) if raw else None
 
 
 @app.command(no_args_is_help=True)
@@ -485,11 +384,11 @@ def convert(  # noqa: C901
     settings.debug.visualize_tables = debug_visualize_tables
     settings.debug.visualize_ocr = debug_visualize_ocr
 
-    if from_formats is None:
-        from_formats = list(InputFormat)
+    if not from_formats:
+        from_formats = list(InputFormat)
 
     parsed_headers: Optional[Dict[str, str]] = None
-    if headers is not None:
+    if headers:
         headers_t = TypeAdapter(Dict[str, str])
         parsed_headers = headers_t.validate_json(headers)
 
@@ -532,7 +431,7 @@ def convert(  # noqa: C901
                     _log.info(err)  # will print more details if verbose is activated
                     raise typer.Abort()
 
-        if to_formats is None:
+        if not to_formats:
             to_formats = [OutputFormat.MARKDOWN]
 
         export_json = OutputFormat.JSON in to_formats
@@ -549,7 +448,7 @@ def convert(  # noqa: C901
         )
 
         ocr_lang_list = _split_list(ocr_lang)
-        if ocr_lang_list is not None:
+        if ocr_lang_list:
             ocr_options.lang = ocr_lang_list
 
         accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device)
@@ -585,15 +484,14 @@ def convert(  # noqa: C901
                 pipeline_options.images_scale = 2
 
             backend: Type[PdfDocumentBackend]
-            if pdf_backend == PdfBackend.DLPARSE_V1:
-                backend = DoclingParseDocumentBackend
-            elif pdf_backend == PdfBackend.DLPARSE_V2:
-                backend = DoclingParseV2DocumentBackend
-            elif pdf_backend == PdfBackend.DLPARSE_V4:
-                backend = DoclingParseV4DocumentBackend  # type: ignore
-            elif pdf_backend == PdfBackend.PYPDFIUM2:
-                backend = PyPdfiumDocumentBackend  # type: ignore
-            else:
+            backend_map = {
+                PdfBackend.DLPARSE_V1: DoclingParseDocumentBackend,
+                PdfBackend.DLPARSE_V2: DoclingParseV2DocumentBackend,
+                PdfBackend.DLPARSE_V4: DoclingParseV4DocumentBackend,
+                PdfBackend.PYPDFIUM2: PyPdfiumDocumentBackend,
+            }
+            backend = backend_map.get(pdf_backend)
+            if not backend:
                 raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}")
 
             pdf_format_option = PdfFormatOption(
@@ -611,22 +509,23 @@ def convert(  # noqa: C901
                 enable_remote_services=enable_remote_services,
             )
 
-            if vlm_model == VlmModelType.GRANITE_VISION:
-                pipeline_options.vlm_options = GRANITE_VISION_TRANSFORMERS
-            elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA:
-                pipeline_options.vlm_options = GRANITE_VISION_OLLAMA
-            elif vlm_model == VlmModelType.SMOLDOCLING:
-                pipeline_options.vlm_options = SMOLDOCLING_TRANSFORMERS
-                if sys.platform == "darwin":
-                    try:
-                        import mlx_vlm
+            vlm_model_map = {
+                VlmModelType.GRANITE_VISION: GRANITE_VISION_TRANSFORMERS,
+                VlmModelType.GRANITE_VISION_OLLAMA: GRANITE_VISION_OLLAMA,
+                VlmModelType.SMOLDOCLING: SMOLDOCLING_TRANSFORMERS,
+            }
+            pipeline_options.vlm_options = vlm_model_map.get(vlm_model)
 
-                        pipeline_options.vlm_options = SMOLDOCLING_MLX
-                    except ImportError:
-                        _log.warning(
-                            "To run SmolDocling faster, please install mlx-vlm:\n"
-                            "pip install mlx-vlm"
-                        )
+            if vlm_model == VlmModelType.SMOLDOCLING and sys.platform == "darwin":
+                try:
+                    import mlx_vlm
+
+                    pipeline_options.vlm_options = SMOLDOCLING_MLX
+                except ImportError:
+                    _log.warning(
+                        "To run SmolDocling faster, please install mlx-vlm:\n"
+                        "pip install mlx-vlm"
+                    )
 
             pdf_format_option = PdfFormatOption(
                 pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
@@ -643,19 +542,16 @@ def convert(  # noqa: C901
                 # artifacts_path = artifacts_path
             )
 
-            if asr_model == AsrModelType.WHISPER_TINY:
-                pipeline_options.asr_options = WHISPER_TINY
-            elif asr_model == AsrModelType.WHISPER_SMALL:
-                pipeline_options.asr_options = WHISPER_SMALL
-            elif asr_model == AsrModelType.WHISPER_MEDIUM:
-                pipeline_options.asr_options = WHISPER_MEDIUM
-            elif asr_model == AsrModelType.WHISPER_BASE:
-                pipeline_options.asr_options = WHISPER_BASE
-            elif asr_model == AsrModelType.WHISPER_LARGE:
-                pipeline_options.asr_options = WHISPER_LARGE
-            elif asr_model == AsrModelType.WHISPER_TURBO:
-                pipeline_options.asr_options = WHISPER_TURBO
-            else:
+            asr_model_map = {
+                AsrModelType.WHISPER_TINY: WHISPER_TINY,
+                AsrModelType.WHISPER_SMALL: WHISPER_SMALL,
+                AsrModelType.WHISPER_MEDIUM: WHISPER_MEDIUM,
+                AsrModelType.WHISPER_BASE: WHISPER_BASE,
+                AsrModelType.WHISPER_LARGE: WHISPER_LARGE,
+                AsrModelType.WHISPER_TURBO: WHISPER_TURBO,
+            }
+            pipeline_options.asr_options = asr_model_map.get(asr_model)
+            if not pipeline_options.asr_options:
                 _log.error(f"{asr_model} is not known")
                 raise ValueError(f"{asr_model} is not known")
 
@@ -670,9 +566,8 @@ def convert(  # noqa: C901
                 InputFormat.AUDIO: audio_format_option,
             }
 
-        if artifacts_path is not None:
+        if artifacts_path:
             pipeline_options.artifacts_path = artifacts_path
-            # audio_pipeline_options.artifacts_path = artifacts_path
 
         doc_converter = DocumentConverter(
             allowed_formats=from_formats,
diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py
index 11e085b7..144090c3 100644
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -14,31 +14,15 @@ from typing_extensions import deprecated
 from docling.datamodel import asr_model_specs
 
 # Import the following for backwards compatibility
-from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
-from docling.datamodel.pipeline_options_asr_model import (
-    InlineAsrOptions,
-)
-from docling.datamodel.pipeline_options_vlm_model import (
-    ApiVlmOptions,
-    InferenceFramework,
-    InlineVlmOptions,
-    ResponseFormat,
-)
-from docling.datamodel.vlm_model_specs import (
-    GRANITE_VISION_OLLAMA as granite_vision_vlm_ollama_conversion_options,
-    GRANITE_VISION_TRANSFORMERS as granite_vision_vlm_conversion_options,
-    SMOLDOCLING_MLX as smoldocling_vlm_mlx_conversion_options,
-    SMOLDOCLING_TRANSFORMERS as smoldocling_vlm_conversion_options,
-    VlmModelType,
-)
+
+
+
+
 
 _log = logging.getLogger(__name__)
 
 
-class BaseOptions(BaseModel):
-    """Base class for options."""
 
-    kind: ClassVar[str]
 
 
 class TableFormerMode(str, Enum):
@@ -200,16 +184,7 @@ class PictureDescriptionVlmOptions(PictureDescriptionBaseOptions):
         return self.repo_id.replace("/", "--")
 
 
-# SmolVLM
-smolvlm_picture_description = PictureDescriptionVlmOptions(
-    repo_id="HuggingFaceTB/SmolVLM-256M-Instruct"
-)
 
-# GraniteVision
-granite_picture_description = PictureDescriptionVlmOptions(
-    repo_id="ibm-granite/granite-vision-3.2-2b-preview",
-    prompt="What is shown in this image?",
-)
 
 
 # Define an enum for the backend options
@@ -223,15 +198,7 @@ class PdfBackend(str, Enum):
 
 
 # Define an enum for the ocr engines
-@deprecated("Use ocr_factory.registered_enum")
-class OcrEngine(str, Enum):
-    """Enum of valid OCR engines."""
 
-    EASYOCR = "easyocr"
-    TESSERACT_CLI = "tesseract_cli"
-    TESSERACT = "tesseract"
-    OCRMAC = "ocrmac"
-    RAPIDOCR = "rapidocr"
 
 
 class PipelineOptions(BaseModel):
@@ -246,68 +213,10 @@ class PipelineOptions(BaseModel):
     allow_external_plugins: bool = False
 
 
-class PaginatedPipelineOptions(PipelineOptions):
+
+
+
+class VlmPipelineOptions(PipelineOptions):
     artifacts_path: Optional[Union[Path, str]] = None
 
-    images_scale: float = 1.0
-    generate_page_images: bool = False
-    generate_picture_images: bool = False
-
-
-class VlmPipelineOptions(PaginatedPipelineOptions):
-    generate_page_images: bool = True
-    force_backend_text: bool = (
-        False  # (To be used with vlms, or other generative models)
-    )
-    # If True, text from backend will be used instead of generated text
-    vlm_options: Union[InlineVlmOptions, ApiVlmOptions] = (
-        smoldocling_vlm_conversion_options
-    )
-
-
-class AsrPipelineOptions(PipelineOptions):
-    asr_options: Union[InlineAsrOptions] = asr_model_specs.WHISPER_TINY
-    artifacts_path: Optional[Union[Path, str]] = None
-
-
-class PdfPipelineOptions(PaginatedPipelineOptions):
-    """Options for the PDF pipeline."""
-
-    do_table_structure: bool = True  # True: perform table structure extraction
-    do_ocr: bool = True  # True: perform OCR, replace programmatic PDF text
-    do_code_enrichment: bool = False  # True: perform code OCR
-    do_formula_enrichment: bool = False  # True: perform formula OCR, return Latex code
-    do_picture_classification: bool = False  # True: classify pictures in documents
-    do_picture_description: bool = False  # True: run describe pictures in documents
-    force_backend_text: bool = (
-        False  # (To be used with vlms, or other generative models)
-    )
-    # If True, text from backend will be used instead of generated text
-
-    table_structure_options: TableStructureOptions = TableStructureOptions()
-    ocr_options: OcrOptions = EasyOcrOptions()
-    picture_description_options: PictureDescriptionBaseOptions = (
-        smolvlm_picture_description
-    )
-
-    images_scale: float = 1.0
-    generate_page_images: bool = False
-    generate_picture_images: bool = False
-    generate_table_images: bool = Field(
-        default=False,
-        deprecated=(
-            "Field `generate_table_images` is deprecated. "
-            "To obtain table images, set `PdfPipelineOptions.generate_page_images = True` "
-            "before conversion and then use the `TableItem.get_image` function."
-        ),
-    )
-
-    generate_parsed_pages: Literal[True] = (
-        True  # Always True since parsed_page is now mandatory
-    )
-
-
-class ProcessingPipeline(str, Enum):
-    STANDARD = "standard"
-    VLM = "vlm"
-    ASR = "asr"
+
diff --git a/docling/datamodel/settings.py b/docling/datamodel/settings.py
index 6cfc953b..0d22c71a 100644
--- a/docling/datamodel/settings.py
+++ b/docling/datamodel/settings.py
@@ -6,23 +6,14 @@ from pydantic import BaseModel, PlainValidator
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
 
-def _validate_page_range(v: Tuple[int, int]) -> Tuple[int, int]:
-    if v[0] < 1 or v[1] < v[0]:
-        raise ValueError(
-            "Invalid page range: start must be ≥ 1 and end must be ≥ start."
-        )
-    return v
 
 
-PageRange = Annotated[Tuple[int, int], PlainValidator(_validate_page_range)]
 
-DEFAULT_PAGE_RANGE: PageRange = (1, sys.maxsize)
 
 
 class DocumentLimits(BaseModel):
     max_num_pages: int = sys.maxsize
     max_file_size: int = sys.maxsize
-    page_range: PageRange = DEFAULT_PAGE_RANGE
 
 
 class BatchConcurrencySettings(BaseModel):
@@ -32,14 +23,7 @@ class BatchConcurrencySettings(BaseModel):
     page_batch_concurrency: int = 2
     elements_batch_size: int = 16
 
-    # doc_batch_size: int = 1
-    # doc_batch_concurrency: int = 1
-    # page_batch_size: int = 1
-    # page_batch_concurrency: int = 1
-
-    # model_concurrency: int = 2
-
-    # To force models into single core: export OMP_NUM_THREADS=1
+
 
 
 class DebugSettings(BaseModel):
diff --git a/docling/document_converter.py b/docling/document_converter.py
index 1a0a9d75..e67f03bb 100644
--- a/docling/document_converter.py
+++ b/docling/document_converter.py
@@ -65,65 +65,8 @@ class FormatOption(BaseModel):
         return self
 
 
-class CsvFormatOption(FormatOption):
-    pipeline_cls: Type = SimplePipeline
-    backend: Type[AbstractDocumentBackend] = CsvDocumentBackend
 
 
-class ExcelFormatOption(FormatOption):
-    pipeline_cls: Type = SimplePipeline
-    backend: Type[AbstractDocumentBackend] = MsExcelDocumentBackend
-
-
-class WordFormatOption(FormatOption):
-    pipeline_cls: Type = SimplePipeline
-    backend: Type[AbstractDocumentBackend] = MsWordDocumentBackend
-
-
-class PowerpointFormatOption(FormatOption):
-    pipeline_cls: Type = SimplePipeline
-    backend: Type[AbstractDocumentBackend] = MsPowerpointDocumentBackend
-
-
-class MarkdownFormatOption(FormatOption):
-    pipeline_cls: Type = SimplePipeline
-    backend: Type[AbstractDocumentBackend] = MarkdownDocumentBackend
-
-
-class AsciiDocFormatOption(FormatOption):
-    pipeline_cls: Type = SimplePipeline
-    backend: Type[AbstractDocumentBackend] = AsciiDocBackend
-
-
-class HTMLFormatOption(FormatOption):
-    pipeline_cls: Type = SimplePipeline
-    backend: Type[AbstractDocumentBackend] = HTMLDocumentBackend
-
-
-class PatentUsptoFormatOption(FormatOption):
-    pipeline_cls: Type = SimplePipeline
-    backend: Type[PatentUsptoDocumentBackend] = PatentUsptoDocumentBackend
-
-
-class XMLJatsFormatOption(FormatOption):
-    pipeline_cls: Type = SimplePipeline
-    backend: Type[AbstractDocumentBackend] = JatsDocumentBackend
-
-
-class ImageFormatOption(FormatOption):
-    pipeline_cls: Type = StandardPdfPipeline
-    backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend
-
-
-class PdfFormatOption(FormatOption):
-    pipeline_cls: Type = StandardPdfPipeline
-    backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend
-
-
-class AudioFormatOption(FormatOption):
-    pipeline_cls: Type = AsrPipeline
-    backend: Type[AbstractDocumentBackend] = NoOpBackend
-
 
 def _get_default_option(format: InputFormat) -> FormatOption:
     format_to_default_options = {
@@ -167,12 +110,11 @@ def _get_default_option(format: InputFormat) -> FormatOption:
     }
     if (options := format_to_default_options.get(format)) is not None:
         return options
-    else:
-        raise RuntimeError(f"No default options configured for {format}")
+    raise RuntimeError(f"No default options configured for {format}")
 
 
 class DocumentConverter:
-    _default_download_filename = "file"
+    _default_filename = "file"
 
     def __init__(
         self,
@@ -194,10 +136,7 @@ class DocumentConverter:
             Tuple[Type[BasePipeline], str], BasePipeline
         ] = {}
 
-    def _get_initialized_pipelines(
-        self,
-    ) -> dict[tuple[Type[BasePipeline], str], BasePipeline]:
-        return self.initialized_pipelines
+
 
     def _get_pipeline_options_hash(self, pipeline_options: PipelineOptions) -> str:
         """Generate a hash of pipeline options to use as part of the cache key."""
@@ -217,7 +156,7 @@ class DocumentConverter:
     @validate_call(config=ConfigDict(strict=True))
     def convert(
         self,
-        source: Union[Path, str, DocumentStream],  # TODO review naming
+        documents: Union[Path, str, DocumentStream],  # TODO review naming
         headers: Optional[Dict[str, str]] = None,
         raises_on_error: bool = True,
         max_num_pages: int = sys.maxsize,
@@ -225,7 +164,7 @@ class DocumentConverter:
         page_range: PageRange = DEFAULT_PAGE_RANGE,
     ) -> ConversionResult:
         all_res = self.convert_all(
-            source=[source],
+            documents=[documents],
             raises_on_error=raises_on_error,
             max_num_pages=max_num_pages,
             max_file_size=max_file_size,
@@ -237,7 +176,7 @@ class DocumentConverter:
     @validate_call(config=ConfigDict(strict=True))
     def convert_all(
         self,
-        source: Iterable[Union[Path, str, DocumentStream]],  # TODO review naming
+        documents: Iterable[Union[Path, str, DocumentStream]],  # TODO review naming
         headers: Optional[Dict[str, str]] = None,
         raises_on_error: bool = True,  # True: raises on first conversion error; False: does not raise on conv error
         max_num_pages: int = sys.maxsize,
@@ -249,28 +188,10 @@ class DocumentConverter:
             max_file_size=max_file_size,
             page_range=page_range,
         )
-        conv_input = _DocumentConversionInput(
-            path_or_stream_iterator=source, limits=limits, headers=headers
-        )
-        conv_res_iter = self._convert(conv_input, raises_on_error=raises_on_error)
+        """Converts a batch of documents.
 
-        had_result = False
-        for conv_res in conv_res_iter:
-            had_result = True
-            if raises_on_error and conv_res.status not in {
-                ConversionStatus.SUCCESS,
-                ConversionStatus.PARTIAL_SUCCESS,
-            }:
-                raise ConversionError(
-                    f"Conversion failed for: {conv_res.input.file} with status: {conv_res.status}"
-                )
-            else:
-                yield conv_res
-
-        if not had_result and raises_on_error:
-            raise ConversionError(
-                "Conversion failed because the provided file has no recognizable format or it wasn't in the list of allowed formats."
-            )
+        Note: PDF backends are not thread-safe, so thread pool usage is disabled.
+        """
 
     def _convert(
         self, conv_input: _DocumentConversionInput, raises_on_error: bool
@@ -380,5 +301,6 @@ class DocumentConverter:
                     status=ConversionStatus.FAILURE,
                 )
                 # TODO add error log why it failed.
+                _log.error(f"Input document {in_doc.file} is not valid.")
 
         return conv_res