From c6e1471e0237a71ec057309f2ba66b1ecebca365 Mon Sep 17 00:00:00 2001 From: Michele Dolfi Date: Fri, 11 Oct 2024 12:58:59 +0200 Subject: [PATCH 1/4] use options objects Signed-off-by: Michele Dolfi --- docling/datamodel/pipeline_options.py | 15 ---- docling/models/layout_model.py | 8 +- docling/models/page_assemble_model.py | 12 ++- docling/models/page_preprocessing_model.py | 11 ++- docling/models/table_structure_model.py | 16 ++-- .../pipeline/standard_pdf_model_pipeline.py | 76 +++++++++++-------- tests/verify_utils.py | 7 +- 7 files changed, 78 insertions(+), 67 deletions(-) diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 4be6fcec..10d5b14f 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -72,19 +72,4 @@ class PdfPipelineOptions(PipelineOptions): Field(EasyOcrOptions(), discriminator="kind") ) - keep_page_images: Annotated[ - bool, - Field( - deprecated="`keep_page_images` is depreacted, set the value of `images_scale` instead" - ), - ] = False # False: page images are removed in the assemble step images_scale: Optional[float] = None # if set, the scale for generated images - - @model_validator(mode="after") - def set_page_images_from_deprecated(self) -> "PdfPipelineOptions": - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - default_scale = 1.0 - if self.keep_page_images and self.images_scale is None: - self.images_scale = default_scale - return self diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py index 10b7f196..1240b456 100644 --- a/docling/models/layout_model.py +++ b/docling/models/layout_model.py @@ -2,6 +2,7 @@ import copy import logging import random import time +from pathlib import Path from typing import Iterable, List from docling_core.types.experimental import CoordOrigin @@ -43,11 +44,8 @@ class LayoutModel(AbstractPageModel): FIGURE_LABEL = DocItemLabel.PICTURE FORMULA_LABEL = DocItemLabel.FORMULA - def __init__(self, config): - self.config = config - self.layout_predictor = LayoutPredictor( - config["artifacts_path"] - ) # TODO temporary + def __init__(self, artifacts_path: Path): + self.layout_predictor = LayoutPredictor(artifacts_path) # TODO temporary def postprocess(self, clusters: List[Cluster], cells: List[Cell], page_height): MIN_INTERSECTION = 0.2 diff --git a/docling/models/page_assemble_model.py b/docling/models/page_assemble_model.py index 28f93c12..44743339 100644 --- a/docling/models/page_assemble_model.py +++ b/docling/models/page_assemble_model.py @@ -2,6 +2,8 @@ import logging import re from typing import Iterable, List +from pydantic import BaseModel + from docling.datamodel.base_models import ( AssembledUnit, FigureElement, @@ -16,9 +18,13 @@ from docling.models.layout_model import LayoutModel _log = logging.getLogger(__name__) +class PageAssembleOptions(BaseModel): + keep_images: bool = False + + class PageAssembleModel(AbstractPageModel): - def __init__(self, config): - self.config = config + def __init__(self, options: PageAssembleOptions): + self.options = options def sanitize_text(self, lines): if len(lines) <= 1: @@ -147,7 +153,7 @@ class PageAssembleModel(AbstractPageModel): ) # Remove page images (can be disabled) - if self.config["images_scale"] is None: + if not self.options.keep_images: page._image_cache = {} # Unload backend diff --git a/docling/models/page_preprocessing_model.py b/docling/models/page_preprocessing_model.py index 3683123c..438114d8 100644 --- 
a/docling/models/page_preprocessing_model.py +++ b/docling/models/page_preprocessing_model.py @@ -1,14 +1,19 @@ from typing import Iterable from PIL import ImageDraw +from pydantic import BaseModel from docling.datamodel.base_models import Page from docling.models.abstract_model import AbstractPageModel +class PagePreprocessingOptions(BaseModel): + images_scale: float + + class PagePreprocessingModel(AbstractPageModel): - def __init__(self, config): - self.config = config + def __init__(self, options: PagePreprocessingOptions): + self.options = options def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: for page in page_batch: @@ -23,7 +28,7 @@ class PagePreprocessingModel(AbstractPageModel): scale=1.0 ) # puts the page image on the image cache at default scale - images_scale = self.config["images_scale"] + images_scale = self.options.images_scale # user requested scales if images_scale is not None: page._default_image_scale = images_scale diff --git a/docling/models/table_structure_model.py b/docling/models/table_structure_model.py index 4a00e55d..4433930a 100644 --- a/docling/models/table_structure_model.py +++ b/docling/models/table_structure_model.py @@ -10,19 +10,21 @@ from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredic from PIL import ImageDraw from docling.datamodel.base_models import Page, Table, TableStructurePrediction -from docling.datamodel.pipeline_options import TableFormerMode +from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions from docling.models.abstract_model import AbstractPageModel class TableStructureModel(AbstractPageModel): - def __init__(self, config): - self.config = config - self.do_cell_matching = config["do_cell_matching"] - self.mode = config["mode"] + def __init__( + self, enabled: bool, artifacts_path: Path, options: TableStructureOptions + ): + self.options = options + self.do_cell_matching = self.options.do_cell_matching + self.mode = self.options.mode - self.enabled = config["enabled"] + self.enabled = enabled if self.enabled: - artifacts_path: Path = config["artifacts_path"] + artifacts_path: Path = artifacts_path if self.mode == TableFormerMode.ACCURATE: artifacts_path = artifacts_path / "fat" diff --git a/docling/pipeline/standard_pdf_model_pipeline.py b/docling/pipeline/standard_pdf_model_pipeline.py index dba1f3dc..c50a5552 100644 --- a/docling/pipeline/standard_pdf_model_pipeline.py +++ b/docling/pipeline/standard_pdf_model_pipeline.py @@ -16,8 +16,11 @@ from docling.models.base_ocr_model import BaseOcrModel from docling.models.ds_glm_model import GlmModel from docling.models.easyocr_model import EasyOcrModel from docling.models.layout_model import LayoutModel -from docling.models.page_assemble_model import PageAssembleModel -from docling.models.page_preprocessing_model import PagePreprocessingModel +from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions +from docling.models.page_preprocessing_model import ( + PagePreprocessingModel, + PagePreprocessingOptions, +) from docling.models.table_structure_model import TableStructureModel from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel from docling.models.tesseract_ocr_model import TesseractOcrModel @@ -32,6 +35,7 @@ class StandardPdfModelPipeline(PaginatedModelPipeline): def __init__(self, pipeline_options: PdfPipelineOptions): super().__init__(pipeline_options) + self.pipeline_options: PdfPipelineOptions if not pipeline_options.artifacts_path: artifacts_path = 
self.download_models_hf() @@ -39,48 +43,38 @@ class StandardPdfModelPipeline(PaginatedModelPipeline): self.artifacts_path = Path(artifacts_path) self.glm_model = GlmModel(config={}) - ocr_model: BaseOcrModel - if isinstance(pipeline_options.ocr_options, EasyOcrOptions): - ocr_model = EasyOcrModel( - enabled=pipeline_options.do_ocr, - options=pipeline_options.ocr_options, - ) - elif isinstance(pipeline_options.ocr_options, TesseractCliOcrOptions): - ocr_model = TesseractOcrCliModel( - enabled=pipeline_options.do_ocr, - options=pipeline_options.ocr_options, - ) - elif isinstance(pipeline_options.ocr_options, TesseractOcrOptions): - ocr_model = TesseractOcrModel( - enabled=pipeline_options.do_ocr, - options=pipeline_options.ocr_options, - ) - else: + if ocr_model := self.get_ocr_model() is None: raise RuntimeError( f"The specified OCR kind is not supported: {pipeline_options.ocr_options.kind}." ) self.model_pipe = [ + # Pre-processing PagePreprocessingModel( - config={"images_scale": pipeline_options.images_scale} + options=PagePreprocessingOptions( + images_scale=pipeline_options.images_scale + ) ), + # OCR ocr_model, + # Layout model LayoutModel( - config={ - "artifacts_path": artifacts_path - / StandardPdfModelPipeline._layout_model_path - } + artifacts_path=artifacts_path + / StandardPdfModelPipeline._layout_model_path ), + # Table structure model TableStructureModel( - config={ - "artifacts_path": artifacts_path - / StandardPdfModelPipeline._table_model_path, - "enabled": pipeline_options.do_table_structure, - "do_cell_matching": pipeline_options.table_structure_options.do_cell_matching, - "mode": pipeline_options.table_structure_options.mode, - } + enabled=pipeline_options.do_table_structure, + artifacts_path=artifacts_path + / StandardPdfModelPipeline._table_model_path, + options=pipeline_options.table_structure_options, + ), + # Page assemble + PageAssembleModel( + options=PageAssembleOptions( + keep_images=pipeline_options.images_scale is not None + ) ), - PageAssembleModel(config={"images_scale": pipeline_options.images_scale}), ] self.enrichment_pipe = [ @@ -102,6 +96,24 @@ class StandardPdfModelPipeline(PaginatedModelPipeline): return Path(download_path) + def get_ocr_model(self) -> Optional[BaseOcrModel]: + if isinstance(self.pipeline_options.ocr_options, EasyOcrOptions): + return EasyOcrModel( + enabled=self.pipeline_options.do_ocr, + options=self.pipeline_options.ocr_options, + ) + elif isinstance(self.pipeline_options.ocr_options, TesseractCliOcrOptions): + return TesseractOcrCliModel( + enabled=self.pipeline_options.do_ocr, + options=self.pipeline_options.ocr_options, + ) + elif isinstance(self.pipeline_options.ocr_options, TesseractOcrOptions): + return TesseractOcrModel( + enabled=self.pipeline_options.do_ocr, + options=self.pipeline_options.ocr_options, + ) + return None + def initialize_page(self, doc: InputDocument, page: Page) -> Page: page._backend = doc._backend.load_page(page.page_no) page.size = page._backend.get_size() diff --git a/tests/verify_utils.py b/tests/verify_utils.py index fb888bf8..186f2d9b 100644 --- a/tests/verify_utils.py +++ b/tests/verify_utils.py @@ -1,4 +1,5 @@ import json +import warnings from pathlib import Path from typing import List @@ -235,8 +236,10 @@ def verify_conversion_result_v1( doc_pred_pages: List[Page] = doc_result.pages doc_pred: DsDocument = doc_result.legacy_output - doc_pred_md = doc_result.render_as_markdown() - doc_pred_dt = doc_result.render_as_doctags() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", 
DeprecationWarning) + doc_pred_md = doc_result.render_as_markdown() + doc_pred_dt = doc_result.render_as_doctags() engine_suffix = "" if ocr_engine is None else f".{ocr_engine}" gt_subpath = input_path.parent / "groundtruth" / "docling_v1" / input_path.name From 94b5e1532dcca7a8b716400b84d536fd8b1a9ef2 Mon Sep 17 00:00:00 2001 From: Michele Dolfi Date: Fri, 11 Oct 2024 13:03:38 +0200 Subject: [PATCH 2/4] add GlmOptions Signed-off-by: Michele Dolfi --- docling/models/ds_glm_model.py | 21 ++++++++++--------- .../pipeline/standard_pdf_model_pipeline.py | 6 ++++-- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/docling/models/ds_glm_model.py b/docling/models/ds_glm_model.py index 529b12ce..5fa35af1 100644 --- a/docling/models/ds_glm_model.py +++ b/docling/models/ds_glm_model.py @@ -14,23 +14,24 @@ from docling_core.types import Ref from docling_core.types.experimental import BoundingBox, CoordOrigin from docling_core.types.experimental.document import DoclingDocument from PIL import ImageDraw +from pydantic import BaseModel from docling.datamodel.base_models import Cluster from docling.datamodel.document import ConversionResult -class GlmModel: - def __init__(self, config): - self.config = config - self.create_legacy_output = config.get("create_legacy_output", True) +class GlmOptions(BaseModel): + create_legacy_output: bool = True + model_names: str = "" # e.g. "language;term;reference" + + +class GlmModel: + def __init__(self, options: GlmOptions): + self.options = options + self.create_legacy_output = self.options.create_legacy_output - self.model_names = self.config.get( - "model_names", "" - ) # "language;term;reference" load_pretrained_nlp_models() - # model = init_nlp_model(model_names="language;term;reference") - model = init_nlp_model(model_names=self.model_names) - self.model = model + self.model = init_nlp_model(model_names=self.options.model_names) def __call__( self, conv_res: ConversionResult diff --git a/docling/pipeline/standard_pdf_model_pipeline.py b/docling/pipeline/standard_pdf_model_pipeline.py index 3ec4c17e..cba8609b 100644 --- a/docling/pipeline/standard_pdf_model_pipeline.py +++ b/docling/pipeline/standard_pdf_model_pipeline.py @@ -13,7 +13,7 @@ from docling.datamodel.pipeline_options import ( TesseractOcrOptions, ) from docling.models.base_ocr_model import BaseOcrModel -from docling.models.ds_glm_model import GlmModel +from docling.models.ds_glm_model import GlmModel, GlmOptions from docling.models.easyocr_model import EasyOcrModel from docling.models.layout_model import LayoutModel from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions @@ -42,7 +42,9 @@ class StandardPdfModelPipeline(PaginatedModelPipeline): self.artifacts_path = Path(artifacts_path) self.glm_model = GlmModel( - config={"create_legacy_output": pipeline_options.create_legacy_output} + options=GlmOptions( + create_legacy_output=pipeline_options.create_legacy_output + ) ) if ocr_model := self.get_ocr_model() is None: From 753f67a434960cbc41cffa429e194e5903ef3b91 Mon Sep 17 00:00:00 2001 From: Michele Dolfi Date: Fri, 11 Oct 2024 13:06:32 +0200 Subject: [PATCH 3/4] fixes Signed-off-by: Michele Dolfi --- docling/models/page_preprocessing_model.py | 4 ++-- docling/pipeline/standard_pdf_model_pipeline.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docling/models/page_preprocessing_model.py b/docling/models/page_preprocessing_model.py index 438114d8..52f92129 100644 --- a/docling/models/page_preprocessing_model.py +++ 
b/docling/models/page_preprocessing_model.py @@ -1,4 +1,4 @@ -from typing import Iterable +from typing import Iterable, Optional from PIL import ImageDraw from pydantic import BaseModel @@ -8,7 +8,7 @@ from docling.models.abstract_model import AbstractPageModel class PagePreprocessingOptions(BaseModel): - images_scale: float + images_scale: Optional[float] class PagePreprocessingModel(AbstractPageModel): diff --git a/docling/pipeline/standard_pdf_model_pipeline.py b/docling/pipeline/standard_pdf_model_pipeline.py index cba8609b..53558579 100644 --- a/docling/pipeline/standard_pdf_model_pipeline.py +++ b/docling/pipeline/standard_pdf_model_pipeline.py @@ -47,7 +47,7 @@ class StandardPdfModelPipeline(PaginatedModelPipeline): ) ) - if ocr_model := self.get_ocr_model() is None: + if (ocr_model := self.get_ocr_model()) is None: raise RuntimeError( f"The specified OCR kind is not supported: {pipeline_options.ocr_options.kind}." ) From 136f16e85a8290b4d39905f6f1e6e6d6b372257e Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:52:37 +0200 Subject: [PATCH 4/4] feat!: simplify conversion API (#139) Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- docling/cli/main.py | 7 +- docling/datamodel/document.py | 36 ++------- docling/document_converter.py | 124 +++++++++++++------------------ docling/models/ds_glm_model.py | 4 +- examples/batch_convert.py | 11 +-- examples/custom_convert.py | 21 +----- examples/export_figures.py | 63 +++++----------- examples/export_multimodal.py | 94 ++++++++++------------- examples/export_tables.py | 58 +++++---------- examples/minimal.py | 2 +- examples/run_with_formats.py | 6 +- tests/test_e2e_conversion.py | 4 +- tests/test_e2e_ocr_conversion.py | 3 +- tests/test_interfaces.py | 32 ++------ tests/test_options.py | 2 +- 15 files changed, 164 insertions(+), 303 deletions(-) diff --git a/docling/cli/main.py b/docling/cli/main.py index b925e796..03701ad3 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -13,7 +13,7 @@ from docling_core.utils.file import resolve_file_source from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend from docling.datamodel.base_models import ConversionStatus, InputFormat -from docling.datamodel.document import ConversionResult, DocumentConversionInput +from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( EasyOcrOptions, PdfPipelineOptions, @@ -231,12 +231,9 @@ def convert( } ) - # Define input files - input = DocumentConversionInput.from_paths(input_doc_paths) - start_time = time.time() - conv_results = doc_converter.convert_batch(input) + conv_results = doc_converter.convert_all(input_doc_paths) output.mkdir(parents=True, exist_ok=True) export_documents( diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index ede4e328..615acfac 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -19,6 +19,7 @@ from docling_core.types.experimental import ( DocItemLabel, DoclingDocument, ) +from docling_core.utils.file import resolve_file_source from pydantic import BaseModel from typing_extensions import deprecated @@ -162,8 +163,7 @@ class DocumentFormat(str, Enum): V1 = "v1" -@deprecated("Use `ConversionResult` instead.") -class ConvertedDocument(BaseModel): +class ConversionResult(BaseModel): input: InputDocument status: ConversionStatus = 
ConversionStatus.PENDING # failure, success @@ -457,20 +457,16 @@ class ConvertedDocument(BaseModel): yield element, cropped_im -class ConversionResult(ConvertedDocument): - pass +class _DocumentConversionInput(BaseModel): - -class DocumentConversionInput(BaseModel): - - _path_or_stream_iterator: Iterable[Union[Path, DocumentStream]] = None + path_or_stream_iterator: Iterable[Union[Path, str, DocumentStream]] limits: Optional[DocumentLimits] = DocumentLimits() def docs( self, format_options: Dict[InputFormat, "FormatOption"] ) -> Iterable[InputDocument]: - - for obj in self._path_or_stream_iterator: + for item in self.path_or_stream_iterator: + obj = resolve_file_source(item) if isinstance(item, str) else item format = self._guess_format(obj) if format not in format_options.keys(): _log.debug( @@ -496,6 +492,8 @@ class DocumentConversionInput(BaseModel): limits=self.limits, backend=backend, ) + else: + raise RuntimeError(f"Unexpected obj type in iterator: {type(obj)}") def _guess_format(self, obj): content = None @@ -531,21 +529,3 @@ class DocumentConversionInput(BaseModel): return "text/html" return None - - @classmethod - def from_paths(cls, paths: Iterable[Path], limits: Optional[DocumentLimits] = None): - paths = [Path(p) for p in paths] - - doc_input = cls(limits=limits) - doc_input._path_or_stream_iterator = paths - - return doc_input - - @classmethod - def from_streams( - cls, streams: Iterable[DocumentStream], limits: Optional[DocumentLimits] = None - ): - doc_input = cls(limits=limits) - doc_input._path_or_stream_iterator = streams - - return doc_input diff --git a/docling/document_converter.py b/docling/document_converter.py index dc919883..f354d58b 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -1,34 +1,24 @@ import logging -import tempfile +import sys import time from pathlib import Path from typing import Dict, Iterable, List, Optional, Type -import requests -from pydantic import ( - AnyHttpUrl, - BaseModel, - ConfigDict, - TypeAdapter, - ValidationError, - field_validator, - model_validator, -) -from typing_extensions import deprecated +from pydantic import BaseModel, ConfigDict, model_validator, validate_call from docling.backend.abstract_backend import AbstractDocumentBackend from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.html_backend import HTMLDocumentBackend from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend from docling.backend.msword_backend import MsWordDocumentBackend -from docling.datamodel.base_models import ConversionStatus, InputFormat +from docling.datamodel.base_models import ConversionStatus, DocumentStream, InputFormat from docling.datamodel.document import ( ConversionResult, - DocumentConversionInput, InputDocument, + _DocumentConversionInput, ) from docling.datamodel.pipeline_options import PipelineOptions -from docling.datamodel.settings import settings +from docling.datamodel.settings import DocumentLimits, settings from docling.pipeline.base_model_pipeline import AbstractModelPipeline from docling.pipeline.simple_model_pipeline import SimpleModelPipeline from docling.pipeline.standard_pdf_model_pipeline import StandardPdfModelPipeline @@ -119,16 +109,56 @@ class DocumentConverter: Type[AbstractModelPipeline], AbstractModelPipeline ] = {} - @deprecated("Use convert_batch instead.") - def convert(self, input: DocumentConversionInput) -> Iterable[ConversionResult]: - yield from self.convert_batch(input=input) + 
@validate_call(config=ConfigDict(strict=True)) + def convert( + self, + source: Path | str | DocumentStream, # TODO review naming + raises_on_error: bool = True, + max_num_pages: int = sys.maxsize, + max_file_size: int = sys.maxsize, + ) -> ConversionResult: - def convert_batch( - self, input: DocumentConversionInput, raise_on_error: bool = False + all_res = self.convert_all( + source=[source], + raises_on_error=raises_on_error, + max_num_pages=max_num_pages, + max_file_size=max_file_size, + ) + return next(all_res) + + @validate_call(config=ConfigDict(strict=True)) + def convert_all( + self, + source: Iterable[Path | str | DocumentStream], # TODO review naming + raises_on_error: bool = True, # True: raises on first conversion error; False: does not raise on conv error + max_num_pages: int = sys.maxsize, + max_file_size: int = sys.maxsize, ) -> Iterable[ConversionResult]: + limits = DocumentLimits( + max_num_pages=max_num_pages, + max_file_size=max_file_size, + ) + conv_input = _DocumentConversionInput( + path_or_stream_iterator=source, + limits=limits, + ) + conv_res_iter = self._convert(conv_input) + for conv_res in conv_res_iter: + if raises_on_error and conv_res.status not in { + ConversionStatus.SUCCESS, + ConversionStatus.PARTIAL_SUCCESS, + }: + raise RuntimeError( + f"Conversion failed for: {conv_res.input.file} with status: {conv_res.status}" + ) + else: + yield conv_res + def _convert( + self, conv_input: _DocumentConversionInput + ) -> Iterable[ConversionResult]: for input_batch in chunkify( - input.docs(self.format_to_options), + conv_input.docs(self.format_to_options), settings.perf.doc_batch_size, # pass format_options ): _log.info(f"Going to convert document batch...") @@ -143,58 +173,6 @@ class DocumentConverter: if item is not None: yield item - def convert_single( - self, source: Path | AnyHttpUrl | str, raise_on_error: bool = False - ) -> ConversionResult: - """Convert a single document. - - Args: - source (Path | AnyHttpUrl | str): The PDF input source. Can be a path or URL. - - Raises: - ValueError: If source is of unexpected type. - RuntimeError: If conversion fails. - - Returns: - ConversionResult: The conversion result object. 
- """ - with tempfile.TemporaryDirectory() as temp_dir: - try: - http_url: AnyHttpUrl = TypeAdapter(AnyHttpUrl).validate_python(source) - res = requests.get(http_url, stream=True) - res.raise_for_status() - fname = None - # try to get filename from response header - if cont_disp := res.headers.get("Content-Disposition"): - for par in cont_disp.strip().split(";"): - # currently only handling directive "filename" (not "*filename") - if (split := par.split("=")) and split[0].strip() == "filename": - fname = "=".join(split[1:]).strip().strip("'\"") or None - break - # otherwise, use name from URL: - if fname is None: - fname = Path(http_url.path).name or self._default_download_filename - local_path = Path(temp_dir) / fname - with open(local_path, "wb") as f: - for chunk in res.iter_content(chunk_size=1024): # using 1-KB chunks - f.write(chunk) - except ValidationError: - try: - local_path = TypeAdapter(Path).validate_python(source) - except ValidationError: - raise ValueError( - f"Unexpected file path type encountered: {type(source)}" - ) - conv_inp = DocumentConversionInput.from_paths(paths=[local_path]) - conv_res_iter = self.convert_batch(conv_inp) - conv_res: ConversionResult = next(conv_res_iter) - if conv_res.status not in { - ConversionStatus.SUCCESS, - ConversionStatus.PARTIAL_SUCCESS, - }: - raise RuntimeError(f"Conversion failed with status: {conv_res.status}") - return conv_res - def _get_pipeline(self, doc: InputDocument) -> Optional[AbstractModelPipeline]: fopt = self.format_to_options.get(doc.format) diff --git a/docling/models/ds_glm_model.py b/docling/models/ds_glm_model.py index 5fa35af1..e44f493a 100644 --- a/docling/models/ds_glm_model.py +++ b/docling/models/ds_glm_model.py @@ -14,13 +14,15 @@ from docling_core.types import Ref from docling_core.types.experimental import BoundingBox, CoordOrigin from docling_core.types.experimental.document import DoclingDocument from PIL import ImageDraw -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict from docling.datamodel.base_models import Cluster from docling.datamodel.document import ConversionResult class GlmOptions(BaseModel): + model_config = ConfigDict(protected_namespaces=()) + create_legacy_output: bool = True model_names: str = "" # e.g. 
"language;term;reference" diff --git a/examples/batch_convert.py b/examples/batch_convert.py index e54193f0..0cf2d650 100644 --- a/examples/batch_convert.py +++ b/examples/batch_convert.py @@ -7,7 +7,7 @@ from typing import Iterable import yaml from docling.datamodel.base_models import ConversionStatus -from docling.datamodel.document import ConversionResult, DocumentConversionInput +from docling.datamodel.document import ConversionResult from docling.document_converter import DocumentConverter _log = logging.getLogger(__name__) @@ -125,18 +125,19 @@ def main(): doc_converter = DocumentConverter() - input = DocumentConversionInput.from_paths(input_doc_paths) - start_time = time.time() - conv_results = doc_converter.convert_batch(input) + conv_results = doc_converter.convert_all( + input_doc_paths, + raises_on_error=False, # to let conversion run through all and examine results at the end + ) success_count, partial_success_count, failure_count = export_documents( conv_results, output_dir=Path("./scratch") ) end_time = time.time() - start_time - _log.info(f"All documents were converted in {end_time:.2f} seconds.") + _log.info(f"Document conversion complete in {end_time:.2f} seconds.") if failure_count > 0: raise RuntimeError( diff --git a/examples/custom_convert.py b/examples/custom_convert.py index 0805837b..70d86520 100644 --- a/examples/custom_convert.py +++ b/examples/custom_convert.py @@ -5,7 +5,7 @@ from pathlib import Path from typing import Iterable from docling.datamodel.base_models import ConversionStatus, InputFormat -from docling.datamodel.document import ConversionResult, DocumentConversionInput +from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( PdfPipelineOptions, TesseractCliOcrOptions, @@ -65,9 +65,7 @@ def export_documents( def main(): logging.basicConfig(level=logging.INFO) - input_doc_paths = [ - Path("./tests/data/2206.01062.pdf"), - ] + input_doc_path = Path("./tests/data/2206.01062.pdf") ########################################################################### @@ -152,24 +150,13 @@ def main(): ########################################################################### - # Define input files - input = DocumentConversionInput.from_paths(input_doc_paths) - start_time = time.time() - conv_results = doc_converter.convert_batch(input) - success_count, failure_count = export_documents( - conv_results, output_dir=Path("./scratch") - ) + conv_result = doc_converter.convert(input_doc_path) end_time = time.time() - start_time - _log.info(f"All documents were converted in {end_time:.2f} seconds.") - - if failure_count > 0: - raise RuntimeError( - f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
- ) + _log.info(f"Document converted in {end_time:.2f} seconds.") if __name__ == "__main__": diff --git a/examples/export_figures.py b/examples/export_figures.py index 23f1bd20..4fa4dc58 100644 --- a/examples/export_figures.py +++ b/examples/export_figures.py @@ -2,13 +2,7 @@ import logging import time from pathlib import Path -from docling.datamodel.base_models import ( - ConversionStatus, - FigureElement, - InputFormat, - Table, -) -from docling.datamodel.document import DocumentConversionInput +from docling.datamodel.base_models import FigureElement, InputFormat, Table from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.document_converter import DocumentConverter, PdfFormatOption @@ -20,13 +14,9 @@ IMAGE_RESOLUTION_SCALE = 2.0 def main(): logging.basicConfig(level=logging.INFO) - input_doc_paths = [ - Path("./tests/data/2206.01062.pdf"), - ] + input_doc_path = Path("./tests/data/2206.01062.pdf") output_dir = Path("./scratch") - input_files = DocumentConversionInput.from_paths(input_doc_paths) - # Important: For operating with page images, we must keep them, otherwise the DocumentConverter # will destroy them for cleaning up memory. # This is done by setting AssembleOptions.images_scale, which also defines the scale of images. @@ -42,46 +32,29 @@ def main(): start_time = time.time() - conv_results = doc_converter.convert_batch(input_files) + conv_res = doc_converter.convert(input_doc_path) - success_count = 0 - failure_count = 0 output_dir.mkdir(parents=True, exist_ok=True) - for conv_res in conv_results: - if conv_res.status != ConversionStatus.SUCCESS: - _log.info(f"Document {conv_res.input.file} failed to convert.") - failure_count += 1 - continue + doc_filename = conv_res.input.file.stem - doc_filename = conv_res.input.file.stem + # Export page images + for page in conv_res.pages: + page_no = page.page_no + 1 + page_image_filename = output_dir / f"{doc_filename}-{page_no}.png" + with page_image_filename.open("wb") as fp: + page.image.save(fp, format="PNG") - # Export page images - for page in conv_res.pages: - page_no = page.page_no + 1 - page_image_filename = output_dir / f"{doc_filename}-{page_no}.png" - with page_image_filename.open("wb") as fp: - page.image.save(fp, format="PNG") - - # Export figures and tables - for element, image in conv_res.render_element_images( - element_types=(FigureElement, Table) - ): - element_image_filename = ( - output_dir / f"{doc_filename}-element-{element.id}.png" - ) - with element_image_filename.open("wb") as fp: - image.save(fp, "PNG") - - success_count += 1 + # Export figures and tables + for element, image in conv_res.render_element_images( + element_types=(FigureElement, Table) + ): + element_image_filename = output_dir / f"{doc_filename}-element-{element.id}.png" + with element_image_filename.open("wb") as fp: + image.save(fp, "PNG") end_time = time.time() - start_time - _log.info(f"All documents were converted in {end_time:.2f} seconds.") - - if failure_count > 0: - raise RuntimeError( - f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
- ) + _log.info(f"Document converted and figures exported in {end_time:.2f} seconds.") if __name__ == "__main__": diff --git a/examples/export_multimodal.py b/examples/export_multimodal.py index 11dd3f41..af569131 100644 --- a/examples/export_multimodal.py +++ b/examples/export_multimodal.py @@ -5,8 +5,7 @@ from pathlib import Path import pandas as pd -from docling.datamodel.base_models import ConversionStatus, InputFormat -from docling.datamodel.document import DocumentConversionInput +from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.document_converter import DocumentConverter, PdfFormatOption from docling.utils.export import generate_multimodal_pages @@ -19,13 +18,9 @@ IMAGE_RESOLUTION_SCALE = 2.0 def main(): logging.basicConfig(level=logging.INFO) - input_doc_paths = [ - Path("./tests/data/2206.01062.pdf"), - ] + input_doc_path = Path("./tests/data/2206.01062.pdf") output_dir = Path("./scratch") - input_files = DocumentConversionInput.from_paths(input_doc_paths) - # Important: For operating with page images, we must keep them, otherwise the DocumentConverter # will destroy them for cleaning up memory. # This is done by setting AssembleOptions.images_scale, which also defines the scale of images. @@ -41,53 +36,45 @@ def main(): start_time = time.time() - converted_docs = doc_converter.convert_batch(input_files) + conv_res = doc_converter.convert(input_doc_path) - success_count = 0 - failure_count = 0 output_dir.mkdir(parents=True, exist_ok=True) - for doc in converted_docs: - if doc.status != ConversionStatus.SUCCESS: - _log.info(f"Document {doc.input.file} failed to convert.") - failure_count += 1 - continue - rows = [] - for ( - content_text, - content_md, - content_dt, - page_cells, - page_segments, - page, - ) in generate_multimodal_pages(doc): + rows = [] + for ( + content_text, + content_md, + content_dt, + page_cells, + page_segments, + page, + ) in generate_multimodal_pages(conv_res): - dpi = page._default_image_scale * 72 + dpi = page._default_image_scale * 72 - rows.append( - { - "document": doc.input.file.name, - "hash": doc.input.document_hash, - "page_hash": page.page_hash, - "image": { - "width": page.image.width, - "height": page.image.height, - "bytes": page.image.tobytes(), - }, - "cells": page_cells, - "contents": content_text, - "contents_md": content_md, - "contents_dt": content_dt, - "segments": page_segments, - "extra": { - "page_num": page.page_no + 1, - "width_in_points": page.size.width, - "height_in_points": page.size.height, - "dpi": dpi, - }, - } - ) - success_count += 1 + rows.append( + { + "document": conv_res.input.file.name, + "hash": conv_res.input.document_hash, + "page_hash": page.page_hash, + "image": { + "width": page.image.width, + "height": page.image.height, + "bytes": page.image.tobytes(), + }, + "cells": page_cells, + "contents": content_text, + "contents_md": content_md, + "contents_dt": content_dt, + "segments": page_segments, + "extra": { + "page_num": page.page_no + 1, + "width_in_points": page.size.width, + "height_in_points": page.size.height, + "dpi": dpi, + }, + } + ) # Generate one parquet from all documents df = pd.json_normalize(rows) @@ -97,12 +84,9 @@ def main(): end_time = time.time() - start_time - _log.info(f"All documents were converted in {end_time:.2f} seconds.") - - if failure_count > 0: - raise RuntimeError( - f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
- ) + _log.info( + f"Document converted and multimodal pages generated in {end_time:.2f} seconds." + ) # This block demonstrates how the file can be opened with the HF datasets library # from datasets import Dataset diff --git a/examples/export_tables.py b/examples/export_tables.py index 720e8c67..79a3333d 100644 --- a/examples/export_tables.py +++ b/examples/export_tables.py @@ -4,8 +4,6 @@ from pathlib import Path import pandas as pd -from docling.datamodel.base_models import ConversionStatus -from docling.datamodel.document import DocumentConversionInput from docling.document_converter import DocumentConverter _log = logging.getLogger(__name__) @@ -14,59 +12,39 @@ _log = logging.getLogger(__name__) def main(): logging.basicConfig(level=logging.INFO) - input_doc_paths = [ - Path("./tests/data/2206.01062.pdf"), - ] + input_doc_path = Path("./tests/data/2206.01062.pdf") output_dir = Path("./scratch") - input_files = DocumentConversionInput.from_paths(input_doc_paths) - doc_converter = DocumentConverter() start_time = time.time() - conv_results = doc_converter.convert_batch(input_files) + conv_res = doc_converter.convert(input_doc_path) - success_count = 0 - failure_count = 0 output_dir.mkdir(parents=True, exist_ok=True) - for conv_res in conv_results: - if conv_res.status != ConversionStatus.SUCCESS: - _log.info(f"Document {conv_res.input.file} failed to convert.") - failure_count += 1 - continue - doc_filename = conv_res.input.file.stem + doc_filename = conv_res.input.file.stem - # Export tables - for table_ix, table in enumerate(conv_res.legacy_output.tables): - table_df: pd.DataFrame = table.export_to_dataframe() - print(f"## Table {table_ix}") - print(table_df.to_markdown()) + # Export tables + for table_ix, table in enumerate(conv_res.legacy_output.tables): + table_df: pd.DataFrame = table.export_to_dataframe() + print(f"## Table {table_ix}") + print(table_df.to_markdown()) - # Save the table as csv - element_csv_filename = output_dir / f"{doc_filename}-table-{table_ix+1}.csv" - _log.info(f"Saving CSV table to {element_csv_filename}") - table_df.to_csv(element_csv_filename) + # Save the table as csv + element_csv_filename = output_dir / f"{doc_filename}-table-{table_ix+1}.csv" + _log.info(f"Saving CSV table to {element_csv_filename}") + table_df.to_csv(element_csv_filename) - # Save the table as html - element_html_filename = ( - output_dir / f"{doc_filename}-table-{table_ix+1}.html" - ) - _log.info(f"Saving HTML table to {element_html_filename}") - with element_html_filename.open("w") as fp: - fp.write(table.export_to_html()) - - success_count += 1 + # Save the table as html + element_html_filename = output_dir / f"{doc_filename}-table-{table_ix+1}.html" + _log.info(f"Saving HTML table to {element_html_filename}") + with element_html_filename.open("w") as fp: + fp.write(table.export_to_html()) end_time = time.time() - start_time - _log.info(f"All documents were converted in {end_time:.2f} seconds.") - - if failure_count > 0: - raise RuntimeError( - f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
- ) + _log.info(f"Document converted and tables exported in {end_time:.2f} seconds.") if __name__ == "__main__": diff --git a/examples/minimal.py b/examples/minimal.py index fb84cca4..55cdfc46 100644 --- a/examples/minimal.py +++ b/examples/minimal.py @@ -2,7 +2,7 @@ from docling.document_converter import DocumentConverter source = "https://arxiv.org/pdf/2408.09869" # PDF path or URL converter = DocumentConverter() -result = converter.convert_single(source) +result = converter.convert(source) print(result.output.export_to_markdown()) # output: ## Docling Technical Report [...]" # if the legacy output is needed, use this version # print(result.render_as_markdown_v1()) # output: ## Docling Technical Report [...]" diff --git a/examples/run_with_formats.py b/examples/run_with_formats.py index f086bae2..37bb1b1a 100644 --- a/examples/run_with_formats.py +++ b/examples/run_with_formats.py @@ -6,7 +6,6 @@ from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.msword_backend import MsWordDocumentBackend from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend from docling.datamodel.base_models import InputFormat -from docling.datamodel.document import DocumentConversionInput from docling.document_converter import ( DocumentConverter, FormatOption, @@ -28,7 +27,6 @@ input_paths = [ Path("tests/data/2206.01062.pdf"), # Path("tests/data/2305.03393v1-pg9-img.png"), ] -input = DocumentConversionInput.from_paths(input_paths) ## for defaults use: # doc_converter = DocumentConverter() @@ -52,12 +50,12 @@ doc_converter = DocumentConverter( # all of the below is optional, has internal }, ) -conv_results = doc_converter.convert_batch(input) +conv_results = doc_converter.convert_all(input_paths) for res in conv_results: out_path = Path("./scratch") print( - f"Document {res.input.file.name} converted with status {res.status}." + f"Document {res.input.file.name} converted." 
f"\nSaved markdown output to: {str(out_path)}" ) # print(res.experimental.export_to_markdown()) diff --git a/tests/test_e2e_conversion.py b/tests/test_e2e_conversion.py index d7432a10..c18a7a5b 100644 --- a/tests/test_e2e_conversion.py +++ b/tests/test_e2e_conversion.py @@ -3,7 +3,7 @@ from pathlib import Path from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.datamodel.base_models import InputFormat from docling.datamodel.document import ConversionResult -from docling.datamodel.pipeline_options import PdfPipelineOptions, PipelineOptions +from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.document_converter import DocumentConverter, PdfFormatOption from .verify_utils import verify_conversion_result_v1, verify_conversion_result_v2 @@ -48,7 +48,7 @@ def test_e2e_conversions(): for pdf_path in pdf_paths: print(f"converting {pdf_path}") - doc_result: ConversionResult = converter.convert_single(pdf_path) + doc_result: ConversionResult = converter.convert(pdf_path) verify_conversion_result_v1( input_path=pdf_path, doc_result=doc_result, generate=GENERATE_V1 diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py index ee7f3931..86c22554 100644 --- a/tests/test_e2e_ocr_conversion.py +++ b/tests/test_e2e_ocr_conversion.py @@ -8,7 +8,6 @@ from docling.datamodel.pipeline_options import ( EasyOcrOptions, OcrOptions, PdfPipelineOptions, - PipelineOptions, TesseractCliOcrOptions, TesseractOcrOptions, ) @@ -90,7 +89,7 @@ def test_e2e_conversions(): for pdf_path in pdf_paths: print(f"converting {pdf_path}") - doc_result: ConversionResult = converter.convert_single(pdf_path) + doc_result: ConversionResult = converter.convert(pdf_path) # Save conversions # save_output(pdf_path, doc_result, None) diff --git a/tests/test_interfaces.py b/tests/test_interfaces.py index 80f5ea4e..9ef3d131 100644 --- a/tests/test_interfaces.py +++ b/tests/test_interfaces.py @@ -5,8 +5,7 @@ import pytest from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.datamodel.base_models import DocumentStream, InputFormat -from docling.datamodel.document import ConversionResult, DocumentConversionInput -from docling.datamodel.pipeline_options import PdfPipelineOptions, PipelineOptions +from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.document_converter import DocumentConverter, PdfFormatOption from .verify_utils import verify_conversion_result_v1, verify_conversion_result_v2 @@ -37,39 +36,24 @@ def converter(): return converter -def test_convert_single(converter: DocumentConverter): +def test_convert_path(converter: DocumentConverter): pdf_path = get_pdf_path() print(f"converting {pdf_path}") - doc_result: ConversionResult = converter.convert_single(pdf_path) + doc_result = converter.convert(pdf_path) verify_conversion_result_v1(input_path=pdf_path, doc_result=doc_result) verify_conversion_result_v2(input_path=pdf_path, doc_result=doc_result) -def test_batch_path(converter: DocumentConverter): - - pdf_path = get_pdf_path() - print(f"converting {pdf_path}") - - conv_input = DocumentConversionInput.from_paths([pdf_path]) - - results = converter.convert_batch(conv_input) - for doc_result in results: - verify_conversion_result_v1(input_path=pdf_path, doc_result=doc_result) - verify_conversion_result_v2(input_path=pdf_path, doc_result=doc_result) - - -def test_batch_bytes(converter: DocumentConverter): +def test_convert_stream(converter: DocumentConverter): pdf_path = get_pdf_path() 
print(f"converting {pdf_path}") buf = BytesIO(pdf_path.open("rb").read()) - docs = [DocumentStream(name=pdf_path.name, stream=buf)] - conv_input = DocumentConversionInput.from_streams(docs) + stream = DocumentStream(name=pdf_path.name, stream=buf) - results = converter.convert_batch(conv_input) - for doc_result in results: - verify_conversion_result_v1(input_path=pdf_path, doc_result=doc_result) - verify_conversion_result_v2(input_path=pdf_path, doc_result=doc_result) + doc_result = converter.convert(stream) + verify_conversion_result_v1(input_path=pdf_path, doc_result=doc_result) + verify_conversion_result_v2(input_path=pdf_path, doc_result=doc_result) diff --git a/tests/test_options.py b/tests/test_options.py index 8b35811b..ad6c7a45 100644 --- a/tests/test_options.py +++ b/tests/test_options.py @@ -39,6 +39,6 @@ def test_e2e_conversions(test_doc_path): for converter in get_converters_with_table_options(): print(f"converting {test_doc_path}") - doc_result: ConversionResult = converter.convert_single(test_doc_path) + doc_result: ConversionResult = converter.convert(test_doc_path) assert doc_result.status == ConversionStatus.SUCCESS