diff --git a/docling/cli/main.py b/docling/cli/main.py index 19f77e4e..2ca28866 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -41,6 +41,8 @@ from docling.datamodel.pipeline_options import ( ) from docling.datamodel.settings import settings from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption +from docling.models.code_formula_model import CodeFormulaModel +from docling.models.document_picture_classifier import DocumentPictureClassifier warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch") warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr") @@ -288,6 +290,12 @@ def convert( device: Annotated[ AcceleratorDevice, typer.Option(..., help="Accelerator device") ] = AcceleratorDevice.AUTO, + batch_size_code_formula: Annotated[ + int, typer.Option(..., help="Batch size for the code and formula model") + ] = CodeFormulaModel.elements_batch_size, + batch_size_picture_classes: Annotated[ + int, typer.Option(..., help="Batch size for the picture classifier") + ] = DocumentPictureClassifier.elements_batch_size, ): if verbose == 0: logging.basicConfig(level=logging.WARNING) diff --git a/docling/models/base_model.py b/docling/models/base_model.py index 08d728cc..a2bc776e 100644 --- a/docling/models/base_model.py +++ b/docling/models/base_model.py @@ -6,6 +6,7 @@ from typing_extensions import TypeVar from docling.datamodel.base_models import ItemAndImageEnrichmentElement, Page from docling.datamodel.document import ConversionResult +from docling.datamodel.settings import settings class BasePageModel(ABC): @@ -21,6 +22,8 @@ EnrichElementT = TypeVar("EnrichElementT", default=NodeItem) class GenericEnrichmentModel(ABC, Generic[EnrichElementT]): + elements_batch_size: int = settings.perf.elements_batch_size + @abstractmethod def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool: pass diff --git a/docling/models/code_formula_model.py b/docling/models/code_formula_model.py index e4d56945..d3752d7f 100644 --- a/docling/models/code_formula_model.py +++ b/docling/models/code_formula_model.py @@ -61,6 +61,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel): Processes the given batch of elements and enriches them with predictions. """ + elements_batch_size = 5 images_scale = 1.66 # = 120 dpi, aligned with training data resolution expansion_factor = 0.03 @@ -234,6 +235,8 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel): labels.append(el.item.label) images.append(el.image) + print(f"BATCH: {len(images)=}") + outputs = self.code_formula_model.predict(images, labels) for item, output in zip(elements, outputs): diff --git a/docling/pipeline/base_pipeline.py b/docling/pipeline/base_pipeline.py index 89aedf8e..1bf48ef0 100644 --- a/docling/pipeline/base_pipeline.py +++ b/docling/pipeline/base_pipeline.py @@ -79,7 +79,7 @@ class BasePipeline(ABC): for model in self.enrichment_pipe: for element_batch in chunkify( _prepare_elements(conv_res, model), - settings.perf.elements_batch_size, + model.elements_batch_size, ): for element in model( doc=conv_res.document, element_batch=element_batch