diff --git a/docling/cli/main.py b/docling/cli/main.py index a2e6962e..cb21365e 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -343,9 +343,9 @@ def convert( raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}") pdf_format_option = PdfFormatOption( - pipeline_options=pipeline_options, - backend=backend, # pdf_backend - ) + pipeline_options=pipeline_options, + backend=backend, # pdf_backend + ) format_options: Dict[InputFormat, FormatOption] = { InputFormat.PDF: pdf_format_option, InputFormat.IMAGE: pdf_format_option, diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 13560132..9be3ee82 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -143,7 +143,11 @@ class PdfPipelineOptions(PipelineOptions): table_structure_options: TableStructureOptions = TableStructureOptions() ocr_options: Union[ - EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, OcrMacOptions, RapidOcrOptions + EasyOcrOptions, + TesseractCliOcrOptions, + TesseractOcrOptions, + OcrMacOptions, + RapidOcrOptions, ] = Field(EasyOcrOptions(), discriminator="kind") images_scale: float = 1.0 diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index f733ced9..42ce238f 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -96,11 +96,13 @@ class StandardPdfPipeline(PaginatedPipeline): def download_models_hf( local_dir: Optional[Path] = None, force: bool = False ) -> Path: + from functools import partialmethod + from huggingface_hub import snapshot_download # Disable tqdm prints used by HF from tqdm import tqdm - from functools import partialmethod + tqdm.__init__ = partialmethod(tqdm.__init__, disable=True) download_path = snapshot_download(