From 7c4ab5c716004af72eb25077f3884d57ec73915a Mon Sep 17 00:00:00 2001 From: Maksym Lysak Date: Tue, 21 Jan 2025 18:00:05 +0100 Subject: [PATCH] Moved artifacts_path for SmolDocling into vlm_options instead of global pipeline option Signed-off-by: Maksym Lysak --- docling/models/smol_docling_model.py | 3 ++- docling/pipeline/vlm_pipeline.py | 3 ++- docs/examples/minimal_smol_docling.py | 1 - 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docling/models/smol_docling_model.py b/docling/models/smol_docling_model.py index 86db3d7e..79d2affd 100644 --- a/docling/models/smol_docling_model.py +++ b/docling/models/smol_docling_model.py @@ -30,7 +30,7 @@ class SmolDoclingModel(BasePageModel): def __init__( self, - artifacts_path: Path, + # artifacts_path: Path, accelerator_options: AcceleratorOptions, vlm_options: SmolDoclingOptions, ): @@ -39,6 +39,7 @@ class SmolDoclingModel(BasePageModel): _log.info("Available device for SmolDocling: {}".format(device)) # PARAMETERS: + artifacts_path = Path(vlm_options.artifacts_path) self.param_question = vlm_options.question # "Perform Layout Analysis." self.param_quantization_config = BitsAndBytesConfig( load_in_8bit=vlm_options.load_in_8bit, # True, diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py index c484ca24..357499f6 100644 --- a/docling/pipeline/vlm_pipeline.py +++ b/docling/pipeline/vlm_pipeline.py @@ -45,10 +45,12 @@ class VlmPipeline(PaginatedPipeline): # force_backend_text = True - get text from backend using bounding boxes predicted by SmolDoclingss self.force_backend_text = pipeline_options.force_backend_text + """ if pipeline_options.artifacts_path is None: self.artifacts_path = self.download_models_hf() else: self.artifacts_path = Path(pipeline_options.artifacts_path) + """ keep_images = ( self.pipeline_options.generate_page_images @@ -58,7 +60,6 @@ class VlmPipeline(PaginatedPipeline): self.build_pipe = [ SmolDoclingModel( - artifacts_path=self.artifacts_path, accelerator_options=pipeline_options.accelerator_options, vlm_options=self.pipeline_options.vlm_options, ), diff --git a/docs/examples/minimal_smol_docling.py b/docs/examples/minimal_smol_docling.py index a3d36a30..6e6209a5 100644 --- a/docs/examples/minimal_smol_docling.py +++ b/docs/examples/minimal_smol_docling.py @@ -22,7 +22,6 @@ pipeline_options = PdfPipelineOptions() pipeline_options.generate_page_images = True # If force_backend_text = True, text from backend will be used instead of generated text pipeline_options.force_backend_text = False -pipeline_options.artifacts_path = "model_artifacts/SmolDocling_2.7_DT_0.7" vlm_options = SmolDoclingOptions( artifacts_path="model_artifacts/SmolDocling_2.7_DT_0.7",