Pydantic field validator and comment restored.

Signed-off-by: ahn <ahn@zurich.ibm.com>
2025-08-01 15:02:21 +00:00 · 2025-01-31 16:21:36 +01:00 · 2025-01-31 16:21:36 +01:00 · dd0728e646
commit dd0728e646
parent b9668877be
3 changed files with 14 additions and 41 deletions
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@ -41,7 +41,7 @@ class AcceleratorOptions(BaseSettings):
    num_threads: int = 4
    device: str = "auto"
-    @validator("device")
+    @field_validator("device")
    def validate_device(cls, value):
        # "auto", "cpu", "cuda", "mps", or "cuda:N"
        if value in {d.value for d in AcceleratorDevice} or re.match(
@ -55,6 +55,15 @@ class AcceleratorOptions(BaseSettings):
    @model_validator(mode="before")
    @classmethod
    def check_alternative_envvars(cls, data: Any) -> Any:
        r"""
        Set num_threads from the "alternative" envvar OMP_NUM_THREADS.
        The alternative envvar is used only if it is valid and the regular envvar is not set.
        Notice: The standard pydantic settings mechanism with parameter "aliases" does not provide
        the same functionality. In case the alias envvar is set and the user tries to override the
        parameter in settings initialization, Pydantic treats the parameter provided in __init__()
        as an extra input instead of simply overwriting the evvar value for that parameter.
        """
        if isinstance(data, dict):
            input_num_threads = data.get("num_threads")
            if input_num_threads is None:
--- a/docling/models/easyocr_model.py
+++ b/docling/models/easyocr_model.py
@ -18,7 +18,6 @@ from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
 from docling.utils.accelerator_utils import decide_device
 from docling.utils.profiling import TimeRecorder
 from docling.utils.utils import download_url_with_progress
 _log = logging.getLogger(__name__)
@ -56,6 +55,7 @@ class EasyOcrModel(BaseOcrModel):
                        for x in [
                            AcceleratorDevice.CUDA.value,
                            AcceleratorDevice.MPS.value,
                            "cuda:",
                        ]
                    ]
                )
@ -82,40 +82,6 @@ class EasyOcrModel(BaseOcrModel):
                verbose=False,
            )
    @staticmethod
    def download_models(
        detection_models: List[str] = ["craft"],
        recognition_models: List[str] = ["english_g2", "latin_g2"],
        local_dir: Optional[Path] = None,
        force: bool = False,
        progress: bool = False,
    ) -> Path:
        # Models are located in https://github.com/JaidedAI/EasyOCR/blob/master/easyocr/config.py
        from easyocr.config import detection_models as det_models_dict
        from easyocr.config import recognition_models as rec_models_dict
        if local_dir is None:
            local_dir = settings.cache_dir / "models" / EasyOcrModel._model_repo_folder
        local_dir.mkdir(parents=True, exist_ok=True)
        # Collect models to download
        download_list = []
        for model_name in detection_models:
            if model_name in det_models_dict:
                download_list.append(det_models_dict[model_name])
        for model_name in recognition_models:
            if model_name in rec_models_dict["gen2"]:
                download_list.append(rec_models_dict["gen2"][model_name])
        # Download models
        for model_details in download_list:
            buf = download_url_with_progress(model_details["url"], progress=progress)
            with zipfile.ZipFile(buf, "r") as zip_ref:
                zip_ref.extractall(local_dir)
        return local_dir
    def __call__(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
    ) -> Iterable[Page]:
--- a/docs/examples/run_with_accelerator.py
+++ b/docs/examples/run_with_accelerator.py
@ -31,9 +31,7 @@ def main():
    # )
    # easyocr doesnt support cuda:N allocation
-    # accelerator_options = AcceleratorOptions(
+    # accelerator_options = AcceleratorOptions(num_threads=8, device="cuda:0")
    #     num_threads=8, device="cuda:1"
    # )
    pipeline_options = PdfPipelineOptions()
    pipeline_options.accelerator_options = accelerator_options
@ -59,8 +57,8 @@ def main():
    # List with total time per document
    doc_conversion_secs = conversion_result.timings["pipeline_total"].times
-    md = doc.export_to_markdown()
+    # md = doc.export_to_markdown()
-    print(md)
+    # print(md)
    print(f"Conversion secs: {doc_conversion_secs}")