mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-16 16:48:21 +00:00
feat: add factory for ocr engines via plugins (#1010)
* add factory for ocr engines Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply pre-commit after rebase Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add picture description factory Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix enable option Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * switch to create methods Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * make `options` an explicit kwarg Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * keep old lock of docling-core Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix lock Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add allow_external_plugins option Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add factory return and ignore options type Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> Co-authored-by: Panos Vagenas <pva@zurich.ibm.com>
This commit is contained in:
@@ -1,14 +1,22 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Generic, Iterable, Optional
|
||||
from typing import Any, Generic, Iterable, Optional, Protocol, Type
|
||||
|
||||
from docling_core.types.doc import BoundingBox, DocItem, DoclingDocument, NodeItem
|
||||
from typing_extensions import TypeVar
|
||||
|
||||
from docling.datamodel.base_models import ItemAndImageEnrichmentElement, Page
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import BaseOptions
|
||||
from docling.datamodel.settings import settings
|
||||
|
||||
|
||||
class BaseModelWithOptions(Protocol):
    """Structural interface for models that are configured via an options object.

    A conforming class exposes the options type it is built from and accepts
    that options object through a keyword-only ``options`` argument.
    """

    @classmethod
    def get_options_type(cls) -> Type[BaseOptions]:
        """Return the ``BaseOptions`` subclass this model is configured with."""

    def __init__(self, *, options: BaseOptions, **kwargs):
        """Accept ``options`` by keyword, plus arbitrary extra keyword arguments."""
|
||||
|
||||
|
||||
class BasePageModel(ABC):
|
||||
@abstractmethod
|
||||
def __call__(
|
||||
|
||||
@@ -2,7 +2,7 @@ import copy
|
||||
import logging
|
||||
from abc import abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List
|
||||
from typing import Iterable, List, Optional, Type
|
||||
|
||||
import numpy as np
|
||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
@@ -13,15 +13,22 @@ from scipy.ndimage import binary_dilation, find_objects, label
|
||||
|
||||
from docling.datamodel.base_models import Page
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import OcrOptions
|
||||
from docling.datamodel.pipeline_options import AcceleratorOptions, OcrOptions
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_model import BasePageModel
|
||||
from docling.models.base_model import BaseModelWithOptions, BasePageModel
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseOcrModel(BasePageModel):
|
||||
def __init__(self, enabled: bool, options: OcrOptions):
|
||||
class BaseOcrModel(BasePageModel, BaseModelWithOptions):
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
enabled: bool,
|
||||
artifacts_path: Optional[Path],
|
||||
options: OcrOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
self.enabled = enabled
|
||||
self.options = options
|
||||
|
||||
@@ -186,3 +193,8 @@ class BaseOcrModel(BasePageModel):
|
||||
self, conv_res: ConversionResult, page_batch: Iterable[Page]
|
||||
) -> Iterable[Page]:
|
||||
pass
|
||||
|
||||
@classmethod
@abstractmethod
def get_options_type(cls) -> Type[OcrOptions]:
    """Return the ``OcrOptions`` subclass handled by the concrete OCR model.

    Abstract: every concrete OCR model must override this.
    """
|
||||
|
||||
@@ -2,7 +2,7 @@ import logging
|
||||
import warnings
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Optional
|
||||
from typing import Iterable, List, Optional, Type
|
||||
|
||||
import numpy
|
||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
@@ -14,6 +14,7 @@ from docling.datamodel.pipeline_options import (
|
||||
AcceleratorDevice,
|
||||
AcceleratorOptions,
|
||||
EasyOcrOptions,
|
||||
OcrOptions,
|
||||
)
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
@@ -34,7 +35,12 @@ class EasyOcrModel(BaseOcrModel):
|
||||
options: EasyOcrOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
super().__init__(enabled=enabled, options=options)
|
||||
super().__init__(
|
||||
enabled=enabled,
|
||||
artifacts_path=artifacts_path,
|
||||
options=options,
|
||||
accelerator_options=accelerator_options,
|
||||
)
|
||||
self.options: EasyOcrOptions
|
||||
|
||||
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
||||
@@ -180,3 +186,7 @@ class EasyOcrModel(BaseOcrModel):
|
||||
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
|
||||
|
||||
yield page
|
||||
|
||||
@classmethod
def get_options_type(cls) -> Type[OcrOptions]:
    """Identify ``EasyOcrOptions`` as the options class for this model."""
    return EasyOcrOptions
|
||||
|
||||
27
docling/models/factories/__init__.py
Normal file
27
docling/models/factories/__init__.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import logging
|
||||
from functools import lru_cache
|
||||
|
||||
from docling.models.factories.ocr_factory import OcrFactory
|
||||
from docling.models.factories.picture_description_factory import (
|
||||
PictureDescriptionFactory,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@lru_cache()
def get_ocr_factory(allow_external_plugins: bool = False) -> OcrFactory:
    """Build an ``OcrFactory`` and populate it from plugins.

    The result is memoised by ``lru_cache``, so repeated calls with the same
    argument return the same factory object.

    Args:
        allow_external_plugins: Forwarded unchanged to
            ``OcrFactory.load_from_plugins``.

    Returns:
        The populated factory.
    """
    ocr_factory = OcrFactory()
    ocr_factory.load_from_plugins(allow_external_plugins=allow_external_plugins)
    logger.info("Registered ocr engines: %r", ocr_factory.registered_kind)
    return ocr_factory
|
||||
|
||||
|
||||
@lru_cache()
def get_picture_description_factory(
    allow_external_plugins: bool = False,
) -> PictureDescriptionFactory:
    """Build a ``PictureDescriptionFactory`` and populate it from plugins.

    The result is memoised by ``lru_cache``, so repeated calls with the same
    argument return the same factory object.

    Args:
        allow_external_plugins: Forwarded unchanged to
            ``PictureDescriptionFactory.load_from_plugins``.

    Returns:
        The populated factory.
    """
    description_factory = PictureDescriptionFactory()
    description_factory.load_from_plugins(
        allow_external_plugins=allow_external_plugins
    )
    logger.info(
        "Registered picture descriptions: %r", description_factory.registered_kind
    )
    return description_factory
|
||||
122
docling/models/factories/base_factory.py
Normal file
122
docling/models/factories/base_factory.py
Normal file
@@ -0,0 +1,122 @@
|
||||
import enum
|
||||
import logging
|
||||
from abc import ABCMeta
|
||||
from typing import Generic, Optional, Type, TypeVar
|
||||
|
||||
from pluggy import PluginManager
|
||||
from pydantic import BaseModel
|
||||
|
||||
from docling.datamodel.pipeline_options import BaseOptions
|
||||
from docling.models.base_model import BaseModelWithOptions
|
||||
|
||||
A = TypeVar("A", bound=BaseModelWithOptions)
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FactoryMeta(BaseModel):
    """Provenance record kept by a factory for each class it registers."""

    # ``kind`` attribute of the options type the class was registered under.
    kind: str
    # Name of the plugin that contributed the class.
    plugin_name: str
    # Name of the module the plugin was loaded from.
    module: str
|
||||
|
||||
|
||||
class BaseFactory(Generic[A], metaclass=ABCMeta):
    """Registry that maps options types to the model classes built from them.

    Classes are registered under the options type returned by their
    ``get_options_type()`` classmethod. They can be registered directly via
    :meth:`register` or discovered through pluggy entry points via
    :meth:`load_from_plugins`.
    """

    default_plugin_name = "docling"

    def __init__(self, plugin_attr_name: str, plugin_name=default_plugin_name):
        """Create an empty factory.

        Args:
            plugin_attr_name: Name of the callable looked up on every plugin
                module; it is also the key read from the mapping that callable
                returns.
            plugin_name: Default project/entry-point group name handed to
                pluggy by :meth:`load_from_plugins`.
        """
        self.plugin_name = plugin_name
        self.plugin_attr_name = plugin_attr_name

        self._classes: dict[Type[BaseOptions], Type[A]] = {}
        self._meta: dict[Type[BaseOptions], FactoryMeta] = {}

    @property
    def registered_kind(self) -> list[str]:
        """The ``kind`` of every registered options type, in registration order."""
        return [opt.kind for opt in self._classes]

    def get_enum(self) -> enum.Enum:
        """Build a ``str``-mixin enum whose members are the registered kinds.

        The enum is created through the functional ``enum.Enum(...)`` API, so
        the returned object is a new enum class named
        ``<plugin_attr_name>_enum``; each member's name and value are both the
        kind string.
        """
        return enum.Enum(
            self.plugin_attr_name + "_enum",
            names={kind: kind for kind in self.registered_kind},
            type=str,
            module=__name__,
        )

    @property
    def classes(self):
        """The live mapping from options type to registered class (not a copy)."""
        return self._classes

    @property
    def registered_meta(self):
        """The live mapping from options type to its ``FactoryMeta`` (not a copy)."""
        return self._meta

    def create_instance(self, options: BaseOptions, **kwargs) -> A:
        """Instantiate the class registered for ``type(options)``.

        Args:
            options: Options object; its exact type selects the class.
            **kwargs: Extra keyword arguments forwarded to the constructor.

        Returns:
            A new instance built as ``cls(options=options, **kwargs)``.

        Raises:
            RuntimeError: If no class is registered for ``type(options)``.
        """
        # Only the registry lookup is guarded: a KeyError raised inside the
        # model's own constructor must surface as-is instead of being
        # misreported as "No class found".
        try:
            _cls = self._classes[type(options)]
        except KeyError:
            raise RuntimeError(
                self._err_msg_on_class_not_found(options.kind)
            ) from None
        return _cls(options=options, **kwargs)

    def create_options(self, kind: str, *args, **kwargs) -> BaseOptions:
        """Instantiate the registered options type whose ``kind`` equals ``kind``.

        Args:
            kind: Kind string to look up.
            *args: Positional arguments forwarded to the options constructor.
            **kwargs: Keyword arguments forwarded to the options constructor.

        Raises:
            RuntimeError: If no registered options type has that kind.
        """
        for opt_cls in self._classes:
            if opt_cls.kind == kind:
                return opt_cls(*args, **kwargs)
        raise RuntimeError(self._err_msg_on_class_not_found(kind))

    def _err_msg_on_class_not_found(self, kind: str):
        """Format the "not found" message, listing every known kind and class."""
        msg_str = "\n".join(
            f"\t{opt.kind!r} => {cls!r}" for opt, cls in self._classes.items()
        )

        return f"No class found with the name {kind!r}, known classes are:\n{msg_str}"

    def register(self, cls: Type[A], plugin_name: str, plugin_module_name: str):
        """Register ``cls`` under the options type it declares.

        Args:
            cls: Class to register; ``cls.get_options_type()`` gives the key.
            plugin_name: Plugin name stored in the metadata record.
            plugin_module_name: Module name stored in the metadata record.

        Raises:
            ValueError: If the options type is already registered.
        """
        opt_type = cls.get_options_type()

        if opt_type in self._classes:
            raise ValueError(
                f"{opt_type.kind!r} already registered to class {self._classes[opt_type]!r}"
            )

        self._classes[opt_type] = cls
        self._meta[opt_type] = FactoryMeta(
            kind=opt_type.kind, plugin_name=plugin_name, module=plugin_module_name
        )

    def load_from_plugins(
        self, plugin_name: Optional[str] = None, allow_external_plugins: bool = False
    ):
        """Discover plugins through pluggy and register the classes they expose.

        Args:
            plugin_name: Project/entry-point group name given to pluggy;
                falls back to ``self.plugin_name`` when falsy.
            allow_external_plugins: When false, plugins whose module name does
                not start with ``"docling."`` are skipped with a warning.
        """
        group_name = plugin_name or self.plugin_name

        plugin_manager = PluginManager(group_name)
        plugin_manager.load_setuptools_entrypoints(group_name)

        # A separate loop variable keeps the ``plugin_name`` argument intact.
        for loaded_name, plugin_module in plugin_manager.list_name_plugin():
            plugin_module_name = str(plugin_module.__name__)  # type: ignore

            if not allow_external_plugins and not plugin_module_name.startswith(
                "docling."
            ):
                logger.warning(
                    "The plugin %s will not be loaded because Docling is being executed with allow_external_plugins=false.",
                    loaded_name,
                )
                continue

            attr = getattr(plugin_module, self.plugin_attr_name, None)

            # Plugins that do not expose a callable with this name are ignored.
            if callable(attr):
                logger.info("Loading plugin %r", loaded_name)

                config = attr()
                self.process_plugin(config, loaded_name, plugin_module_name)

    def process_plugin(self, config, plugin_name: str, plugin_module_name: str):
        """Register every class listed under ``config[self.plugin_attr_name]``.

        A ``ValueError`` from :meth:`register` is logged as a warning and the
        item is skipped, so one duplicate does not abort the remaining items.
        """
        for item in config[self.plugin_attr_name]:
            try:
                self.register(item, plugin_name, plugin_module_name)
            except ValueError:
                logger.warning("%r already registered", item)
|
||||
11
docling/models/factories/ocr_factory.py
Normal file
11
docling/models/factories/ocr_factory.py
Normal file
@@ -0,0 +1,11 @@
|
||||
import logging
|
||||
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
from docling.models.factories.base_factory import BaseFactory
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OcrFactory(BaseFactory[BaseOcrModel]):
    """Factory for ``BaseOcrModel`` classes, using the ``"ocr_engines"`` plugin attribute."""

    def __init__(self, *args, **kwargs):
        """Fix the plugin attribute name to ``"ocr_engines"``.

        Any further positional or keyword arguments go straight to
        ``BaseFactory.__init__``.
        """
        super().__init__("ocr_engines", *args, **kwargs)
|
||||
11
docling/models/factories/picture_description_factory.py
Normal file
11
docling/models/factories/picture_description_factory.py
Normal file
@@ -0,0 +1,11 @@
|
||||
import logging
|
||||
|
||||
from docling.models.factories.base_factory import BaseFactory
|
||||
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PictureDescriptionFactory(BaseFactory[PictureDescriptionBaseModel]):
    """Factory for ``PictureDescriptionBaseModel`` classes, using the ``"picture_description"`` plugin attribute."""

    def __init__(self, *args, **kwargs):
        """Fix the plugin attribute name to ``"picture_description"``.

        Any further positional or keyword arguments go straight to
        ``BaseFactory.__init__``.
        """
        super().__init__("picture_description", *args, **kwargs)
|
||||
@@ -1,13 +1,19 @@
|
||||
import logging
|
||||
import sys
|
||||
import tempfile
|
||||
from typing import Iterable, Optional, Tuple
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional, Tuple, Type
|
||||
|
||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
||||
|
||||
from docling.datamodel.base_models import Page
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import OcrMacOptions
|
||||
from docling.datamodel.pipeline_options import (
|
||||
AcceleratorOptions,
|
||||
OcrMacOptions,
|
||||
OcrOptions,
|
||||
)
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
from docling.utils.profiling import TimeRecorder
|
||||
@@ -16,13 +22,26 @@ _log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OcrMacModel(BaseOcrModel):
|
||||
def __init__(self, enabled: bool, options: OcrMacOptions):
|
||||
super().__init__(enabled=enabled, options=options)
|
||||
def __init__(
|
||||
self,
|
||||
enabled: bool,
|
||||
artifacts_path: Optional[Path],
|
||||
options: OcrMacOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
super().__init__(
|
||||
enabled=enabled,
|
||||
artifacts_path=artifacts_path,
|
||||
options=options,
|
||||
accelerator_options=accelerator_options,
|
||||
)
|
||||
self.options: OcrMacOptions
|
||||
|
||||
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
||||
|
||||
if self.enabled:
|
||||
if "darwin" != sys.platform:
|
||||
raise RuntimeError(f"OcrMac is only supported on Mac.")
|
||||
install_errmsg = (
|
||||
"ocrmac is not correctly installed. "
|
||||
"Please install it via `pip install ocrmac` to use this OCR engine. "
|
||||
@@ -121,3 +140,7 @@ class OcrMacModel(BaseOcrModel):
|
||||
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
|
||||
|
||||
yield page
|
||||
|
||||
@classmethod
def get_options_type(cls) -> Type[OcrOptions]:
    """Identify ``OcrMacOptions`` as the options class for this model."""
    return OcrMacOptions
|
||||
|
||||
@@ -1,13 +1,18 @@
|
||||
import base64
|
||||
import io
|
||||
import logging
|
||||
from typing import Iterable, List, Optional
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Optional, Type, Union
|
||||
|
||||
import requests
|
||||
from PIL import Image
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from docling.datamodel.pipeline_options import PictureDescriptionApiOptions
|
||||
from docling.datamodel.pipeline_options import (
|
||||
AcceleratorOptions,
|
||||
PictureDescriptionApiOptions,
|
||||
PictureDescriptionBaseOptions,
|
||||
)
|
||||
from docling.exceptions import OperationNotAllowed
|
||||
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
|
||||
|
||||
@@ -46,13 +51,25 @@ class ApiResponse(BaseModel):
|
||||
class PictureDescriptionApiModel(PictureDescriptionBaseModel):
|
||||
# elements_batch_size = 4
|
||||
|
||||
@classmethod
def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
    """Identify ``PictureDescriptionApiOptions`` as the options class for this model."""
    return PictureDescriptionApiOptions
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
enabled: bool,
|
||||
enable_remote_services: bool,
|
||||
artifacts_path: Optional[Union[Path, str]],
|
||||
options: PictureDescriptionApiOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
super().__init__(enabled=enabled, options=options)
|
||||
super().__init__(
|
||||
enabled=enabled,
|
||||
enable_remote_services=enable_remote_services,
|
||||
artifacts_path=artifacts_path,
|
||||
options=options,
|
||||
accelerator_options=accelerator_options,
|
||||
)
|
||||
self.options: PictureDescriptionApiOptions
|
||||
|
||||
if self.enabled:
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import logging
|
||||
from abc import abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, List, Optional, Union
|
||||
from typing import Any, Iterable, List, Optional, Type, Union
|
||||
|
||||
from docling_core.types.doc import (
|
||||
DoclingDocument,
|
||||
@@ -13,20 +14,30 @@ from docling_core.types.doc.document import ( # TODO: move import to docling_co
|
||||
)
|
||||
from PIL import Image
|
||||
|
||||
from docling.datamodel.pipeline_options import PictureDescriptionBaseOptions
|
||||
from docling.datamodel.pipeline_options import (
|
||||
AcceleratorOptions,
|
||||
PictureDescriptionBaseOptions,
|
||||
)
|
||||
from docling.models.base_model import (
|
||||
BaseItemAndImageEnrichmentModel,
|
||||
BaseModelWithOptions,
|
||||
ItemAndImageEnrichmentElement,
|
||||
)
|
||||
|
||||
|
||||
class PictureDescriptionBaseModel(BaseItemAndImageEnrichmentModel):
|
||||
class PictureDescriptionBaseModel(
|
||||
BaseItemAndImageEnrichmentModel, BaseModelWithOptions
|
||||
):
|
||||
images_scale: float = 2.0
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
enabled: bool,
|
||||
enable_remote_services: bool,
|
||||
artifacts_path: Optional[Union[Path, str]],
|
||||
options: PictureDescriptionBaseOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
self.enabled = enabled
|
||||
self.options = options
|
||||
@@ -62,3 +73,8 @@ class PictureDescriptionBaseModel(BaseItemAndImageEnrichmentModel):
|
||||
PictureDescriptionData(text=output, provenance=self.provenance)
|
||||
)
|
||||
yield item
|
||||
|
||||
@classmethod
@abstractmethod
def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
    """Return the ``PictureDescriptionBaseOptions`` subclass handled by the concrete model.

    Abstract: every concrete picture-description model must override this.
    """
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional, Union
|
||||
from typing import Iterable, Optional, Type, Union
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from docling.datamodel.pipeline_options import (
|
||||
AcceleratorOptions,
|
||||
PictureDescriptionBaseOptions,
|
||||
PictureDescriptionVlmOptions,
|
||||
)
|
||||
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
|
||||
@@ -13,14 +14,25 @@ from docling.utils.accelerator_utils import decide_device
|
||||
|
||||
class PictureDescriptionVlmModel(PictureDescriptionBaseModel):
|
||||
|
||||
@classmethod
def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
    """Identify ``PictureDescriptionVlmOptions`` as the options class for this model."""
    return PictureDescriptionVlmOptions
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
enabled: bool,
|
||||
enable_remote_services: bool,
|
||||
artifacts_path: Optional[Union[Path, str]],
|
||||
options: PictureDescriptionVlmOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
super().__init__(enabled=enabled, options=options)
|
||||
super().__init__(
|
||||
enabled=enabled,
|
||||
enable_remote_services=enable_remote_services,
|
||||
artifacts_path=artifacts_path,
|
||||
options=options,
|
||||
accelerator_options=accelerator_options,
|
||||
)
|
||||
self.options: PictureDescriptionVlmOptions
|
||||
|
||||
if self.enabled:
|
||||
|
||||
0
docling/models/plugins/__init__.py
Normal file
0
docling/models/plugins/__init__.py
Normal file
28
docling/models/plugins/defaults.py
Normal file
28
docling/models/plugins/defaults.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from docling.models.easyocr_model import EasyOcrModel
|
||||
from docling.models.ocr_mac_model import OcrMacModel
|
||||
from docling.models.picture_description_api_model import PictureDescriptionApiModel
|
||||
from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
|
||||
from docling.models.rapid_ocr_model import RapidOcrModel
|
||||
from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
|
||||
from docling.models.tesseract_ocr_model import TesseractOcrModel
|
||||
|
||||
|
||||
def ocr_engines():
    """Plugin hook: list the built-in OCR model classes.

    Returns:
        A dict whose single key ``"ocr_engines"`` maps to the list of classes.
    """
    engines = [
        EasyOcrModel,
        OcrMacModel,
        RapidOcrModel,
        TesseractOcrModel,
        TesseractOcrCliModel,
    ]
    return {"ocr_engines": engines}
|
||||
|
||||
|
||||
def picture_description():
    """Plugin hook: list the built-in picture-description model classes.

    Returns:
        A dict whose single key ``"picture_description"`` maps to the list of
        classes.
    """
    describers = [
        PictureDescriptionVlmModel,
        PictureDescriptionApiModel,
    ]
    return {"picture_description": describers}
|
||||
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
from typing import Iterable
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional, Type
|
||||
|
||||
import numpy
|
||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
@@ -10,6 +11,7 @@ from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import (
|
||||
AcceleratorDevice,
|
||||
AcceleratorOptions,
|
||||
OcrOptions,
|
||||
RapidOcrOptions,
|
||||
)
|
||||
from docling.datamodel.settings import settings
|
||||
@@ -24,10 +26,16 @@ class RapidOcrModel(BaseOcrModel):
|
||||
def __init__(
|
||||
self,
|
||||
enabled: bool,
|
||||
artifacts_path: Optional[Path],
|
||||
options: RapidOcrOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
super().__init__(enabled=enabled, options=options)
|
||||
super().__init__(
|
||||
enabled=enabled,
|
||||
artifacts_path=artifacts_path,
|
||||
options=options,
|
||||
accelerator_options=accelerator_options,
|
||||
)
|
||||
self.options: RapidOcrOptions
|
||||
|
||||
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
||||
@@ -135,3 +143,7 @@ class RapidOcrModel(BaseOcrModel):
|
||||
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
|
||||
|
||||
yield page
|
||||
|
||||
@classmethod
def get_options_type(cls) -> Type[OcrOptions]:
    """Identify ``RapidOcrOptions`` as the options class for this model."""
    return RapidOcrOptions
|
||||
|
||||
@@ -3,8 +3,9 @@ import io
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from subprocess import DEVNULL, PIPE, Popen
|
||||
from typing import Iterable, List, Optional, Tuple
|
||||
from typing import Iterable, List, Optional, Tuple, Type
|
||||
|
||||
import pandas as pd
|
||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
@@ -12,7 +13,11 @@ from docling_core.types.doc.page import BoundingRectangle, TextCell
|
||||
|
||||
from docling.datamodel.base_models import Page
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import TesseractCliOcrOptions
|
||||
from docling.datamodel.pipeline_options import (
|
||||
AcceleratorOptions,
|
||||
OcrOptions,
|
||||
TesseractCliOcrOptions,
|
||||
)
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
from docling.utils.ocr_utils import map_tesseract_script
|
||||
@@ -22,8 +27,19 @@ _log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TesseractOcrCliModel(BaseOcrModel):
|
||||
def __init__(self, enabled: bool, options: TesseractCliOcrOptions):
|
||||
super().__init__(enabled=enabled, options=options)
|
||||
def __init__(
|
||||
self,
|
||||
enabled: bool,
|
||||
artifacts_path: Optional[Path],
|
||||
options: TesseractCliOcrOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
super().__init__(
|
||||
enabled=enabled,
|
||||
artifacts_path=artifacts_path,
|
||||
options=options,
|
||||
accelerator_options=accelerator_options,
|
||||
)
|
||||
self.options: TesseractCliOcrOptions
|
||||
|
||||
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
||||
@@ -257,3 +273,7 @@ class TesseractOcrCliModel(BaseOcrModel):
|
||||
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
|
||||
|
||||
yield page
|
||||
|
||||
@classmethod
def get_options_type(cls) -> Type[OcrOptions]:
    """Identify ``TesseractCliOcrOptions`` as the options class for this model."""
    return TesseractCliOcrOptions
|
||||
|
||||
@@ -1,12 +1,17 @@
|
||||
import logging
|
||||
from typing import Iterable
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional, Type
|
||||
|
||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
||||
|
||||
from docling.datamodel.base_models import Page
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import TesseractOcrOptions
|
||||
from docling.datamodel.pipeline_options import (
|
||||
AcceleratorOptions,
|
||||
OcrOptions,
|
||||
TesseractOcrOptions,
|
||||
)
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
from docling.utils.ocr_utils import map_tesseract_script
|
||||
@@ -16,8 +21,19 @@ _log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TesseractOcrModel(BaseOcrModel):
|
||||
def __init__(self, enabled: bool, options: TesseractOcrOptions):
|
||||
super().__init__(enabled=enabled, options=options)
|
||||
def __init__(
|
||||
self,
|
||||
enabled: bool,
|
||||
artifacts_path: Optional[Path],
|
||||
options: TesseractOcrOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
super().__init__(
|
||||
enabled=enabled,
|
||||
artifacts_path=artifacts_path,
|
||||
options=options,
|
||||
accelerator_options=accelerator_options,
|
||||
)
|
||||
self.options: TesseractOcrOptions
|
||||
|
||||
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
||||
@@ -200,3 +216,7 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
|
||||
|
||||
yield page
|
||||
|
||||
@classmethod
def get_options_type(cls) -> Type[OcrOptions]:
    """Identify ``TesseractOcrOptions`` as the options class for this model."""
    return TesseractOcrOptions
|
||||
|
||||
Reference in New Issue
Block a user