mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
Fix baseocrmodel test issue
Signed-off-by: William Easton <bill.easton@elastic.co>
This commit is contained in:
parent
a2eafdbac3
commit
bab9c25c0c
@ -20,9 +20,6 @@ from docling.models.base_model import BaseModelWithOptions, BasePageModel
|
|||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from scipy.ndimage import binary_dilation, find_objects, label
|
|
||||||
|
|
||||||
|
|
||||||
class BaseOcrModel(BasePageModel, BaseModelWithOptions):
|
class BaseOcrModel(BasePageModel, BaseModelWithOptions):
|
||||||
def __init__(
|
def __init__(
|
||||||
@ -33,6 +30,7 @@ class BaseOcrModel(BasePageModel, BaseModelWithOptions):
|
|||||||
options: OcrOptions,
|
options: OcrOptions,
|
||||||
accelerator_options: AcceleratorOptions,
|
accelerator_options: AcceleratorOptions,
|
||||||
):
|
):
|
||||||
|
# Make sure any delay/error from import occurs on ocr model init and not first use
|
||||||
from scipy.ndimage import binary_dilation, find_objects, label
|
from scipy.ndimage import binary_dilation, find_objects, label
|
||||||
|
|
||||||
self.enabled = enabled
|
self.enabled = enabled
|
||||||
@ -40,6 +38,8 @@ class BaseOcrModel(BasePageModel, BaseModelWithOptions):
|
|||||||
|
|
||||||
# Computes the optimum amount and coordinates of rectangles to OCR on a given page
|
# Computes the optimum amount and coordinates of rectangles to OCR on a given page
|
||||||
def get_ocr_rects(self, page: Page) -> List[BoundingBox]:
|
def get_ocr_rects(self, page: Page) -> List[BoundingBox]:
|
||||||
|
from scipy.ndimage import binary_dilation, find_objects, label
|
||||||
|
|
||||||
BITMAP_COVERAGE_TRESHOLD = 0.75
|
BITMAP_COVERAGE_TRESHOLD = 0.75
|
||||||
assert page.size is not None
|
assert page.size is not None
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user