mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 07:22:14 +00:00
Merge pull request #2 from 0xCarbon/feat/disable_image_labeling
Force Image Content OCR
This commit is contained in:
commit
162e89e013
@ -48,6 +48,7 @@ class LayoutModel(BasePageModel):
|
|||||||
FIGURE_LABEL = DocItemLabel.PICTURE
|
FIGURE_LABEL = DocItemLabel.PICTURE
|
||||||
FORMULA_LABEL = DocItemLabel.FORMULA
|
FORMULA_LABEL = DocItemLabel.FORMULA
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, artifacts_path: Path, accelerator_options: AcceleratorOptions):
|
def __init__(self, artifacts_path: Path, accelerator_options: AcceleratorOptions):
|
||||||
device = decide_device(accelerator_options.device)
|
device = decide_device(accelerator_options.device)
|
||||||
|
|
||||||
@ -56,7 +57,7 @@ class LayoutModel(BasePageModel):
|
|||||||
device=device,
|
device=device,
|
||||||
num_threads=accelerator_options.num_threads,
|
num_threads=accelerator_options.num_threads,
|
||||||
base_threshold=0.6,
|
base_threshold=0.6,
|
||||||
blacklist_classes={"Form", "Key-Value Region"},
|
blacklist_classes={"Form", "Key-Value Region", "Picture"}, # Blacklisting "Picture" disables picture recognition (an attempt to force the model to identify only text)
|
||||||
)
|
)
|
||||||
|
|
||||||
def postprocess(self, clusters_in: List[Cluster], cells: List[Cell], page_height):
|
def postprocess(self, clusters_in: List[Cluster], cells: List[Cell], page_height):
|
||||||
|
Loading…
Reference in New Issue
Block a user