mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
Merge pull request #2 from 0xCarbon/feat/disable_image_labeling
Force Image Content OCR
This commit is contained in:
commit
162e89e013
@ -48,6 +48,7 @@ class LayoutModel(BasePageModel):
|
||||
FIGURE_LABEL = DocItemLabel.PICTURE
|
||||
FORMULA_LABEL = DocItemLabel.FORMULA
|
||||
|
||||
|
||||
def __init__(self, artifacts_path: Path, accelerator_options: AcceleratorOptions):
|
||||
device = decide_device(accelerator_options.device)
|
||||
|
||||
@ -56,7 +57,7 @@ class LayoutModel(BasePageModel):
|
||||
device=device,
|
||||
num_threads=accelerator_options.num_threads,
|
||||
base_threshold=0.6,
|
||||
blacklist_classes={"Form", "Key-Value Region"},
|
||||
blacklist_classes={"Form", "Key-Value Region", "Picture"}, # Use this to disable picture recognition (trying to force to identify only text)
|
||||
)
|
||||
|
||||
def postprocess(self, clusters_in: List[Cluster], cells: List[Cell], page_height):
|
||||
|
Loading…
Reference in New Issue
Block a user