mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
Blacklisted the "Picture" layout tag so that the layout model is forced to interpret the contents of an image region and retrieve text that would otherwise be lost behind a picture tag.
This commit is contained in:
parent
365a1e7b98
commit
a4dc21395d
@ -48,6 +48,7 @@ class LayoutModel(BasePageModel):
|
||||
|
||||
def __init__(self, artifacts_path: Path):
    """Create the layout predictor from the artifacts at ``artifacts_path``.

    After construction, the "Picture" class is added to the predictor's
    ``_black_classes`` collection so that picture recognition is disabled
    (an attempt to force the predictor to identify only text).
    """
    predictor = LayoutPredictor(artifacts_path)  # TODO temporary
    self.layout_predictor = predictor

    # Use this to disable picture recognition (trying to force to identify
    # only text).
    # NOTE(review): this relies on a private attribute of LayoutPredictor;
    # presumably a set of class names to ignore -- confirm against the
    # predictor's implementation.
    predictor._black_classes.add("Picture")
|
||||
|
||||
def postprocess(self, clusters_in: List[Cluster], cells: List[Cell], page_height):
|
||||
MIN_INTERSECTION = 0.2
|
||||
|
Loading…
Reference in New Issue
Block a user