mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-24 19:14:23 +00:00
feat: Add option to control empty clusters in layout postprocessing (#1940)
Add option to control empty clusters in layout postprocessing

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
95e70962f1
commit
a436be7367
@@ -279,6 +279,9 @@ class LayoutOptions(BaseModel):
|
|||||||
"""Options for layout processing."""
|
"""Options for layout processing."""
|
||||||
|
|
||||||
create_orphan_clusters: bool = True # Whether to create clusters for orphaned cells
|
create_orphan_clusters: bool = True # Whether to create clusters for orphaned cells
|
||||||
|
keep_empty_clusters: bool = (
|
||||||
|
False # Whether to keep clusters that contain no text cells
|
||||||
|
)
|
||||||
model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2
|
model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2
|
||||||
|
|
||||||
|
|
||||||
|
@@ -267,8 +267,9 @@ class LayoutPostprocessor:
|
|||||||
# Initial cell assignment
|
# Initial cell assignment
|
||||||
clusters = self._assign_cells_to_clusters(clusters)
|
clusters = self._assign_cells_to_clusters(clusters)
|
||||||
|
|
||||||
# Remove clusters with no cells
|
# Remove clusters with no cells (if keep_empty_clusters is False)
|
||||||
clusters = [cluster for cluster in clusters if cluster.cells]
|
if not self.options.keep_empty_clusters:
|
||||||
|
clusters = [cluster for cluster in clusters if cluster.cells]
|
||||||
|
|
||||||
# Handle orphaned cells
|
# Handle orphaned cells
|
||||||
unassigned = self._find_unassigned_cells(clusters)
|
unassigned = self._find_unassigned_cells(clusters)
|
||||||
|
Loading…
Reference in New Issue
Block a user