feat: Add option to control empty clusters in layout postprocessing (#1940)

Add option to control empty clusters in layout postprocessing

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-07-14 18:32:01 +02:00 committed by GitHub
parent 95e70962f1
commit a436be7367
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 2 deletions

View File

@@ -279,6 +279,9 @@ class LayoutOptions(BaseModel):
"""Options for layout processing."""
create_orphan_clusters: bool = True # Whether to create clusters for orphaned cells
keep_empty_clusters: bool = (
False # Whether to keep clusters that contain no text cells
)
model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2

View File

@@ -267,8 +267,9 @@ class LayoutPostprocessor:
# Initial cell assignment
clusters = self._assign_cells_to_clusters(clusters)
# Remove clusters with no cells
clusters = [cluster for cluster in clusters if cluster.cells]
# Remove clusters with no cells (if keep_empty_clusters is False)
if not self.options.keep_empty_clusters:
clusters = [cluster for cluster in clusters if cluster.cells]
# Handle orphaned cells
unassigned = self._find_unassigned_cells(clusters)