Add option to control empty clusters in layout postprocessing

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-07-14 12:20:37 +02:00
parent 95e70962f1
commit 3e71e6fc6e
2 changed files with 6 additions and 2 deletions

View File

@@ -279,6 +279,9 @@ class LayoutOptions(BaseModel):
"""Options for layout processing."""
create_orphan_clusters: bool = True # Whether to create clusters for orphaned cells
+ keep_empty_clusters: bool = (
+ False # Whether to keep clusters that contain no text cells
+ )
model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2

View File

@@ -267,8 +267,9 @@ class LayoutPostprocessor:
# Initial cell assignment
clusters = self._assign_cells_to_clusters(clusters)
- # Remove clusters with no cells
- clusters = [cluster for cluster in clusters if cluster.cells]
+ # Remove clusters with no cells (if keep_empty_clusters is False)
+ if not self.options.keep_empty_clusters:
+ clusters = [cluster for cluster in clusters if cluster.cells]
# Handle orphaned cells
unassigned = self._find_unassigned_cells(clusters)