diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 99039e1f..06169fb8 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -279,6 +279,9 @@ class LayoutOptions(BaseModel): """Options for layout processing.""" create_orphan_clusters: bool = True # Whether to create clusters for orphaned cells + keep_empty_clusters: bool = ( + False # Whether to keep clusters that contain no text cells + ) model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2 diff --git a/docling/utils/layout_postprocessor.py b/docling/utils/layout_postprocessor.py index a98b3aab..effce01b 100644 --- a/docling/utils/layout_postprocessor.py +++ b/docling/utils/layout_postprocessor.py @@ -267,8 +267,9 @@ class LayoutPostprocessor: # Initial cell assignment clusters = self._assign_cells_to_clusters(clusters) - # Remove clusters with no cells - clusters = [cluster for cluster in clusters if cluster.cells] + # Remove clusters with no cells (if keep_empty_clusters is False) + if not self.options.keep_empty_clusters: + clusters = [cluster for cluster in clusters if cluster.cells] # Handle orphaned cells unassigned = self._find_unassigned_cells(clusters)