mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-23 18:45:00 +00:00
feat: Add option to control empty clusters in layout postprocessing (#1940)
Add option to control empty clusters in layout postprocessing

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
95e70962f1
commit
a436be7367
@@ -279,6 +279,9 @@ class LayoutOptions(BaseModel):
|
||||
"""Options for layout processing."""
|
||||
|
||||
create_orphan_clusters: bool = True # Whether to create clusters for orphaned cells
|
||||
+keep_empty_clusters: bool = (
|
||||
+False # Whether to keep clusters that contain no text cells
|
||||
+)
|
||||
model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2
|
||||
|
||||
|
||||
|
@@ -267,8 +267,9 @@ class LayoutPostprocessor:
|
||||
# Initial cell assignment
|
||||
clusters = self._assign_cells_to_clusters(clusters)
|
||||
|
||||
-# Remove clusters with no cells
|
||||
-clusters = [cluster for cluster in clusters if cluster.cells]
|
||||
+# Remove clusters with no cells (if keep_empty_clusters is False)
|
||||
+if not self.options.keep_empty_clusters:
|
||||
+clusters = [cluster for cluster in clusters if cluster.cells]
|
||||
|
||||
# Handle orphaned cells
|
||||
unassigned = self._find_unassigned_cells(clusters)
|
||||
|
Loading…
Reference in New Issue
Block a user