mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
chore: Restore the orphan clusters
Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
This commit is contained in:
parent
e824996406
commit
b5b1ddca3b
@@ -264,29 +264,26 @@ class LayoutPostprocessor:
|
||||
# Remove clusters with no cells
|
||||
clusters = [cluster for cluster in clusters if cluster.cells]
|
||||
|
||||
###########################################################################################
|
||||
# Debug
|
||||
# # Handle orphaned cells
|
||||
# unassigned = self._find_unassigned_cells(clusters)
|
||||
# if unassigned:
|
||||
# next_id = max((c.id for c in clusters), default=0) + 1
|
||||
# orphan_clusters = []
|
||||
# for i, cell in enumerate(unassigned):
|
||||
# conf = 1.0
|
||||
# if isinstance(cell, OcrCell):
|
||||
# conf = cell.confidence
|
||||
# Handle orphaned cells
|
||||
unassigned = self._find_unassigned_cells(clusters)
|
||||
if unassigned:
|
||||
next_id = max((c.id for c in clusters), default=0) + 1
|
||||
orphan_clusters = []
|
||||
for i, cell in enumerate(unassigned):
|
||||
conf = 1.0
|
||||
if isinstance(cell, OcrCell):
|
||||
conf = cell.confidence
|
||||
|
||||
# orphan_clusters.append(
|
||||
# Cluster(
|
||||
# id=next_id + i,
|
||||
# label=DocItemLabel.TEXT,
|
||||
# bbox=cell.bbox,
|
||||
# confidence=conf,
|
||||
# cells=[cell],
|
||||
# )
|
||||
# )
|
||||
# clusters.extend(orphan_clusters)
|
||||
###########################################################################################
|
||||
orphan_clusters.append(
|
||||
Cluster(
|
||||
id=next_id + i,
|
||||
label=DocItemLabel.TEXT,
|
||||
bbox=cell.bbox,
|
||||
confidence=conf,
|
||||
cells=[cell],
|
||||
)
|
||||
)
|
||||
clusters.extend(orphan_clusters)
|
||||
|
||||
# Iterative refinement
|
||||
prev_count = len(clusters) + 1
|
||||
|
Loading…
Reference in New Issue
Block a user