From e82499640689588a5e2bf1d29ccebcf880a09cf2 Mon Sep 17 00:00:00 2001
From: Nikos Livathinos
Date: Wed, 12 Feb 2025 10:01:52 +0100
Subject: [PATCH] fix: Measure the layout mAP without the orphan clusters.

Signed-off-by: Nikos Livathinos
---
 docling/utils/layout_postprocessor.py | 41 ++++++++++++++-------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/docling/utils/layout_postprocessor.py b/docling/utils/layout_postprocessor.py
index 8cb6bc55..7274a600 100644
--- a/docling/utils/layout_postprocessor.py
+++ b/docling/utils/layout_postprocessor.py
@@ -264,26 +264,29 @@ class LayoutPostprocessor:
         # Remove clusters with no cells
         clusters = [cluster for cluster in clusters if cluster.cells]
 
-        # Handle orphaned cells
-        unassigned = self._find_unassigned_cells(clusters)
-        if unassigned:
-            next_id = max((c.id for c in clusters), default=0) + 1
-            orphan_clusters = []
-            for i, cell in enumerate(unassigned):
-                conf = 1.0
-                if isinstance(cell, OcrCell):
-                    conf = cell.confidence
+        ###########################################################################################
+        # Debug
+        # # Handle orphaned cells
+        # unassigned = self._find_unassigned_cells(clusters)
+        # if unassigned:
+        #     next_id = max((c.id for c in clusters), default=0) + 1
+        #     orphan_clusters = []
+        #     for i, cell in enumerate(unassigned):
+        #         conf = 1.0
+        #         if isinstance(cell, OcrCell):
+        #             conf = cell.confidence
 
-                orphan_clusters.append(
-                    Cluster(
-                        id=next_id + i,
-                        label=DocItemLabel.TEXT,
-                        bbox=cell.bbox,
-                        confidence=conf,
-                        cells=[cell],
-                    )
-                )
-            clusters.extend(orphan_clusters)
+        #         orphan_clusters.append(
+        #             Cluster(
+        #                 id=next_id + i,
+        #                 label=DocItemLabel.TEXT,
+        #                 bbox=cell.bbox,
+        #                 confidence=conf,
+        #                 cells=[cell],
+        #             )
+        #         )
+        #     clusters.extend(orphan_clusters)
+        ###########################################################################################
 
         # Iterative refinement
         prev_count = len(clusters) + 1