Establish confidence field, propagate layout confidence through

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-04-07 14:34:13 +02:00
parent bfcab3d677
commit c907af0928
5 changed files with 77 additions and 6 deletions

View File

@ -1,6 +1,9 @@
import math
from collections import defaultdict
from enum import Enum
from typing import TYPE_CHECKING, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Annotated, Dict, List, Literal, Optional, Union
import numpy as np
from docling_core.types.doc import (
BoundingBox,
DocItemLabel,
@ -14,7 +17,7 @@ from docling_core.types.io import ( # DO NOT REMOVE; explicitly exposed from
DocumentStream,
)
from PIL.Image import Image
from pydantic import BaseModel, ConfigDict
from pydantic import BaseModel, ConfigDict, Field
if TYPE_CHECKING:
from docling.backend.pdf_backend import PdfPageBackend
@ -262,3 +265,22 @@ class Page(BaseModel):
@property
def image(self) -> Optional[Image]:
return self.get_image(scale=self._default_image_scale)
# Shared value type used by every confidence score field.
ScoreValue = float

# NaN marks a score that has not been set.
_UNSET_SCORE: ScoreValue = np.nan


class PageConfidenceScores(BaseModel):
    """Confidence scores recorded for a single page.

    Each score starts out as NaN and keeps that value until one is assigned.
    """

    overall_score: ScoreValue = _UNSET_SCORE
    parse_score: ScoreValue = _UNSET_SCORE
    layout_score: ScoreValue = _UNSET_SCORE
    table_score: ScoreValue = _UNSET_SCORE
    ocr_score: ScoreValue = _UNSET_SCORE
def _empty_page_scores() -> Dict[int, PageConfidenceScores]:
    """Return a mapping that creates default scores for any page on first access."""
    return defaultdict(PageConfidenceScores)


class ConfidenceReport(PageConfidenceScores):
    """Confidence scores for a whole document plus a per-page breakdown.

    The inherited score fields hold the document-level values, while
    ``pages`` maps a page number to the scores recorded for that page.
    """

    pages: Dict[int, PageConfidenceScores] = Field(
        default_factory=_empty_page_scores
    )

View File

@ -45,7 +45,7 @@ from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileIn
from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument
from docling_core.utils.file import resolve_source_to_stream
from docling_core.utils.legacy import docling_document_to_legacy
from pydantic import BaseModel
from pydantic import BaseModel, Field
from typing_extensions import deprecated
from docling.backend.abstract_backend import (
@ -54,6 +54,7 @@ from docling.backend.abstract_backend import (
)
from docling.datamodel.base_models import (
AssembledUnit,
ConfidenceReport,
ConversionStatus,
DocumentStream,
ErrorItem,
@ -199,6 +200,7 @@ class ConversionResult(BaseModel):
pages: List[Page] = []
assembled: AssembledUnit = AssembledUnit()
timings: Dict[str, ProfilingItem] = {}
confidence: ConfidenceReport = Field(default_factory=ConfidenceReport)
document: DoclingDocument = _EMPTY_DOCLING_DOC

View File

@ -4,6 +4,7 @@ import warnings
from pathlib import Path
from typing import Iterable, Optional, Union
import numpy as np
from docling_core.types.doc import DocItemLabel
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
from PIL import Image
@ -184,6 +185,10 @@ class LayoutModel(BasePageModel):
).postprocess()
# processed_clusters, processed_cells = clusters, page.cells
conv_res.confidence.pages[page.page_no].layout_score = float(
np.mean([c.confidence for c in processed_clusters])
)
page.cells = processed_cells
page.predictions.layout = LayoutPrediction(
clusters=processed_clusters

View File

@ -2,6 +2,7 @@ import logging
import re
from typing import Iterable, List
import numpy as np
from pydantic import BaseModel
from docling.datamodel.base_models import (
@ -157,4 +158,15 @@ class PageAssembleModel(BasePageModel):
elements=elements, headers=headers, body=body
)
# Aggregate page score
scores = conv_res.confidence.pages[page.page_no]
scores.overall_score = float(np.nanmean(
[
scores.ocr_score,
scores.table_score,
scores.layout_score,
scores.parse_score,
]
))
yield page

View File

@ -4,11 +4,12 @@ import warnings
from pathlib import Path
from typing import Optional
import numpy as np
from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
from docling.backend.abstract_backend import AbstractDocumentBackend
from docling.backend.pdf_backend import PdfDocumentBackend
from docling.datamodel.base_models import AssembledUnit, Page
from docling.datamodel.base_models import AssembledUnit, Page, PageConfidenceScores
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.datamodel.settings import settings
@ -61,7 +62,7 @@ class StandardPdfPipeline(PaginatedPipeline):
or self.pipeline_options.generate_table_images
)
self.glm_model = ReadingOrderModel(options=ReadingOrderOptions())
self.reading_order_model = ReadingOrderModel(options=ReadingOrderOptions())
ocr_model = self.get_ocr_model(artifacts_path=artifacts_path)
@ -198,7 +199,7 @@ class StandardPdfPipeline(PaginatedPipeline):
elements=all_elements, headers=all_headers, body=all_body
)
conv_res.document = self.glm_model(conv_res)
conv_res.document = self.reading_order_model(conv_res)
# Generate page images in the output
if self.pipeline_options.generate_page_images:
@ -241,6 +242,35 @@ class StandardPdfPipeline(PaginatedPipeline):
cropped_im, dpi=int(72 * scale)
)
# Aggregate confidence values for document:
if len(conv_res.pages) > 0:
conv_res.confidence.layout_score = float(
np.nanmean(
[c.layout_score for c in conv_res.confidence.pages.values()]
)
)
conv_res.confidence.parse_score = float(
np.nanmean(
[c.parse_score for c in conv_res.confidence.pages.values()]
)
)
conv_res.confidence.table_score = float(
np.nanmean(
[c.table_score for c in conv_res.confidence.pages.values()]
)
)
conv_res.confidence.ocr_score = float(
np.nanmean(
[c.ocr_score for c in conv_res.confidence.pages.values()]
)
)
conv_res.confidence.overall_score = float(
np.nanmean(
[c.overall_score for c in conv_res.confidence.pages.values()]
)
)
return conv_res
@classmethod