diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py
index 76827a1b..9713630f 100644
--- a/docling/datamodel/base_models.py
+++ b/docling/datamodel/base_models.py
@@ -1,6 +1,9 @@
+import math
+from collections import defaultdict
 from enum import Enum
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Annotated, Dict, List, Literal, Optional, Union
 
+import numpy as np
 from docling_core.types.doc import (
     BoundingBox,
     DocItemLabel,
@@ -14,7 +17,7 @@ from docling_core.types.io import (  # DO ΝΟΤ REMOVE; explicitly exposed from
     DocumentStream,
 )
 from PIL.Image import Image
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field
 
 if TYPE_CHECKING:
     from docling.backend.pdf_backend import PdfPageBackend
@@ -262,3 +265,31 @@ class Page(BaseModel):
     @property
     def image(self) -> Optional[Image]:
         return self.get_image(scale=self._default_image_scale)
+
+
+# Create a type alias for score values
+ScoreValue = float
+
+
+class PageConfidenceScores(BaseModel):
+    """Confidence scores for a single page.
+
+    Every score defaults to NaN, which stands for "not computed"; the
+    aggregation code skips NaN values when averaging.
+    """
+
+    overall_score: ScoreValue = np.nan
+
+    parse_score: ScoreValue = np.nan
+    layout_score: ScoreValue = np.nan
+    table_score: ScoreValue = np.nan
+    ocr_score: ScoreValue = np.nan
+
+
+class ConfidenceReport(PageConfidenceScores):
+    """Document-level confidence scores plus the per-page breakdown."""
+
+    # Keyed by page number; the defaultdict creates a page entry on first access.
+    pages: Dict[int, PageConfidenceScores] = Field(
+        default_factory=lambda: defaultdict(PageConfidenceScores)
+    )
diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py
index 43894b07..cca0fff8 100644
--- a/docling/datamodel/document.py
+++ b/docling/datamodel/document.py
@@ -45,7 +45,7 @@ from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileIn
 from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument
 from docling_core.utils.file import resolve_source_to_stream
 from docling_core.utils.legacy import docling_document_to_legacy
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from typing_extensions import deprecated
 
 from docling.backend.abstract_backend import (
@@ -54,6 +54,7 @@ from docling.backend.abstract_backend import (
 )
 from docling.datamodel.base_models import (
     AssembledUnit,
+    ConfidenceReport,
     ConversionStatus,
     DocumentStream,
     ErrorItem,
@@ -199,6 +200,7 @@ class ConversionResult(BaseModel):
     pages: List[Page] = []
     assembled: AssembledUnit = AssembledUnit()
     timings: Dict[str, ProfilingItem] = {}
+    confidence: ConfidenceReport = Field(default_factory=ConfidenceReport)
 
     document: DoclingDocument = _EMPTY_DOCLING_DOC
 
diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py
index b3cbd954..fbafdfc1 100644
--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@@ -4,6 +4,7 @@ import warnings
 from pathlib import Path
 from typing import Iterable, Optional, Union
 
+import numpy as np
 from docling_core.types.doc import DocItemLabel
 from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
 from PIL import Image
@@ -184,6 +185,15 @@ class LayoutModel(BasePageModel):
                     ).postprocess()
                     # processed_clusters, processed_cells = clusters, page.cells
 
+                    # np.mean([]) yields NaN but emits RuntimeWarnings, so a
+                    # page without clusters gets the NaN score directly.
+                    cluster_confidences = [c.confidence for c in processed_clusters]
+                    conv_res.confidence.pages[page.page_no].layout_score = (
+                        float(np.mean(cluster_confidences))
+                        if cluster_confidences
+                        else float("nan")
+                    )
+
                     page.cells = processed_cells
                     page.predictions.layout = LayoutPrediction(
                         clusters=processed_clusters
diff --git a/docling/models/page_assemble_model.py b/docling/models/page_assemble_model.py
index 4712abdc..a71ba8a9 100644
--- a/docling/models/page_assemble_model.py
+++ b/docling/models/page_assemble_model.py
@@ -2,6 +2,7 @@ import logging
 import re
 from typing import Iterable, List
 
+import numpy as np
 from pydantic import BaseModel
 
 from docling.datamodel.base_models import (
@@ -157,4 +158,19 @@ class PageAssembleModel(BasePageModel):
                         elements=elements, headers=headers, body=body
                     )
 
+                    # Aggregate page score
+                    scores = conv_res.confidence.pages[page.page_no]
+                    component_scores = [
+                        scores.ocr_score,
+                        scores.table_score,
+                        scores.layout_score,
+                        scores.parse_score,
+                    ]
+                    # np.nanmean warns ("Mean of empty slice") when every
+                    # component is NaN, so that case is answered directly.
+                    if np.all(np.isnan(component_scores)):
+                        scores.overall_score = float("nan")
+                    else:
+                        scores.overall_score = float(np.nanmean(component_scores))
+
                 yield page
diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py
index ecaa27c7..9c9326f4 100644
--- a/docling/pipeline/standard_pdf_pipeline.py
+++ b/docling/pipeline/standard_pdf_pipeline.py
@@ -4,11 +4,12 @@ import warnings
 from pathlib import Path
 from typing import Optional
 
+import numpy as np
 from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
 
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
-from docling.datamodel.base_models import AssembledUnit, Page
+from docling.datamodel.base_models import AssembledUnit, Page, PageConfidenceScores
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import PdfPipelineOptions
 from docling.datamodel.settings import settings
@@ -61,7 +62,7 @@ class StandardPdfPipeline(PaginatedPipeline):
             or self.pipeline_options.generate_table_images
         )
 
-        self.glm_model = ReadingOrderModel(options=ReadingOrderOptions())
+        self.reading_order_model = ReadingOrderModel(options=ReadingOrderOptions())
 
         ocr_model = self.get_ocr_model(artifacts_path=artifacts_path)
 
@@ -198,7 +199,7 @@ class StandardPdfPipeline(PaginatedPipeline):
                 elements=all_elements, headers=all_headers, body=all_body
             )
 
-            conv_res.document = self.glm_model(conv_res)
+            conv_res.document = self.reading_order_model(conv_res)
 
             # Generate page images in the output
             if self.pipeline_options.generate_page_images:
@@ -241,6 +242,25 @@ class StandardPdfPipeline(PaginatedPipeline):
                             cropped_im, dpi=int(72 * scale)
                         )
 
+            # Aggregate confidence values for document:
+            if conv_res.pages:
+                page_scores = list(conv_res.confidence.pages.values())
+                for score_name in (
+                    "layout_score",
+                    "parse_score",
+                    "table_score",
+                    "ocr_score",
+                    "overall_score",
+                ):
+                    values = [getattr(ps, score_name) for ps in page_scores]
+                    # np.nanmean warns ("Mean of empty slice") when no page
+                    # has this score, so that case is answered directly.
+                    if np.all(np.isnan(values)):
+                        doc_score = float("nan")
+                    else:
+                        doc_score = float(np.nanmean(values))
+                    setattr(conv_res.confidence, score_name, doc_score)
+
         return conv_res
 
     @classmethod