Establish confidence field, propagate layout confidence through

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-04-07 14:34:13 +02:00
parent bfcab3d677
commit c907af0928
5 changed files with 77 additions and 6 deletions

View File

@ -1,6 +1,9 @@
import math
from collections import defaultdict
from enum import Enum from enum import Enum
from typing import TYPE_CHECKING, Dict, List, Optional, Union from typing import TYPE_CHECKING, Annotated, Dict, List, Literal, Optional, Union
import numpy as np
from docling_core.types.doc import ( from docling_core.types.doc import (
BoundingBox, BoundingBox,
DocItemLabel, DocItemLabel,
@ -14,7 +17,7 @@ from docling_core.types.io import ( # DO NOT REMOVE; explicitly exposed from
DocumentStream, DocumentStream,
) )
from PIL.Image import Image from PIL.Image import Image
from pydantic import BaseModel, ConfigDict from pydantic import BaseModel, ConfigDict, Field
if TYPE_CHECKING: if TYPE_CHECKING:
from docling.backend.pdf_backend import PdfPageBackend from docling.backend.pdf_backend import PdfPageBackend
@ -262,3 +265,22 @@ class Page(BaseModel):
@property @property
def image(self) -> Optional[Image]: def image(self) -> Optional[Image]:
return self.get_image(scale=self._default_image_scale) return self.get_image(scale=self._default_image_scale)
# Type alias for confidence score values (plain floats).
ScoreValue = float

# NaN is the "no value yet" marker shared by every score field below.
_UNSET_SCORE: ScoreValue = np.nan


class PageConfidenceScores(BaseModel):
    """Confidence scores recorded for a single page.

    Every score starts out as NaN and stays NaN until something assigns
    a real value to it.
    """

    # Page-level aggregate; page assembly sets it to the NaN-ignoring mean
    # of the four scores below.
    overall_score: ScoreValue = _UNSET_SCORE
    # NOTE(review): presumably the confidence of the parsing step; nothing
    # in the code reviewed here assigns it - confirm.
    parse_score: ScoreValue = _UNSET_SCORE
    # Set by the layout model to the mean confidence of the page's
    # post-processed layout clusters.
    layout_score: ScoreValue = _UNSET_SCORE
    # NOTE(review): presumably the confidence of table recognition; nothing
    # in the code reviewed here assigns it - confirm.
    table_score: ScoreValue = _UNSET_SCORE
    # NOTE(review): presumably the confidence of OCR; nothing in the code
    # reviewed here assigns it - confirm.
    ocr_score: ScoreValue = _UNSET_SCORE
def _new_page_score_map() -> Dict[int, PageConfidenceScores]:
    """Return an empty mapping that creates default page scores on first access."""
    return defaultdict(PageConfidenceScores)


class ConfidenceReport(PageConfidenceScores):
    """Confidence scores for a document, plus a per-page breakdown.

    The score fields inherited from ``PageConfidenceScores`` hold the
    document-level values; ``pages`` holds one score record per page.
    """

    # Indexed by page number (callers use ``page.page_no``). The default
    # mapping is a defaultdict, so reading a page that has no entry yet
    # creates a fresh all-NaN ``PageConfidenceScores`` for it.
    # NOTE(review): a mapping passed in explicitly, or rebuilt during
    # validation/deserialization, may be a plain dict without this
    # auto-creation - confirm before relying on it outside the default case.
    pages: Dict[int, PageConfidenceScores] = Field(
        default_factory=_new_page_score_map
    )

View File

@ -45,7 +45,7 @@ from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileIn
from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument
from docling_core.utils.file import resolve_source_to_stream from docling_core.utils.file import resolve_source_to_stream
from docling_core.utils.legacy import docling_document_to_legacy from docling_core.utils.legacy import docling_document_to_legacy
from pydantic import BaseModel from pydantic import BaseModel, Field
from typing_extensions import deprecated from typing_extensions import deprecated
from docling.backend.abstract_backend import ( from docling.backend.abstract_backend import (
@ -54,6 +54,7 @@ from docling.backend.abstract_backend import (
) )
from docling.datamodel.base_models import ( from docling.datamodel.base_models import (
AssembledUnit, AssembledUnit,
ConfidenceReport,
ConversionStatus, ConversionStatus,
DocumentStream, DocumentStream,
ErrorItem, ErrorItem,
@ -199,6 +200,7 @@ class ConversionResult(BaseModel):
pages: List[Page] = [] pages: List[Page] = []
assembled: AssembledUnit = AssembledUnit() assembled: AssembledUnit = AssembledUnit()
timings: Dict[str, ProfilingItem] = {} timings: Dict[str, ProfilingItem] = {}
confidence: ConfidenceReport = Field(default_factory=ConfidenceReport)
document: DoclingDocument = _EMPTY_DOCLING_DOC document: DoclingDocument = _EMPTY_DOCLING_DOC

View File

@ -4,6 +4,7 @@ import warnings
from pathlib import Path from pathlib import Path
from typing import Iterable, Optional, Union from typing import Iterable, Optional, Union
import numpy as np
from docling_core.types.doc import DocItemLabel from docling_core.types.doc import DocItemLabel
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
from PIL import Image from PIL import Image
@ -184,6 +185,10 @@ class LayoutModel(BasePageModel):
).postprocess() ).postprocess()
# processed_clusters, processed_cells = clusters, page.cells # processed_clusters, processed_cells = clusters, page.cells
conv_res.confidence.pages[page.page_no].layout_score = float(
np.mean([c.confidence for c in processed_clusters])
)
page.cells = processed_cells page.cells = processed_cells
page.predictions.layout = LayoutPrediction( page.predictions.layout = LayoutPrediction(
clusters=processed_clusters clusters=processed_clusters

View File

@ -2,6 +2,7 @@ import logging
import re import re
from typing import Iterable, List from typing import Iterable, List
import numpy as np
from pydantic import BaseModel from pydantic import BaseModel
from docling.datamodel.base_models import ( from docling.datamodel.base_models import (
@ -157,4 +158,15 @@ class PageAssembleModel(BasePageModel):
elements=elements, headers=headers, body=body elements=elements, headers=headers, body=body
) )
# Aggregate page score
scores = conv_res.confidence.pages[page.page_no]
scores.overall_score = float(np.nanmean(
[
scores.ocr_score,
scores.table_score,
scores.layout_score,
scores.parse_score,
]
))
yield page yield page

View File

@ -4,11 +4,12 @@ import warnings
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
import numpy as np
from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
from docling.backend.abstract_backend import AbstractDocumentBackend from docling.backend.abstract_backend import AbstractDocumentBackend
from docling.backend.pdf_backend import PdfDocumentBackend from docling.backend.pdf_backend import PdfDocumentBackend
from docling.datamodel.base_models import AssembledUnit, Page from docling.datamodel.base_models import AssembledUnit, Page, PageConfidenceScores
from docling.datamodel.document import ConversionResult from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.datamodel.settings import settings from docling.datamodel.settings import settings
@ -61,7 +62,7 @@ class StandardPdfPipeline(PaginatedPipeline):
or self.pipeline_options.generate_table_images or self.pipeline_options.generate_table_images
) )
self.glm_model = ReadingOrderModel(options=ReadingOrderOptions()) self.reading_order_model = ReadingOrderModel(options=ReadingOrderOptions())
ocr_model = self.get_ocr_model(artifacts_path=artifacts_path) ocr_model = self.get_ocr_model(artifacts_path=artifacts_path)
@ -198,7 +199,7 @@ class StandardPdfPipeline(PaginatedPipeline):
elements=all_elements, headers=all_headers, body=all_body elements=all_elements, headers=all_headers, body=all_body
) )
conv_res.document = self.glm_model(conv_res) conv_res.document = self.reading_order_model(conv_res)
# Generate page images in the output # Generate page images in the output
if self.pipeline_options.generate_page_images: if self.pipeline_options.generate_page_images:
@ -241,6 +242,35 @@ class StandardPdfPipeline(PaginatedPipeline):
cropped_im, dpi=int(72 * scale) cropped_im, dpi=int(72 * scale)
) )
# Aggregate confidence values for document:
if len(conv_res.pages) > 0:
conv_res.confidence.layout_score = float(
np.nanmean(
[c.layout_score for c in conv_res.confidence.pages.values()]
)
)
conv_res.confidence.parse_score = float(
np.nanmean(
[c.parse_score for c in conv_res.confidence.pages.values()]
)
)
conv_res.confidence.table_score = float(
np.nanmean(
[c.table_score for c in conv_res.confidence.pages.values()]
)
)
conv_res.confidence.ocr_score = float(
np.nanmean(
[c.ocr_score for c in conv_res.confidence.pages.values()]
)
)
conv_res.confidence.overall_score = float(
np.nanmean(
[c.overall_score for c in conv_res.confidence.pages.values()]
)
)
return conv_res return conv_res
@classmethod @classmethod