mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
Establish confidence field, propagate layout confidence through
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
bfcab3d677
commit
c907af0928
@ -1,6 +1,9 @@
|
||||
import math
|
||||
from collections import defaultdict
|
||||
from enum import Enum
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Union
|
||||
from typing import TYPE_CHECKING, Annotated, Dict, List, Literal, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
from docling_core.types.doc import (
|
||||
BoundingBox,
|
||||
DocItemLabel,
|
||||
@ -14,7 +17,7 @@ from docling_core.types.io import ( # DO NOT REMOVE; explicitly exposed from
|
||||
DocumentStream,
|
||||
)
|
||||
from PIL.Image import Image
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docling.backend.pdf_backend import PdfPageBackend
|
||||
@ -262,3 +265,22 @@ class Page(BaseModel):
|
||||
@property
|
||||
def image(self) -> Optional[Image]:
|
||||
return self.get_image(scale=self._default_image_scale)
|
||||
|
||||
|
||||
# Type alias for confidence score values. Scores are plain floats; the score
# fields declared with this alias below default to NaN (np.nan).
|
||||
ScoreValue = float
|
||||
|
||||
|
||||
# Confidence scores recorded for a single page.
#
# Every field defaults to NaN. The aggregation code elsewhere in this commit
# combines scores with np.nanmean, which leaves NaN entries out of the average.
class PageConfidenceScores(BaseModel):
|
||||
# Aggregate of the four component scores below; the page assembly step in
# this commit sets it to the nanmean of ocr/table/layout/parse scores.
overall_score: ScoreValue = np.nan
|
||||
|
||||
# Component scores. In this commit only layout_score is assigned (the mean
# confidence of the post-processed layout clusters); the others keep their
# NaN default unless set elsewhere.
parse_score: ScoreValue = np.nan
|
||||
layout_score: ScoreValue = np.nan
|
||||
table_score: ScoreValue = np.nan
|
||||
ocr_score: ScoreValue = np.nan
|
||||
|
||||
|
||||
# Document-level confidence report.
#
# Inherits the five score fields from PageConfidenceScores and adds a mapping
# of per-page scores. The pipeline code in this commit indexes the mapping with
# page.page_no and fills the inherited fields with the nanmean of the
# corresponding per-page values.
class ConfidenceReport(PageConfidenceScores):
|
||||
# The default is built with defaultdict(PageConfidenceScores), so callers
# can write `pages[page_no].layout_score = ...` without first inserting an
# entry; a missing key is expected to yield a fresh all-NaN score object.
# NOTE(review): this relies on the defaultdict surviving as the field value
# (i.e. not being replaced by a plain dict during validation) - confirm.
pages: Dict[int, PageConfidenceScores] = Field(
|
||||
default_factory=lambda: defaultdict(PageConfidenceScores)
|
||||
)
|
||||
|
@ -45,7 +45,7 @@ from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileIn
|
||||
from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument
|
||||
from docling_core.utils.file import resolve_source_to_stream
|
||||
from docling_core.utils.legacy import docling_document_to_legacy
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, Field
|
||||
from typing_extensions import deprecated
|
||||
|
||||
from docling.backend.abstract_backend import (
|
||||
@ -54,6 +54,7 @@ from docling.backend.abstract_backend import (
|
||||
)
|
||||
from docling.datamodel.base_models import (
|
||||
AssembledUnit,
|
||||
ConfidenceReport,
|
||||
ConversionStatus,
|
||||
DocumentStream,
|
||||
ErrorItem,
|
||||
@ -199,6 +200,7 @@ class ConversionResult(BaseModel):
|
||||
pages: List[Page] = []
|
||||
assembled: AssembledUnit = AssembledUnit()
|
||||
timings: Dict[str, ProfilingItem] = {}
|
||||
confidence: ConfidenceReport = Field(default_factory=ConfidenceReport)
|
||||
|
||||
document: DoclingDocument = _EMPTY_DOCLING_DOC
|
||||
|
||||
|
@ -4,6 +4,7 @@ import warnings
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
from docling_core.types.doc import DocItemLabel
|
||||
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
||||
from PIL import Image
|
||||
@ -184,6 +185,10 @@ class LayoutModel(BasePageModel):
|
||||
).postprocess()
|
||||
# processed_clusters, processed_cells = clusters, page.cells
|
||||
|
||||
conv_res.confidence.pages[page.page_no].layout_score = float(
|
||||
np.mean([c.confidence for c in processed_clusters])
|
||||
)
|
||||
|
||||
page.cells = processed_cells
|
||||
page.predictions.layout = LayoutPrediction(
|
||||
clusters=processed_clusters
|
||||
|
@ -2,6 +2,7 @@ import logging
|
||||
import re
|
||||
from typing import Iterable, List
|
||||
|
||||
import numpy as np
|
||||
from pydantic import BaseModel
|
||||
|
||||
from docling.datamodel.base_models import (
|
||||
@ -157,4 +158,15 @@ class PageAssembleModel(BasePageModel):
|
||||
elements=elements, headers=headers, body=body
|
||||
)
|
||||
|
||||
# Aggregate page score
|
||||
scores = conv_res.confidence.pages[page.page_no]
|
||||
scores.overall_score = float(np.nanmean(
|
||||
[
|
||||
scores.ocr_score,
|
||||
scores.table_score,
|
||||
scores.layout_score,
|
||||
scores.parse_score,
|
||||
]
|
||||
))
|
||||
|
||||
yield page
|
||||
|
@ -4,11 +4,12 @@ import warnings
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
|
||||
|
||||
from docling.backend.abstract_backend import AbstractDocumentBackend
|
||||
from docling.backend.pdf_backend import PdfDocumentBackend
|
||||
from docling.datamodel.base_models import AssembledUnit, Page
|
||||
from docling.datamodel.base_models import AssembledUnit, Page, PageConfidenceScores
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||
from docling.datamodel.settings import settings
|
||||
@ -61,7 +62,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
||||
or self.pipeline_options.generate_table_images
|
||||
)
|
||||
|
||||
self.glm_model = ReadingOrderModel(options=ReadingOrderOptions())
|
||||
self.reading_order_model = ReadingOrderModel(options=ReadingOrderOptions())
|
||||
|
||||
ocr_model = self.get_ocr_model(artifacts_path=artifacts_path)
|
||||
|
||||
@ -198,7 +199,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
||||
elements=all_elements, headers=all_headers, body=all_body
|
||||
)
|
||||
|
||||
conv_res.document = self.glm_model(conv_res)
|
||||
conv_res.document = self.reading_order_model(conv_res)
|
||||
|
||||
# Generate page images in the output
|
||||
if self.pipeline_options.generate_page_images:
|
||||
@ -241,6 +242,35 @@ class StandardPdfPipeline(PaginatedPipeline):
|
||||
cropped_im, dpi=int(72 * scale)
|
||||
)
|
||||
|
||||
# Aggregate confidence values for document:
|
||||
if len(conv_res.pages) > 0:
|
||||
conv_res.confidence.layout_score = float(
|
||||
np.nanmean(
|
||||
[c.layout_score for c in conv_res.confidence.pages.values()]
|
||||
)
|
||||
)
|
||||
conv_res.confidence.parse_score = float(
|
||||
np.nanmean(
|
||||
[c.parse_score for c in conv_res.confidence.pages.values()]
|
||||
)
|
||||
)
|
||||
conv_res.confidence.table_score = float(
|
||||
np.nanmean(
|
||||
[c.table_score for c in conv_res.confidence.pages.values()]
|
||||
)
|
||||
)
|
||||
conv_res.confidence.ocr_score = float(
|
||||
np.nanmean(
|
||||
[c.ocr_score for c in conv_res.confidence.pages.values()]
|
||||
)
|
||||
)
|
||||
|
||||
conv_res.confidence.overall_score = float(
|
||||
np.nanmean(
|
||||
[c.overall_score for c in conv_res.confidence.pages.values()]
|
||||
)
|
||||
)
|
||||
|
||||
return conv_res
|
||||
|
||||
@classmethod
|
||||
|
Loading…
Reference in New Issue
Block a user