mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 12:34:22 +00:00
Establish confidence field, propagate layout confidence through
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
bfcab3d677
commit
c907af0928
@ -1,6 +1,9 @@
|
|||||||
|
import math
|
||||||
|
from collections import defaultdict
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import TYPE_CHECKING, Dict, List, Optional, Union
|
from typing import TYPE_CHECKING, Annotated, Dict, List, Literal, Optional, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
BoundingBox,
|
BoundingBox,
|
||||||
DocItemLabel,
|
DocItemLabel,
|
||||||
@ -14,7 +17,7 @@ from docling_core.types.io import ( # DO NOT REMOVE; explicitly exposed from
|
|||||||
DocumentStream,
|
DocumentStream,
|
||||||
)
|
)
|
||||||
from PIL.Image import Image
|
from PIL.Image import Image
|
||||||
from pydantic import BaseModel, ConfigDict
|
from pydantic import BaseModel, ConfigDict, Field
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from docling.backend.pdf_backend import PdfPageBackend
|
from docling.backend.pdf_backend import PdfPageBackend
|
||||||
@ -262,3 +265,22 @@ class Page(BaseModel):
|
|||||||
@property
|
@property
|
||||||
def image(self) -> Optional[Image]:
|
def image(self) -> Optional[Image]:
|
||||||
return self.get_image(scale=self._default_image_scale)
|
return self.get_image(scale=self._default_image_scale)
|
||||||
|
|
||||||
|
|
||||||
|
# Confidence scores are plain floats; the alias names the intent at use sites.
ScoreValue = float


class PageConfidenceScores(BaseModel):
    """Confidence scores recorded for a single page.

    Every score defaults to NaN, so a score that is never assigned stays NaN
    and can be skipped by NaN-aware aggregation such as ``np.nanmean``.
    """

    # Aggregate score for the page.
    overall_score: ScoreValue = np.nan

    # Individual component scores.
    parse_score: ScoreValue = np.nan
    layout_score: ScoreValue = np.nan
    table_score: ScoreValue = np.nan
    ocr_score: ScoreValue = np.nan
|
||||||
|
|
||||||
|
|
||||||
|
def _new_page_score_map() -> Dict[int, PageConfidenceScores]:
    """Return an empty page-number -> scores mapping that fills in defaults."""
    return defaultdict(PageConfidenceScores)


class ConfidenceReport(PageConfidenceScores):
    """Confidence scores for a whole conversion plus a per-page breakdown.

    The inherited score fields carry the report-level values, while ``pages``
    maps a page number to the scores of that individual page.
    """

    # A fresh defaultdict is built for every instance: indexing a page number
    # that has no entry yet creates a default ``PageConfidenceScores`` for it
    # instead of raising ``KeyError``.
    pages: Dict[int, PageConfidenceScores] = Field(
        default_factory=_new_page_score_map
    )
|
||||||
|
@ -45,7 +45,7 @@ from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileIn
|
|||||||
from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument
|
from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument
|
||||||
from docling_core.utils.file import resolve_source_to_stream
|
from docling_core.utils.file import resolve_source_to_stream
|
||||||
from docling_core.utils.legacy import docling_document_to_legacy
|
from docling_core.utils.legacy import docling_document_to_legacy
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel, Field
|
||||||
from typing_extensions import deprecated
|
from typing_extensions import deprecated
|
||||||
|
|
||||||
from docling.backend.abstract_backend import (
|
from docling.backend.abstract_backend import (
|
||||||
@ -54,6 +54,7 @@ from docling.backend.abstract_backend import (
|
|||||||
)
|
)
|
||||||
from docling.datamodel.base_models import (
|
from docling.datamodel.base_models import (
|
||||||
AssembledUnit,
|
AssembledUnit,
|
||||||
|
ConfidenceReport,
|
||||||
ConversionStatus,
|
ConversionStatus,
|
||||||
DocumentStream,
|
DocumentStream,
|
||||||
ErrorItem,
|
ErrorItem,
|
||||||
@ -199,6 +200,7 @@ class ConversionResult(BaseModel):
|
|||||||
pages: List[Page] = []
|
pages: List[Page] = []
|
||||||
assembled: AssembledUnit = AssembledUnit()
|
assembled: AssembledUnit = AssembledUnit()
|
||||||
timings: Dict[str, ProfilingItem] = {}
|
timings: Dict[str, ProfilingItem] = {}
|
||||||
|
confidence: ConfidenceReport = Field(default_factory=ConfidenceReport)
|
||||||
|
|
||||||
document: DoclingDocument = _EMPTY_DOCLING_DOC
|
document: DoclingDocument = _EMPTY_DOCLING_DOC
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@ import warnings
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable, Optional, Union
|
from typing import Iterable, Optional, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from docling_core.types.doc import DocItemLabel
|
from docling_core.types.doc import DocItemLabel
|
||||||
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
@ -184,6 +185,10 @@ class LayoutModel(BasePageModel):
|
|||||||
).postprocess()
|
).postprocess()
|
||||||
# processed_clusters, processed_cells = clusters, page.cells
|
# processed_clusters, processed_cells = clusters, page.cells
|
||||||
|
|
||||||
|
conv_res.confidence.pages[page.page_no].layout_score = float(
|
||||||
|
np.mean([c.confidence for c in processed_clusters])
|
||||||
|
)
|
||||||
|
|
||||||
page.cells = processed_cells
|
page.cells = processed_cells
|
||||||
page.predictions.layout = LayoutPrediction(
|
page.predictions.layout = LayoutPrediction(
|
||||||
clusters=processed_clusters
|
clusters=processed_clusters
|
||||||
|
@ -2,6 +2,7 @@ import logging
|
|||||||
import re
|
import re
|
||||||
from typing import Iterable, List
|
from typing import Iterable, List
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from docling.datamodel.base_models import (
|
from docling.datamodel.base_models import (
|
||||||
@ -157,4 +158,15 @@ class PageAssembleModel(BasePageModel):
|
|||||||
elements=elements, headers=headers, body=body
|
elements=elements, headers=headers, body=body
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Aggregate page score
|
||||||
|
scores = conv_res.confidence.pages[page.page_no]
|
||||||
|
scores.overall_score = float(np.nanmean(
|
||||||
|
[
|
||||||
|
scores.ocr_score,
|
||||||
|
scores.table_score,
|
||||||
|
scores.layout_score,
|
||||||
|
scores.parse_score,
|
||||||
|
]
|
||||||
|
))
|
||||||
|
|
||||||
yield page
|
yield page
|
||||||
|
@ -4,11 +4,12 @@ import warnings
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
|
from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
|
||||||
|
|
||||||
from docling.backend.abstract_backend import AbstractDocumentBackend
|
from docling.backend.abstract_backend import AbstractDocumentBackend
|
||||||
from docling.backend.pdf_backend import PdfDocumentBackend
|
from docling.backend.pdf_backend import PdfDocumentBackend
|
||||||
from docling.datamodel.base_models import AssembledUnit, Page
|
from docling.datamodel.base_models import AssembledUnit, Page, PageConfidenceScores
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
@ -61,7 +62,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
or self.pipeline_options.generate_table_images
|
or self.pipeline_options.generate_table_images
|
||||||
)
|
)
|
||||||
|
|
||||||
self.glm_model = ReadingOrderModel(options=ReadingOrderOptions())
|
self.reading_order_model = ReadingOrderModel(options=ReadingOrderOptions())
|
||||||
|
|
||||||
ocr_model = self.get_ocr_model(artifacts_path=artifacts_path)
|
ocr_model = self.get_ocr_model(artifacts_path=artifacts_path)
|
||||||
|
|
||||||
@ -198,7 +199,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
elements=all_elements, headers=all_headers, body=all_body
|
elements=all_elements, headers=all_headers, body=all_body
|
||||||
)
|
)
|
||||||
|
|
||||||
conv_res.document = self.glm_model(conv_res)
|
conv_res.document = self.reading_order_model(conv_res)
|
||||||
|
|
||||||
# Generate page images in the output
|
# Generate page images in the output
|
||||||
if self.pipeline_options.generate_page_images:
|
if self.pipeline_options.generate_page_images:
|
||||||
@ -241,6 +242,35 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
cropped_im, dpi=int(72 * scale)
|
cropped_im, dpi=int(72 * scale)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Aggregate confidence values for document:
|
||||||
|
if len(conv_res.pages) > 0:
|
||||||
|
conv_res.confidence.layout_score = float(
|
||||||
|
np.nanmean(
|
||||||
|
[c.layout_score for c in conv_res.confidence.pages.values()]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
conv_res.confidence.parse_score = float(
|
||||||
|
np.nanmean(
|
||||||
|
[c.parse_score for c in conv_res.confidence.pages.values()]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
conv_res.confidence.table_score = float(
|
||||||
|
np.nanmean(
|
||||||
|
[c.table_score for c in conv_res.confidence.pages.values()]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
conv_res.confidence.ocr_score = float(
|
||||||
|
np.nanmean(
|
||||||
|
[c.ocr_score for c in conv_res.confidence.pages.values()]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
conv_res.confidence.overall_score = float(
|
||||||
|
np.nanmean(
|
||||||
|
[c.overall_score for c in conv_res.confidence.pages.values()]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
return conv_res
|
return conv_res
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
Loading…
Reference in New Issue
Block a user