mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
feat: Establish confidence estimation for document and pages (#1313)
* Establish confidence field, propagate layout confidence through
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add OCR confidence and parse confidence (stub)
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add parse quality rules, use 5% percentile for overall and parse scores
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Heuristic updates
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Fix garbage regex
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Move grade to page
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Introduce mean_score and low_score, consistent aggregate computations
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add confidence test
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
---------
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -7,7 +7,7 @@ from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
||||
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
|
||||
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
|
||||
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
||||
from docling.datamodel.base_models import ConversionStatus, InputFormat
|
||||
from docling.datamodel.base_models import ConversionStatus, InputFormat, QualityGrade
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import (
|
||||
AcceleratorDevice,
|
||||
@@ -163,3 +163,11 @@ def test_parser_backends(test_doc_path):
|
||||
doc_result: ConversionResult = converter.convert(test_doc_path)
|
||||
|
||||
assert doc_result.status == ConversionStatus.SUCCESS
|
||||
|
||||
|
||||
def test_confidence(test_doc_path):
    """Check the confidence grades reported for a converted document.

    Converts ``test_doc_path`` with a default ``DocumentConverter``, passing
    ``page_range=(6, 9)``, and expects both the mean and the low confidence
    grade of the result to be ``QualityGrade.EXCELLENT``.
    """
    result: ConversionResult = DocumentConverter().convert(
        test_doc_path, page_range=(6, 9)
    )
    report = result.confidence

    assert report.mean_grade == QualityGrade.EXCELLENT
    assert report.low_grade == QualityGrade.EXCELLENT
|
||||
|
||||
Reference in New Issue
Block a user