mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-15 16:18:22 +00:00
feat: Page-level error reporting from PDF backend, introduce PARTIAL_SUCCESS status (#47)
* Put safety-checks for failed parse of pages
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Introduce page-level error checks
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Bump to docling-parse 1.1.1
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Introduce page-level error checks
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
---------
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -16,7 +16,7 @@ class ConversionStatus(str, Enum):
|
||||
STARTED = auto()
|
||||
FAILURE = auto()
|
||||
SUCCESS = auto()
|
||||
- SUCCESS_WITH_ERRORS = auto()
|
||||
+ PARTIAL_SUCCESS = auto()
|
||||
|
||||
|
||||
class DocInputType(str, Enum):
|
||||
@@ -29,6 +29,18 @@ class CoordOrigin(str, Enum):
|
||||
BOTTOMLEFT = auto()
|
||||
|
||||
|
||||
+ class DoclingComponentType(str, Enum):
|
||||
+ PDF_BACKEND = auto()
|
||||
+ MODEL = auto()
|
||||
+ DOC_ASSEMBLER = auto()
|
||||
|
||||
|
||||
+ class ErrorItem(BaseModel):
|
||||
+ component_type: DoclingComponentType
|
||||
+ module_name: str
|
||||
+ error_message: str
|
||||
|
||||
|
||||
class PageSize(BaseModel):
|
||||
width: float = 0.0
|
||||
height: float = 0.0
|
||||
|
||||
@@ -19,6 +19,7 @@ from docling.datamodel.base_models import (
|
||||
AssembledUnit,
|
||||
ConversionStatus,
|
||||
DocumentStream,
|
||||
+ ErrorItem,
|
||||
FigureElement,
|
||||
Page,
|
||||
PageElement,
|
||||
@@ -118,7 +119,7 @@ class ConvertedDocument(BaseModel):
|
||||
input: InputDocument
|
||||
|
||||
status: ConversionStatus = ConversionStatus.PENDING # failure, success
|
||||
- errors: List[Dict] = [] # structure to keep errors
|
||||
+ errors: List[ErrorItem] = [] # structure to keep errors
|
||||
|
||||
pages: List[Page] = []
|
||||
assembled: Optional[AssembledUnit] = None
|
||||
|
||||
Reference in New Issue
Block a user