Rename ConversionStatus.UNSUPPORTED to SKIPPED and populate ConversionResult errors for disallowed input formats

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
Panos Vagenas 2024-12-02 13:32:05 +01:00
parent 4138110c6b
commit 8e57c85bf4
3 changed files with 18 additions and 5 deletions

View File

@ -22,7 +22,7 @@ class ConversionStatus(str, Enum):
FAILURE = auto() FAILURE = auto()
SUCCESS = auto() SUCCESS = auto()
PARTIAL_SUCCESS = auto() PARTIAL_SUCCESS = auto()
UNSUPPORTED = auto() SKIPPED = auto()
class InputFormat(str, Enum): class InputFormat(str, Enum):
@ -94,6 +94,7 @@ class DoclingComponentType(str, Enum):
DOCUMENT_BACKEND = auto() DOCUMENT_BACKEND = auto()
MODEL = auto() MODEL = auto()
DOC_ASSEMBLER = auto() DOC_ASSEMBLER = auto()
USER_INPUT = auto()
class ErrorItem(BaseModel): class ErrorItem(BaseModel):

View File

@ -15,7 +15,13 @@ from docling.backend.md_backend import MarkdownDocumentBackend
from docling.backend.msexcel_backend import MsExcelDocumentBackend from docling.backend.msexcel_backend import MsExcelDocumentBackend
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
from docling.backend.msword_backend import MsWordDocumentBackend from docling.backend.msword_backend import MsWordDocumentBackend
from docling.datamodel.base_models import ConversionStatus, DocumentStream, InputFormat from docling.datamodel.base_models import (
ConversionStatus,
DoclingComponentType,
DocumentStream,
ErrorItem,
InputFormat,
)
from docling.datamodel.document import ( from docling.datamodel.document import (
ConversionResult, ConversionResult,
InputDocument, InputDocument,
@ -262,11 +268,17 @@ class DocumentConverter:
if valid: if valid:
conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error) conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)
else: else:
error_message = f"File format not allowed: {in_doc.file}"
if raises_on_error: if raises_on_error:
raise ConversionError(f"Unsupported format in: {in_doc.file}") raise ConversionError(error_message)
else: else:
error_item = ErrorItem(
component_type=DoclingComponentType.USER_INPUT,
module_name="",
error_message=error_message,
)
conv_res = ConversionResult( conv_res = ConversionResult(
input=in_doc, status=ConversionStatus.UNSUPPORTED input=in_doc, status=ConversionStatus.SKIPPED, errors=[error_item]
) )
return conv_res return conv_res

View File

@ -24,7 +24,7 @@ def test_convert_unsupported_doc_format_wout_exception(converter: DocumentConver
result = converter.convert( result = converter.convert(
DocumentStream(name="input.xyz", stream=BytesIO(b"xyz")), raises_on_error=False DocumentStream(name="input.xyz", stream=BytesIO(b"xyz")), raises_on_error=False
) )
assert result.status == ConversionStatus.UNSUPPORTED assert result.status == ConversionStatus.SKIPPED
def test_convert_unsupported_doc_format_with_exception(converter: DocumentConverter): def test_convert_unsupported_doc_format_with_exception(converter: DocumentConverter):