rename new status, populate ConversionResult errors

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
Panos Vagenas 2024-12-02 13:32:05 +01:00
parent 4138110c6b
commit 8e57c85bf4
3 changed files with 18 additions and 5 deletions

View File

@@ -22,7 +22,7 @@ class ConversionStatus(str, Enum):
FAILURE = auto()
SUCCESS = auto()
PARTIAL_SUCCESS = auto()
UNSUPPORTED = auto()
SKIPPED = auto()
class InputFormat(str, Enum):
@@ -94,6 +94,7 @@ class DoclingComponentType(str, Enum):
DOCUMENT_BACKEND = auto()
MODEL = auto()
DOC_ASSEMBLER = auto()
USER_INPUT = auto()
class ErrorItem(BaseModel):

View File

@@ -15,7 +15,13 @@ from docling.backend.md_backend import MarkdownDocumentBackend
from docling.backend.msexcel_backend import MsExcelDocumentBackend
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
from docling.backend.msword_backend import MsWordDocumentBackend
from docling.datamodel.base_models import ConversionStatus, DocumentStream, InputFormat
from docling.datamodel.base_models import (
ConversionStatus,
DoclingComponentType,
DocumentStream,
ErrorItem,
InputFormat,
)
from docling.datamodel.document import (
ConversionResult,
InputDocument,
@@ -262,11 +268,17 @@ class DocumentConverter:
if valid:
conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)
else:
error_message = f"File format not allowed: {in_doc.file}"
if raises_on_error:
raise ConversionError(f"Unsupported format in: {in_doc.file}")
raise ConversionError(error_message)
else:
error_item = ErrorItem(
component_type=DoclingComponentType.USER_INPUT,
module_name="",
error_message=error_message,
)
conv_res = ConversionResult(
input=in_doc, status=ConversionStatus.UNSUPPORTED
input=in_doc, status=ConversionStatus.SKIPPED, errors=[error_item]
)
return conv_res

View File

@@ -24,7 +24,7 @@ def test_convert_unsupported_doc_format_wout_exception(converter: DocumentConver
result = converter.convert(
DocumentStream(name="input.xyz", stream=BytesIO(b"xyz")), raises_on_error=False
)
assert result.status == ConversionStatus.UNSUPPORTED
assert result.status == ConversionStatus.SKIPPED
def test_convert_unsupported_doc_format_with_exception(converter: DocumentConverter):