mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 23:12:20 +00:00
rename new status, populate ConversionResult errors
Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
parent
4138110c6b
commit
8e57c85bf4
@ -22,7 +22,7 @@ class ConversionStatus(str, Enum):
|
|||||||
FAILURE = auto()
|
FAILURE = auto()
|
||||||
SUCCESS = auto()
|
SUCCESS = auto()
|
||||||
PARTIAL_SUCCESS = auto()
|
PARTIAL_SUCCESS = auto()
|
||||||
UNSUPPORTED = auto()
|
SKIPPED = auto()
|
||||||
|
|
||||||
|
|
||||||
class InputFormat(str, Enum):
|
class InputFormat(str, Enum):
|
||||||
@ -94,6 +94,7 @@ class DoclingComponentType(str, Enum):
|
|||||||
DOCUMENT_BACKEND = auto()
|
DOCUMENT_BACKEND = auto()
|
||||||
MODEL = auto()
|
MODEL = auto()
|
||||||
DOC_ASSEMBLER = auto()
|
DOC_ASSEMBLER = auto()
|
||||||
|
USER_INPUT = auto()
|
||||||
|
|
||||||
|
|
||||||
class ErrorItem(BaseModel):
|
class ErrorItem(BaseModel):
|
||||||
|
@ -15,7 +15,13 @@ from docling.backend.md_backend import MarkdownDocumentBackend
|
|||||||
from docling.backend.msexcel_backend import MsExcelDocumentBackend
|
from docling.backend.msexcel_backend import MsExcelDocumentBackend
|
||||||
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
|
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
|
||||||
from docling.backend.msword_backend import MsWordDocumentBackend
|
from docling.backend.msword_backend import MsWordDocumentBackend
|
||||||
from docling.datamodel.base_models import ConversionStatus, DocumentStream, InputFormat
|
from docling.datamodel.base_models import (
|
||||||
|
ConversionStatus,
|
||||||
|
DoclingComponentType,
|
||||||
|
DocumentStream,
|
||||||
|
ErrorItem,
|
||||||
|
InputFormat,
|
||||||
|
)
|
||||||
from docling.datamodel.document import (
|
from docling.datamodel.document import (
|
||||||
ConversionResult,
|
ConversionResult,
|
||||||
InputDocument,
|
InputDocument,
|
||||||
@ -262,11 +268,17 @@ class DocumentConverter:
|
|||||||
if valid:
|
if valid:
|
||||||
conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)
|
conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)
|
||||||
else:
|
else:
|
||||||
|
error_message = f"File format not allowed: {in_doc.file}"
|
||||||
if raises_on_error:
|
if raises_on_error:
|
||||||
raise ConversionError(f"Unsupported format in: {in_doc.file}")
|
raise ConversionError(error_message)
|
||||||
else:
|
else:
|
||||||
|
error_item = ErrorItem(
|
||||||
|
component_type=DoclingComponentType.USER_INPUT,
|
||||||
|
module_name="",
|
||||||
|
error_message=error_message,
|
||||||
|
)
|
||||||
conv_res = ConversionResult(
|
conv_res = ConversionResult(
|
||||||
input=in_doc, status=ConversionStatus.UNSUPPORTED
|
input=in_doc, status=ConversionStatus.SKIPPED, errors=[error_item]
|
||||||
)
|
)
|
||||||
|
|
||||||
return conv_res
|
return conv_res
|
||||||
|
@ -24,7 +24,7 @@ def test_convert_unsupported_doc_format_wout_exception(converter: DocumentConver
|
|||||||
result = converter.convert(
|
result = converter.convert(
|
||||||
DocumentStream(name="input.xyz", stream=BytesIO(b"xyz")), raises_on_error=False
|
DocumentStream(name="input.xyz", stream=BytesIO(b"xyz")), raises_on_error=False
|
||||||
)
|
)
|
||||||
assert result.status == ConversionStatus.UNSUPPORTED
|
assert result.status == ConversionStatus.SKIPPED
|
||||||
|
|
||||||
|
|
||||||
def test_convert_unsupported_doc_format_with_exception(converter: DocumentConverter):
|
def test_convert_unsupported_doc_format_with_exception(converter: DocumentConverter):
|
||||||
|
Loading…
Reference in New Issue
Block a user