diff --git a/docling/backend/msexcel_backend.py b/docling/backend/msexcel_backend.py index 2e6e2077..cc2e4106 100644 --- a/docling/backend/msexcel_backend.py +++ b/docling/backend/msexcel_backend.py @@ -76,8 +76,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend): origin = DocumentOrigin( filename=self.file.name or "file", - # mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document", + mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", binary_hash=self.document_hash, ) diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 9065c9df..311d6d01 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -32,7 +32,7 @@ class InputFormat(str, Enum): PDF = "pdf" ASCIIDOC = "asciidoc" MD = "md" - EXCEL = "excel" + XLSX = "xlsx" class OutputFormat(str, Enum): @@ -50,7 +50,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = { InputFormat.HTML: ["html", "htm", "xhtml"], InputFormat.IMAGE: ["jpg", "jpeg", "png", "tif", "tiff", "bmp"], InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"], - InputFormat.EXCEL: ["xlsx"], + InputFormat.XLSX: ["xlsx"], } FormatToMimeType: Dict[InputFormat, List[str]] = { @@ -74,7 +74,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = { InputFormat.PDF: ["application/pdf"], InputFormat.ASCIIDOC: ["text/asciidoc"], InputFormat.MD: ["text/markdown", "text/x-markdown"], - InputFormat.EXCEL: [ + InputFormat.XLSX: [ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ], }