From 2a63e828251b7f3421e135e96153eee06d15bac2 Mon Sep 17 00:00:00 2001 From: MoheyElDin Badr Date: Mon, 19 May 2025 07:48:40 +0300 Subject: [PATCH] fix detecting files with uppercase extensions Signed-off-by: MoheyElDin Badr --- docling/datamodel/document.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 2b3aa9b5..984cf02b 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -302,7 +302,7 @@ class _DocumentConversionInput(BaseModel): if ("." in obj.name and not obj.name.startswith(".")) else "" ) - mime = _DocumentConversionInput._mime_from_extension(ext) + mime = _DocumentConversionInput._mime_from_extension(ext.lower()) if mime is not None and mime.lower() == "application/zip": objname = obj.name.lower() if objname.endswith(".xlsx"): @@ -376,6 +376,13 @@ class _DocumentConversionInput(BaseModel): mime = FormatToMimeType[InputFormat.JSON_DOCLING][0] elif ext in FormatToExtensions[InputFormat.PDF]: mime = FormatToMimeType[InputFormat.PDF][0] + elif ext in FormatToExtensions[InputFormat.DOCX]: + mime = FormatToMimeType[InputFormat.DOCX][0] + elif ext in FormatToExtensions[InputFormat.PPTX]: + mime = FormatToMimeType[InputFormat.PPTX][0] + elif ext in FormatToExtensions[InputFormat.XLSX]: + mime = FormatToMimeType[InputFormat.XLSX][0] + return mime @staticmethod