From 7885c1d75112bac0e47ee650f795ed22bf6381a0 Mon Sep 17 00:00:00 2001 From: MoheyElDin Badr <56153924+MoheyEl-DinBadr@users.noreply.github.com> Date: Tue, 6 May 2025 09:40:13 +0300 Subject: [PATCH] Update document.py add docx to the mime types, and for extensions lower the case so they can be compared if they came in Upper case Signed-off-by: MoheyElDin Badr <56153924+MoheyEl-DinBadr@users.noreply.github.com> --- docling/datamodel/document.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 2b3aa9b5..5bb0352c 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -302,7 +302,7 @@ class _DocumentConversionInput(BaseModel): if ("." in obj.name and not obj.name.startswith(".")) else "" ) - mime = _DocumentConversionInput._mime_from_extension(ext) + mime = _DocumentConversionInput._mime_from_extension(ext.lower()) if mime is not None and mime.lower() == "application/zip": objname = obj.name.lower() if objname.endswith(".xlsx"): @@ -376,6 +376,8 @@ class _DocumentConversionInput(BaseModel): mime = FormatToMimeType[InputFormat.JSON_DOCLING][0] elif ext in FormatToExtensions[InputFormat.PDF]: mime = FormatToMimeType[InputFormat.PDF][0] + elif ext in FormatToExtensions[InputFormat.DOCX]: + mime = FormatToMimeType[InputFormat.DOCX][0] return mime @staticmethod