From bcb29caf962cdda7afb6cd0c1fd13a69f3a223c7 Mon Sep 17 00:00:00 2001 From: MoheyElDin Badr <56153924+MoheyEl-DinBadr@users.noreply.github.com> Date: Tue, 6 May 2025 09:40:13 +0300 Subject: [PATCH] Update document.py Add docx to the MIME types, and lowercase extensions so they can be compared even if they come in upper case. Signed-off-by: MoheyElDin Badr <56153924+MoheyEl-DinBadr@users.noreply.github.com> --- docling/datamodel/document.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 668e8249..64ff4ac8 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -302,7 +302,7 @@ class _DocumentConversionInput(BaseModel): if ("." in obj.name and not obj.name.startswith(".")) else "" ) - mime = _DocumentConversionInput._mime_from_extension(ext) + mime = _DocumentConversionInput._mime_from_extension(ext.lower()) mime = mime or _DocumentConversionInput._detect_html_xhtml(content) mime = mime or _DocumentConversionInput._detect_csv(content) @@ -368,6 +368,8 @@ class _DocumentConversionInput(BaseModel): mime = FormatToMimeType[InputFormat.JSON_DOCLING][0] elif ext in FormatToExtensions[InputFormat.PDF]: mime = FormatToMimeType[InputFormat.PDF][0] + elif ext in FormatToExtentions[InputFormat.DOCX]: + mime = FormatToMimeType[InputFormat.DOCX][0] return mime @staticmethod