From bcb29caf962cdda7afb6cd0c1fd13a69f3a223c7 Mon Sep 17 00:00:00 2001 From: MoheyElDin Badr <56153924+MoheyEl-DinBadr@users.noreply.github.com> Date: Tue, 6 May 2025 09:40:13 +0300 Subject: [PATCH 1/2] Update document.py: add DOCX to the MIME types, and lowercase file extensions so they still match when they arrive in upper case Signed-off-by: MoheyElDin Badr <56153924+MoheyEl-DinBadr@users.noreply.github.com> --- docling/datamodel/document.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 668e8249..64ff4ac8 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -302,7 +302,7 @@ class _DocumentConversionInput(BaseModel): if ("." in obj.name and not obj.name.startswith(".")) else "" ) - mime = _DocumentConversionInput._mime_from_extension(ext) + mime = _DocumentConversionInput._mime_from_extension(ext.lower()) mime = mime or _DocumentConversionInput._detect_html_xhtml(content) mime = mime or _DocumentConversionInput._detect_csv(content) @@ -368,6 +368,8 @@ class _DocumentConversionInput(BaseModel): mime = FormatToMimeType[InputFormat.JSON_DOCLING][0] elif ext in FormatToExtensions[InputFormat.PDF]: mime = FormatToMimeType[InputFormat.PDF][0] + elif ext in FormatToExtentions[InputFormat.DOCX]: + mime = FormatToMimeType[InputFormat.DOCX][0] return mime @staticmethod From dd6cb2056258ed200f596ed84f811564e05727da Mon Sep 17 00:00:00 2001 From: Mohey El-Din Badr Date: Wed, 7 May 2025 12:27:29 +0300 Subject: [PATCH 2/2] Add other types (PPTX, XLSX) --- docling/datamodel/document.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 5bb0352c..984cf02b 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -376,8 +376,13 @@ class _DocumentConversionInput(BaseModel): mime = FormatToMimeType[InputFormat.JSON_DOCLING][0] elif ext in 
FormatToExtensions[InputFormat.PDF]: mime = FormatToMimeType[InputFormat.PDF][0] - elif ext in FormatToExtentions[InputFormat.DOCX]: + elif ext in FormatToExtensions[InputFormat.DOCX]: mime = FormatToMimeType[InputFormat.DOCX][0] + elif ext in FormatToExtensions[InputFormat.PPTX]: + mime = FormatToMimeType[InputFormat.PPTX][0] + elif ext in FormatToExtensions[InputFormat.XLSX]: + mime = FormatToMimeType[InputFormat.XLSX][0] + return mime @staticmethod