From 9d9ed0716f18ec879d058e6cf9a554f35a815cf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o?= Date: Thu, 9 Jan 2025 17:49:20 -0300 Subject: [PATCH] add more file types when inferring the mime type from extension --- docling/datamodel/document.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 136428e8..0483dc2a 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -350,6 +350,16 @@ class _DocumentConversionInput(BaseModel): mime = FormatToMimeType[InputFormat.HTML][0] elif ext in FormatToExtensions[InputFormat.MD]: mime = FormatToMimeType[InputFormat.MD][0] + + elif ext in FormatToExtensions[InputFormat.PDF]: + mime = FormatToMimeType[InputFormat.PDF][0] + elif ext in FormatToExtensions[InputFormat.DOCX]: + mime = FormatToMimeType[InputFormat.DOCX][0] + elif ext in FormatToExtensions[InputFormat.PPTX]: + mime = FormatToMimeType[InputFormat.PPTX][0] + elif ext in FormatToExtensions[InputFormat.XLSX]: + mime = FormatToMimeType[InputFormat.XLSX][0] + return mime @staticmethod