Update document.py

Add DOCX to the MIME types, and lowercase file extensions before lookup so they still match when they arrive in upper case.

Signed-off-by: MoheyElDin Badr <56153924+MoheyEl-DinBadr@users.noreply.github.com>
This commit is contained in:
MoheyElDin Badr 2025-05-06 09:40:13 +03:00 committed by GitHub
parent 7c705739f9
commit bcb29caf96
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -302,7 +302,7 @@ class _DocumentConversionInput(BaseModel):
if ("." in obj.name and not obj.name.startswith(".")) if ("." in obj.name and not obj.name.startswith("."))
else "" else ""
) )
mime = _DocumentConversionInput._mime_from_extension(ext) mime = _DocumentConversionInput._mime_from_extension(ext.lower())
mime = mime or _DocumentConversionInput._detect_html_xhtml(content) mime = mime or _DocumentConversionInput._detect_html_xhtml(content)
mime = mime or _DocumentConversionInput._detect_csv(content) mime = mime or _DocumentConversionInput._detect_csv(content)
@ -368,6 +368,8 @@ class _DocumentConversionInput(BaseModel):
mime = FormatToMimeType[InputFormat.JSON_DOCLING][0] mime = FormatToMimeType[InputFormat.JSON_DOCLING][0]
elif ext in FormatToExtensions[InputFormat.PDF]: elif ext in FormatToExtensions[InputFormat.PDF]:
mime = FormatToMimeType[InputFormat.PDF][0] mime = FormatToMimeType[InputFormat.PDF][0]
elif ext in FormatToExtensions[InputFormat.DOCX]:
mime = FormatToMimeType[InputFormat.DOCX][0]
return mime return mime
@staticmethod @staticmethod