mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
Update document.py
Add docx to the MIME types, and lowercase file extensions so they can be compared even if they come in upper case. Signed-off-by: MoheyElDin Badr <56153924+MoheyEl-DinBadr@users.noreply.github.com>
This commit is contained in:
parent
7c705739f9
commit
bcb29caf96
@ -302,7 +302,7 @@ class _DocumentConversionInput(BaseModel):
|
|||||||
if ("." in obj.name and not obj.name.startswith("."))
|
if ("." in obj.name and not obj.name.startswith("."))
|
||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
mime = _DocumentConversionInput._mime_from_extension(ext)
|
mime = _DocumentConversionInput._mime_from_extension(ext.lower())
|
||||||
|
|
||||||
mime = mime or _DocumentConversionInput._detect_html_xhtml(content)
|
mime = mime or _DocumentConversionInput._detect_html_xhtml(content)
|
||||||
mime = mime or _DocumentConversionInput._detect_csv(content)
|
mime = mime or _DocumentConversionInput._detect_csv(content)
|
||||||
@ -368,6 +368,8 @@ class _DocumentConversionInput(BaseModel):
|
|||||||
mime = FormatToMimeType[InputFormat.JSON_DOCLING][0]
|
mime = FormatToMimeType[InputFormat.JSON_DOCLING][0]
|
||||||
elif ext in FormatToExtensions[InputFormat.PDF]:
|
elif ext in FormatToExtensions[InputFormat.PDF]:
|
||||||
mime = FormatToMimeType[InputFormat.PDF][0]
|
mime = FormatToMimeType[InputFormat.PDF][0]
|
||||||
|
elif ext in FormatToExtentions[InputFormat.DOCX]:
|
||||||
|
mime = FormatToMimeType[InputFormat.DOCX][0]
|
||||||
return mime
|
return mime
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
Loading…
Reference in New Issue
Block a user