mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
Merge branch 'main' into fix/fix-issue-with-detecting-docx-files
# Conflicts:
#	docling/datamodel/document.py
This commit is contained in:
commit
ec6bd87ab9
@@ -303,6 +303,14 @@ class _DocumentConversionInput(BaseModel):
|
||||
else ""
|
||||
)
|
||||
mime = _DocumentConversionInput._mime_from_extension(ext.lower())
|
||||
if mime is not None and mime.lower() == "application/zip":
|
||||
objname = obj.name.lower()
|
||||
if objname.endswith(".xlsx"):
|
||||
mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
elif objname.endswith(".docx"):
|
||||
mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
elif objname.endswith(".pptx"):
|
||||
mime = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||
|
||||
mime = mime or _DocumentConversionInput._detect_html_xhtml(content)
|
||||
mime = mime or _DocumentConversionInput._detect_csv(content)
|
||||
|
Loading…
Reference in New Issue
Block a user