mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
apply to other ms office zip formats
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
492680f468
commit
a09273ecb8
@ -283,14 +283,13 @@ class _DocumentConversionInput(BaseModel):
|
|||||||
if mime is None: # must guess from
|
if mime is None: # must guess from
|
||||||
with obj.open("rb") as f:
|
with obj.open("rb") as f:
|
||||||
content = f.read(1024) # Read first 1KB
|
content = f.read(1024) # Read first 1KB
|
||||||
if (
|
if mime is not None and mime.lower() == "application/zip":
|
||||||
mime is not None
|
if obj.suffixes[-1].lower() == ".xlsx":
|
||||||
and mime.lower() == "application/zip"
|
mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||||
and obj.suffixes[-1].lower() == ".xlsx"
|
elif obj.suffixes[-1].lower() == ".docx":
|
||||||
):
|
mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||||
mime = (
|
elif obj.suffixes[-1].lower() == ".pptx":
|
||||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
mime = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||||
)
|
|
||||||
|
|
||||||
elif isinstance(obj, DocumentStream):
|
elif isinstance(obj, DocumentStream):
|
||||||
content = obj.stream.read(8192)
|
content = obj.stream.read(8192)
|
||||||
|
Loading…
Reference in New Issue
Block a user