diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 70d08b75..93dfd1a5 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -283,14 +283,13 @@ class _DocumentConversionInput(BaseModel): if mime is None: # must guess from with obj.open("rb") as f: content = f.read(1024) # Read first 1KB - if ( - mime is not None - and mime.lower() == "application/zip" - and obj.suffixes[-1].lower() == ".xlsx" - ): - mime = ( - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - ) + if mime is not None and mime.lower() == "application/zip": + if obj.suffixes[-1].lower() == ".xlsx": + mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + elif obj.suffixes[-1].lower() == ".docx": + mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + elif obj.suffixes[-1].lower() == ".pptx": + mime = "application/vnd.openxmlformats-officedocument.presentationml.presentation" elif isinstance(obj, DocumentStream): content = obj.stream.read(8192)