From 48d5405db1b9141eaa7f228bacc46fc600f25f17 Mon Sep 17 00:00:00 2001
From: Tim Kellogg
Date: Wed, 9 Apr 2025 06:56:03 -0400
Subject: [PATCH] bug: auto-recognize .xlsx files

Signed-off-by: Tim Kellogg
---
Review note (not part of the commit message): the extension test uses
`obj.suffix` rather than `obj.suffixes[-1]`. `suffixes` is an empty list for a
path with no extension, so a zip-sniffed file named e.g. `archive` would raise
IndexError; `suffix` returns "" in that case and the comparison is just False.
For paths that do have an extension the two expressions are equal.

 docling/datamodel/document.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py
index 43894b07..fcc37484 100644
--- a/docling/datamodel/document.py
+++ b/docling/datamodel/document.py
@@ -283,6 +283,9 @@ class _DocumentConversionInput(BaseModel):
             if mime is None:  # must guess from
                 with obj.open("rb") as f:
                     content = f.read(1024)  # Read first 1KB
+            if mime is not None and mime.lower() == "application/zip" and obj.suffix.lower() == ".xlsx":
+                mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+

         elif isinstance(obj, DocumentStream):
             content = obj.stream.read(8192)