mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 15:32:30 +00:00
disabled auto file mime type detection, rely on extension
This commit is contained in:
parent
9d9ed0716f
commit
47c21a5edc
@ -273,7 +273,8 @@ class _DocumentConversionInput(BaseModel):
|
|||||||
formats: list[InputFormat] = []
|
formats: list[InputFormat] = []
|
||||||
|
|
||||||
if isinstance(obj, Path):
|
if isinstance(obj, Path):
|
||||||
mime = filetype.guess_mime(str(obj))
|
# mime = filetype.guess_mime(str(obj)) # We're having too many conflicts with documents being wrongly classified as ZIP
|
||||||
|
mime = None
|
||||||
if mime is None:
|
if mime is None:
|
||||||
ext = obj.suffix[1:]
|
ext = obj.suffix[1:]
|
||||||
mime = _DocumentConversionInput._mime_from_extension(ext)
|
mime = _DocumentConversionInput._mime_from_extension(ext)
|
||||||
@ -359,7 +360,7 @@ class _DocumentConversionInput(BaseModel):
|
|||||||
mime = FormatToMimeType[InputFormat.PPTX][0]
|
mime = FormatToMimeType[InputFormat.PPTX][0]
|
||||||
elif ext in FormatToExtensions[InputFormat.XLSX]:
|
elif ext in FormatToExtensions[InputFormat.XLSX]:
|
||||||
mime = FormatToMimeType[InputFormat.XLSX][0]
|
mime = FormatToMimeType[InputFormat.XLSX][0]
|
||||||
|
|
||||||
return mime
|
return mime
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
Loading…
Reference in New Issue
Block a user