restore guess format

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2025-07-25 15:15:05 +02:00
parent bbb735d2de
commit 79c59cb2b0

View File

@ -325,15 +325,15 @@ class _DocumentConversionInput(BaseModel):
formats = MimeTypeToFormat.get(mime, []) formats = MimeTypeToFormat.get(mime, [])
_log.info(f"detected formats: {formats}") _log.info(f"detected formats: {formats}")
input_format: Optional[InputFormat] = None if formats:
if len(formats) == 1: if len(formats) == 1 and mime not in ("text/plain"):
input_format = formats[0] return formats[0]
else: # ambiguity in formats
if content: return _DocumentConversionInput._guess_from_content(
input_format = _DocumentConversionInput._guess_from_content( content, mime, formats
content, mime, formats )
) else:
return input_format return None
@staticmethod @staticmethod
def _guess_from_content( def _guess_from_content(
@ -342,9 +342,6 @@ class _DocumentConversionInput(BaseModel):
"""Guess the input format of a document by checking part of its content.""" """Guess the input format of a document by checking part of its content."""
input_format: Optional[InputFormat] = None input_format: Optional[InputFormat] = None
if len(formats) == 1:
input_format = formats[0]
if mime == "application/xml": if mime == "application/xml":
content_str = content.decode("utf-8") content_str = content.decode("utf-8")
match_doctype = re.search(r"<!DOCTYPE [^>]+>", content_str) match_doctype = re.search(r"<!DOCTYPE [^>]+>", content_str)