fix: guess HTML content starting with script tag (#1673)

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
Cesar Berrospi Ramis
2025-06-02 08:43:24 +02:00
committed by GitHub
parent 3942923125
commit 984cb137f6
2 changed files with 12 additions and 1 deletion

View File

@@ -412,7 +412,11 @@ class _DocumentConversionInput(BaseModel):
else:
return "application/xml"
if re.match(r"<!doctype\s+html|<html|<head|<body", content_str):
if re.match(
r"(<script.*?>.*?</script>\s*)?(<!doctype\s+html|<html|<head|<body)",
content_str,
re.DOTALL,
):
return "text/html"
p = re.compile(