mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-10 13:48:13 +00:00
fix: guess HTML content starting with script tag (#1673)
Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
3942923125
commit
984cb137f6
@@ -412,7 +412,11 @@ class _DocumentConversionInput(BaseModel):
 else:
 return "application/xml"

-if re.match(r"<!doctype\s+html|<html|<head|<body", content_str):
+if re.match(
+r"(<script.*?>.*?</script>\s*)?(<!doctype\s+html|<html|<head|<body)",
+content_str,
+re.DOTALL,
+):
 return "text/html"

 p = re.compile(
Reference in New Issue
Block a user