mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
fix: Skip NavigableString in HTML parsing
Signed-off-by: higuhigu-lb <higuchi@lightblute-tech.com>
This commit is contained in:
parent
8ada0bccc7
commit
624b392e79
@@ -3,7 +3,7 @@ from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Set, Union
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
from docling_core.types.doc import (
|
||||
DocItemLabel,
|
||||
DoclingDocument,
|
||||
@@ -92,6 +92,8 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
try:
|
||||
# Iterate over elements in the body of the document
|
||||
for idx, element in enumerate(element.children):
|
||||
if isinstance(element, NavigableString):
|
||||
continue # Skip over navigable strings
|
||||
try:
|
||||
self.analyse_element(element, idx, doc)
|
||||
except Exception as exc_child:
|
||||
|
Loading…
Reference in New Issue
Block a user