mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
another fix to the tests
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
parent
311640fb9d
commit
9e54a74410
@ -214,7 +214,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
except:
|
||||
pass
|
||||
except:
|
||||
_log.warn("item has no children")
|
||||
_log.warning("item has no children")
|
||||
pass
|
||||
|
||||
return "".join(result) + " "
|
||||
@ -352,14 +352,14 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
parent=self.parents[self.level],
|
||||
)
|
||||
else:
|
||||
_log.warn("list-item has no text: ", element)
|
||||
_log.debug("list-item has no text: ", element)
|
||||
|
||||
def handle_table(self, element, idx, doc):
|
||||
"""Handles table tags."""
|
||||
|
||||
nested_tables = element.find("table")
|
||||
if nested_tables is not None:
|
||||
_log.warn("detected nested tables: skipping for now")
|
||||
_log.warning("detected nested tables: skipping for now")
|
||||
return
|
||||
|
||||
# Count the number of rows (number of <tr> elements)
|
||||
@ -398,10 +398,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
try:
|
||||
text = self.extract_table_cell_text(html_cell)
|
||||
except Exception as exc:
|
||||
_log.warn("exception: ", exc)
|
||||
exit(-1)
|
||||
|
||||
# label = html_cell.name
|
||||
_log.warning("exception: ", exc)
|
||||
|
||||
col_span = int(html_cell.get("colspan", 1))
|
||||
row_span = int(html_cell.get("rowspan", 1))
|
||||
@ -469,49 +466,49 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
fig_ref = None
|
||||
|
||||
img = element.find(["img"])
|
||||
_log.info(img)
|
||||
|
||||
if img is not None and img.has_attr("src"):
|
||||
fig_uri = img["src"]
|
||||
_log.info(fig_uri)
|
||||
|
||||
dpi = 128
|
||||
if fig_uri.startswith("//"):
|
||||
fig_uri = "https:" + fig_uri
|
||||
|
||||
dpi: int = 128
|
||||
try:
|
||||
dpi = int(img["dpi"])
|
||||
except:
|
||||
_log.debug("could not identify `dpi` of image")
|
||||
|
||||
width = 128
|
||||
width: float = 128.0
|
||||
try:
|
||||
width = int(img["width"])
|
||||
width = float(img["width"])
|
||||
except:
|
||||
_log.debug("could not identify `width` of image")
|
||||
|
||||
height = 128
|
||||
height: float = 128.0
|
||||
try:
|
||||
height = int(img["height"])
|
||||
height = float(img["height"])
|
||||
except:
|
||||
_log.debug("could not identify `height` of image")
|
||||
|
||||
if fig_uri.endswith(".jpg"):
|
||||
fig_ref = ImageRef(
|
||||
mimetype="image/jpg", dpi=dpi, size=Size(width, height), uri=fig_uri
|
||||
)
|
||||
size = Size(width=width, height=height)
|
||||
|
||||
elif fig_uri.endswith(".jpeg"):
|
||||
if fig_uri.endswith(".jpg") or fig_uri.endswith(".jpeg"):
|
||||
fig_ref = ImageRef(
|
||||
mimetype="image/jpg", dpi=dpi, size=Size(width, height), uri=fig_uri
|
||||
mimetype="image/jpeg", dpi=dpi, size=size, uri=fig_uri
|
||||
)
|
||||
|
||||
elif fig_uri.endswith(".png"):
|
||||
fig_ref = ImageRef(
|
||||
mimetype="image/png", dpi=dpi, size=Size(width, height), uri=fig_uri
|
||||
mimetype="image/png", dpi=dpi, size=size, uri=fig_uri
|
||||
)
|
||||
|
||||
elif fig_uri.endswith(".svg"):
|
||||
fig_ref = ImageRef(
|
||||
mimetype="image/svg", dpi=dpi, size=Size(width, height), uri=fig_uri
|
||||
mimetype="image/svg", dpi=dpi, size=size, uri=fig_uri
|
||||
)
|
||||
else:
|
||||
_log.debug(f"We do not yet support uri of type: {fig_uri}")
|
||||
|
||||
return fig_ref
|
||||
|
||||
@ -537,8 +534,6 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
fig_ref = self._get_imageref(element)
|
||||
fig_caption = self._get_figcaption(element, doc)
|
||||
|
||||
_log.warn(fig_ref)
|
||||
|
||||
doc.add_picture(
|
||||
parent=self.parents[self.level], image=fig_ref, caption=fig_caption
|
||||
)
|
||||
|
@ -5906,6 +5906,15 @@
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"image": {
|
||||
"mimetype": "image/jpeg",
|
||||
"dpi": 128,
|
||||
"size": {
|
||||
"width": 220.0,
|
||||
"height": 205.0
|
||||
},
|
||||
"uri": "https://upload.wikimedia.org/wikipedia/commons/thumb/8/82/Pacific_Black_Ducks_on_pond_ducking.jpg/220px-Pacific_Black_Ducks_on_pond_ducking.jpg"
|
||||
},
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
@ -5923,6 +5932,15 @@
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"image": {
|
||||
"mimetype": "image/jpeg",
|
||||
"dpi": 128,
|
||||
"size": {
|
||||
"width": 220.0,
|
||||
"height": 214.0
|
||||
},
|
||||
"uri": "https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Mallard-drake-chicago-march-2024.jpg/220px-Mallard-drake-chicago-march-2024.jpg"
|
||||
},
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
@ -5940,6 +5958,15 @@
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"image": {
|
||||
"mimetype": "image/jpeg",
|
||||
"dpi": 128,
|
||||
"size": {
|
||||
"width": 220.0,
|
||||
"height": 128.0
|
||||
},
|
||||
"uri": "https://upload.wikimedia.org/wikipedia/commons/thumb/7/7d/Wood-ducks-male-female-chicago-march-2024.jpg/220px-Wood-ducks-male-female-chicago-march-2024.jpg"
|
||||
},
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
@ -5957,6 +5984,15 @@
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"image": {
|
||||
"mimetype": "image/jpeg",
|
||||
"dpi": 128,
|
||||
"size": {
|
||||
"width": 220.0,
|
||||
"height": 147.0
|
||||
},
|
||||
"uri": "https://upload.wikimedia.org/wikipedia/commons/thumb/5/54/Mallard_drake_.02.jpg/220px-Mallard_drake_.02.jpg"
|
||||
},
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
@ -5974,6 +6010,15 @@
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"image": {
|
||||
"mimetype": "image/jpeg",
|
||||
"dpi": 128,
|
||||
"size": {
|
||||
"width": 220.0,
|
||||
"height": 157.0
|
||||
},
|
||||
"uri": "https://upload.wikimedia.org/wikipedia/commons/thumb/5/51/Mandarin.duck.arp.jpg/220px-Mandarin.duck.arp.jpg"
|
||||
},
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
@ -5991,6 +6036,15 @@
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"image": {
|
||||
"mimetype": "image/jpeg",
|
||||
"dpi": 128,
|
||||
"size": {
|
||||
"width": 220.0,
|
||||
"height": 147.0
|
||||
},
|
||||
"uri": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/d0/Last_day_in_Ushuaia%2C_Argentina.Flying_Steamer-Ducks_%28Tachyeres_patachonicus%29_in_various_artistic_settings.Harbour_silhouettes._%2825921897721%29.jpg/220px-Last_day_in_Ushuaia%2C_Argentina.Flying_Steamer-Ducks_%28Tachyeres_patachonicus%29_in_various_artistic_settings.Harbour_silhouettes._%2825921897721%29.jpg"
|
||||
},
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
@ -6008,6 +6062,15 @@
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"image": {
|
||||
"mimetype": "image/jpeg",
|
||||
"dpi": 128,
|
||||
"size": {
|
||||
"width": 220.0,
|
||||
"height": 165.0
|
||||
},
|
||||
"uri": "https://upload.wikimedia.org/wikipedia/commons/thumb/a/a5/Female_Mallard_at_Menacuddle_Well.jpg/220px-Female_Mallard_at_Menacuddle_Well.jpg"
|
||||
},
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
@ -6025,6 +6088,15 @@
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"image": {
|
||||
"mimetype": "image/jpeg",
|
||||
"dpi": 128,
|
||||
"size": {
|
||||
"width": 220.0,
|
||||
"height": 106.0
|
||||
},
|
||||
"uri": "https://upload.wikimedia.org/wikipedia/commons/thumb/6/65/Duck_1_filter_teeth_edit.jpg/220px-Duck_1_filter_teeth_edit.jpg"
|
||||
},
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
@ -6059,6 +6131,15 @@
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"image": {
|
||||
"mimetype": "image/jpeg",
|
||||
"dpi": 128,
|
||||
"size": {
|
||||
"width": 220.0,
|
||||
"height": 165.0
|
||||
},
|
||||
"uri": "https://upload.wikimedia.org/wikipedia/commons/thumb/a/a6/Parrulo_-Muscovy_duckling.jpg/220px-Parrulo_-Muscovy_duckling.jpg"
|
||||
},
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
@ -6093,6 +6174,15 @@
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"image": {
|
||||
"mimetype": "image/jpeg",
|
||||
"dpi": 128,
|
||||
"size": {
|
||||
"width": 220.0,
|
||||
"height": 220.0
|
||||
},
|
||||
"uri": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/d8/Tunnel_of_ducks.jpg/220px-Tunnel_of_ducks.jpg"
|
||||
},
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
@ -6110,6 +6200,15 @@
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"image": {
|
||||
"mimetype": "image/png",
|
||||
"dpi": 128,
|
||||
"size": {
|
||||
"width": 130.0,
|
||||
"height": 149.0
|
||||
},
|
||||
"uri": "https://upload.wikimedia.org/wikipedia/commons/thumb/0/04/Maaninka.vaakuna.svg/130px-Maaninka.vaakuna.svg.png"
|
||||
},
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user