mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-23 18:45:00 +00:00
refactor(HTML): handle text from styled html (#1960)
* A new HTML backend that handles styled html (ignores it) as well as images. Images are parsed as placeholders with a caption, if it exists. Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> Co-authored-by: vaaale <2428222+vaaale@users.noreply.github.com> Signed-off-by: Alexander Vaagan <alexander.vaagan@gmail.com> Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> Signed-off-by: vaaale <2428222+vaaale@users.noreply.github.com> * tests(HTML): re-enable test_ordered_lists Re-enable test_ordered_lists regression test for the HTML backend since docling-core now supports ordered lists with a custom start value. Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> --------- Signed-off-by: Alexander Vaagan <alexander.vaagan@gmail.com> Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> Signed-off-by: vaaale <2428222+vaaale@users.noreply.github.com> Co-authored-by: Alexander Vaagan <2428222+vaaale@users.noreply.github.com>
This commit is contained in:
parent
5d98bcea1b
commit
a069b1175b
@ -1,10 +1,11 @@
|
|||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
import traceback
|
import traceback
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Final, Optional, Union, cast
|
from typing import Final, Optional, Union, cast
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, NavigableString, PageElement, Tag
|
from bs4 import BeautifulSoup, NavigableString, Tag
|
||||||
from bs4.element import PreformattedString
|
from bs4.element import PreformattedString
|
||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
DocItem,
|
DocItem,
|
||||||
@ -15,6 +16,7 @@ from docling_core.types.doc import (
|
|||||||
GroupLabel,
|
GroupLabel,
|
||||||
TableCell,
|
TableCell,
|
||||||
TableData,
|
TableData,
|
||||||
|
TextItem,
|
||||||
)
|
)
|
||||||
from docling_core.types.doc.document import ContentLayer
|
from docling_core.types.doc.document import ContentLayer
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
@ -26,10 +28,14 @@ from docling.datamodel.document import InputDocument
|
|||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
# tags that generate NodeItem elements
|
DEFAULT_IMAGE_WIDTH = 128
|
||||||
TAGS_FOR_NODE_ITEMS: Final = [
|
DEFAULT_IMAGE_HEIGHT = 128
|
||||||
|
|
||||||
|
# Tags that initiate distinct Docling items
|
||||||
|
_BLOCK_TAGS: Final = {
|
||||||
"address",
|
"address",
|
||||||
"details",
|
"details",
|
||||||
|
"figure",
|
||||||
"h1",
|
"h1",
|
||||||
"h2",
|
"h2",
|
||||||
"h3",
|
"h3",
|
||||||
@ -41,12 +47,9 @@ TAGS_FOR_NODE_ITEMS: Final = [
|
|||||||
"code",
|
"code",
|
||||||
"ul",
|
"ul",
|
||||||
"ol",
|
"ol",
|
||||||
"li",
|
|
||||||
"summary",
|
"summary",
|
||||||
"table",
|
"table",
|
||||||
"figure",
|
}
|
||||||
"img",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
class _Context(BaseModel):
|
class _Context(BaseModel):
|
||||||
@ -56,12 +59,16 @@ class _Context(BaseModel):
|
|||||||
|
|
||||||
class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||||
@override
|
@override
|
||||||
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
|
def __init__(
|
||||||
|
self,
|
||||||
|
in_doc: InputDocument,
|
||||||
|
path_or_stream: Union[BytesIO, Path],
|
||||||
|
):
|
||||||
super().__init__(in_doc, path_or_stream)
|
super().__init__(in_doc, path_or_stream)
|
||||||
self.soup: Optional[Tag] = None
|
self.soup: Optional[Tag] = None
|
||||||
# HTML file:
|
|
||||||
self.path_or_stream = path_or_stream
|
self.path_or_stream = path_or_stream
|
||||||
# Initialise the parents for the hierarchy
|
|
||||||
|
# Initialize the parents for the hierarchy
|
||||||
self.max_levels = 10
|
self.max_levels = 10
|
||||||
self.level = 0
|
self.level = 0
|
||||||
self.parents: dict[int, Optional[Union[DocItem, GroupItem]]] = {}
|
self.parents: dict[int, Optional[Union[DocItem, GroupItem]]] = {}
|
||||||
@ -70,13 +77,12 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self.parents[i] = None
|
self.parents[i] = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if isinstance(self.path_or_stream, BytesIO):
|
raw = (
|
||||||
text_stream = self.path_or_stream.getvalue()
|
path_or_stream.getvalue()
|
||||||
self.soup = BeautifulSoup(text_stream, "html.parser")
|
if isinstance(path_or_stream, BytesIO)
|
||||||
if isinstance(self.path_or_stream, Path):
|
else Path(path_or_stream).read_bytes()
|
||||||
with open(self.path_or_stream, "rb") as f:
|
)
|
||||||
html_content = f.read()
|
self.soup = BeautifulSoup(raw, "html.parser")
|
||||||
self.soup = BeautifulSoup(html_content, "html.parser")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"Could not initialize HTML backend for file with "
|
"Could not initialize HTML backend for file with "
|
||||||
@ -96,7 +102,6 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
def unload(self):
|
def unload(self):
|
||||||
if isinstance(self.path_or_stream, BytesIO):
|
if isinstance(self.path_or_stream, BytesIO):
|
||||||
self.path_or_stream.close()
|
self.path_or_stream.close()
|
||||||
|
|
||||||
self.path_or_stream = None
|
self.path_or_stream = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -106,211 +111,156 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
|
|
||||||
@override
|
@override
|
||||||
def convert(self) -> DoclingDocument:
|
def convert(self) -> DoclingDocument:
|
||||||
# access self.path_or_stream to load stuff
|
_log.debug("Starting HTML conversion...")
|
||||||
|
if not self.is_valid():
|
||||||
|
raise RuntimeError("Invalid HTML document.")
|
||||||
|
|
||||||
origin = DocumentOrigin(
|
origin = DocumentOrigin(
|
||||||
filename=self.file.name or "file",
|
filename=self.file.name or "file",
|
||||||
mimetype="text/html",
|
mimetype="text/html",
|
||||||
binary_hash=self.document_hash,
|
binary_hash=self.document_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
doc = DoclingDocument(name=self.file.stem or "file", origin=origin)
|
doc = DoclingDocument(name=self.file.stem or "file", origin=origin)
|
||||||
_log.debug("Trying to convert HTML...")
|
|
||||||
|
|
||||||
if self.is_valid():
|
assert self.soup is not None
|
||||||
assert self.soup is not None
|
# set the title as furniture, since it is part of the document metadata
|
||||||
content = self.soup.body or self.soup
|
title = self.soup.title
|
||||||
# Replace <br> tags with newline characters
|
if title:
|
||||||
# TODO: remove style to avoid losing text from tags like i, b, span, ...
|
doc.add_title(
|
||||||
for br in content("br"):
|
text=title.get_text(separator=" ", strip=True),
|
||||||
br.replace_with(NavigableString("\n"))
|
content_layer=ContentLayer.FURNITURE,
|
||||||
|
)
|
||||||
|
# remove scripts/styles
|
||||||
|
for tag in self.soup(["script", "style"]):
|
||||||
|
tag.decompose()
|
||||||
|
content = self.soup.body or self.soup
|
||||||
|
# normalize <br> tags
|
||||||
|
for br in content("br"):
|
||||||
|
br.replace_with(NavigableString("\n"))
|
||||||
|
# set default content layer
|
||||||
|
headers = content.find(["h1", "h2", "h3", "h4", "h5", "h6"])
|
||||||
|
self.content_layer = (
|
||||||
|
ContentLayer.BODY if headers is None else ContentLayer.FURNITURE
|
||||||
|
)
|
||||||
|
# reset context
|
||||||
|
self.ctx = _Context()
|
||||||
|
|
||||||
|
try:
|
||||||
|
self._walk(content, doc)
|
||||||
|
except Exception:
|
||||||
|
print(traceback.format_exc())
|
||||||
|
|
||||||
headers = content.find(["h1", "h2", "h3", "h4", "h5", "h6"])
|
|
||||||
self.content_layer = (
|
|
||||||
ContentLayer.BODY if headers is None else ContentLayer.FURNITURE
|
|
||||||
)
|
|
||||||
self.ctx = _Context() # reset context
|
|
||||||
self.walk(content, doc)
|
|
||||||
else:
|
|
||||||
raise RuntimeError(
|
|
||||||
f"Cannot convert doc with {self.document_hash} because the backend "
|
|
||||||
"failed to init."
|
|
||||||
)
|
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def walk(self, tag: Tag, doc: DoclingDocument) -> None:
|
def _walk(self, element: Tag, doc: DoclingDocument) -> None:
|
||||||
# Iterate over elements in the body of the document
|
"""Parse an XML tag by recursively walking its content.
|
||||||
text: str = ""
|
|
||||||
for element in tag.children:
|
|
||||||
if isinstance(element, Tag):
|
|
||||||
try:
|
|
||||||
self.analyze_tag(cast(Tag, element), doc)
|
|
||||||
except Exception as exc_child:
|
|
||||||
_log.error(
|
|
||||||
f"Error processing child from tag {tag.name}:\n{traceback.format_exc()}"
|
|
||||||
)
|
|
||||||
raise exc_child
|
|
||||||
elif isinstance(element, NavigableString) and not isinstance(
|
|
||||||
element, PreformattedString
|
|
||||||
):
|
|
||||||
# Floating text outside paragraphs or analyzed tags
|
|
||||||
text += element
|
|
||||||
siblings: list[Tag] = [
|
|
||||||
item for item in element.next_siblings if isinstance(item, Tag)
|
|
||||||
]
|
|
||||||
if element.next_sibling is None or any(
|
|
||||||
item.name in TAGS_FOR_NODE_ITEMS for item in siblings
|
|
||||||
):
|
|
||||||
text = text.strip()
|
|
||||||
if text and tag.name in ["div"]:
|
|
||||||
doc.add_text(
|
|
||||||
parent=self.parents[self.level],
|
|
||||||
label=DocItemLabel.TEXT,
|
|
||||||
text=text,
|
|
||||||
content_layer=self.content_layer,
|
|
||||||
)
|
|
||||||
text = ""
|
|
||||||
|
|
||||||
return
|
While walking, the method buffers inline text across tags like <b> or <span>,
|
||||||
|
emitting text nodes only at block boundaries.
|
||||||
|
|
||||||
def analyze_tag(self, tag: Tag, doc: DoclingDocument) -> None:
|
Args:
|
||||||
if tag.name in ["h1", "h2", "h3", "h4", "h5", "h6"]:
|
element: The XML tag to parse.
|
||||||
self.handle_header(tag, doc)
|
doc: The Docling document to be updated with the parsed content.
|
||||||
elif tag.name in ["p", "address", "summary"]:
|
"""
|
||||||
self.handle_paragraph(tag, doc)
|
buffer: list[str] = []
|
||||||
elif tag.name in ["pre", "code"]:
|
|
||||||
self.handle_code(tag, doc)
|
|
||||||
elif tag.name in ["ul", "ol"]:
|
|
||||||
self.handle_list(tag, doc)
|
|
||||||
elif tag.name in ["li"]:
|
|
||||||
self.handle_list_item(tag, doc)
|
|
||||||
elif tag.name == "table":
|
|
||||||
self.handle_table(tag, doc)
|
|
||||||
elif tag.name == "figure":
|
|
||||||
self.handle_figure(tag, doc)
|
|
||||||
elif tag.name == "img":
|
|
||||||
self.handle_image(tag, doc)
|
|
||||||
elif tag.name == "details":
|
|
||||||
self.handle_details(tag, doc)
|
|
||||||
else:
|
|
||||||
self.walk(tag, doc)
|
|
||||||
|
|
||||||
def get_text(self, item: PageElement) -> str:
|
def flush_buffer():
|
||||||
"""Get the text content of a tag."""
|
if not buffer:
|
||||||
parts: list[str] = self.extract_text_recursively(item)
|
return
|
||||||
|
text = "".join(buffer).strip()
|
||||||
return "".join(parts) + " "
|
buffer.clear()
|
||||||
|
if not text:
|
||||||
# Function to recursively extract text from all child nodes
|
return
|
||||||
def extract_text_recursively(self, item: PageElement) -> list[str]:
|
for part in text.split("\n"):
|
||||||
result: list[str] = []
|
seg = part.strip()
|
||||||
|
if seg:
|
||||||
if isinstance(item, NavigableString):
|
doc.add_text(
|
||||||
return [item]
|
DocItemLabel.TEXT,
|
||||||
|
seg,
|
||||||
tag = cast(Tag, item)
|
parent=self.parents[self.level],
|
||||||
if tag.name not in ["ul", "ol"]:
|
|
||||||
for child in tag:
|
|
||||||
# Recursively get the child's text content
|
|
||||||
result.extend(self.extract_text_recursively(child))
|
|
||||||
|
|
||||||
return ["".join(result) + " "]
|
|
||||||
|
|
||||||
def handle_details(self, element: Tag, doc: DoclingDocument) -> None:
|
|
||||||
"""Handle details tag (details) and its content."""
|
|
||||||
|
|
||||||
self.parents[self.level + 1] = doc.add_group(
|
|
||||||
name="details",
|
|
||||||
label=GroupLabel.SECTION,
|
|
||||||
parent=self.parents[self.level],
|
|
||||||
content_layer=self.content_layer,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.level += 1
|
|
||||||
self.walk(element, doc)
|
|
||||||
self.parents[self.level + 1] = None
|
|
||||||
self.level -= 1
|
|
||||||
|
|
||||||
def handle_header(self, element: Tag, doc: DoclingDocument) -> None:
|
|
||||||
"""Handles header tags (h1, h2, etc.)."""
|
|
||||||
hlevel = int(element.name.replace("h", ""))
|
|
||||||
text = element.text.strip()
|
|
||||||
|
|
||||||
self.content_layer = ContentLayer.BODY
|
|
||||||
|
|
||||||
if hlevel == 1:
|
|
||||||
for key in self.parents.keys():
|
|
||||||
self.parents[key] = None
|
|
||||||
|
|
||||||
self.level = 1
|
|
||||||
self.parents[self.level] = doc.add_text(
|
|
||||||
parent=self.parents[0],
|
|
||||||
label=DocItemLabel.TITLE,
|
|
||||||
text=text,
|
|
||||||
content_layer=self.content_layer,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
if hlevel > self.level:
|
|
||||||
# add invisible group
|
|
||||||
for i in range(self.level + 1, hlevel):
|
|
||||||
self.parents[i] = doc.add_group(
|
|
||||||
name=f"header-{i}",
|
|
||||||
label=GroupLabel.SECTION,
|
|
||||||
parent=self.parents[i - 1],
|
|
||||||
content_layer=self.content_layer,
|
content_layer=self.content_layer,
|
||||||
)
|
)
|
||||||
self.level = hlevel
|
|
||||||
|
|
||||||
elif hlevel < self.level:
|
for node in element.contents:
|
||||||
|
if isinstance(node, Tag):
|
||||||
|
name = node.name.lower()
|
||||||
|
if name == "img":
|
||||||
|
flush_buffer()
|
||||||
|
self._emit_image(node, doc)
|
||||||
|
elif name in _BLOCK_TAGS:
|
||||||
|
flush_buffer()
|
||||||
|
self._handle_block(node, doc)
|
||||||
|
elif node.find(_BLOCK_TAGS):
|
||||||
|
flush_buffer()
|
||||||
|
self._walk(node, doc)
|
||||||
|
else:
|
||||||
|
buffer.append(node.text)
|
||||||
|
elif isinstance(node, NavigableString) and not isinstance(
|
||||||
|
node, PreformattedString
|
||||||
|
):
|
||||||
|
buffer.append(str(node))
|
||||||
|
|
||||||
|
flush_buffer()
|
||||||
|
|
||||||
|
def _handle_heading(self, tag: Tag, doc: DoclingDocument) -> None:
|
||||||
|
tag_name = tag.name.lower()
|
||||||
|
# set default content layer to BODY as soon as we encounter a heading
|
||||||
|
self.content_layer = ContentLayer.BODY
|
||||||
|
level = int(tag_name[1])
|
||||||
|
text = tag.get_text(strip=True, separator=" ")
|
||||||
|
# the first level is for the title item
|
||||||
|
if level == 1:
|
||||||
|
for key in self.parents.keys():
|
||||||
|
self.parents[key] = None
|
||||||
|
self.level = 0
|
||||||
|
self.parents[self.level + 1] = doc.add_title(
|
||||||
|
text, content_layer=self.content_layer
|
||||||
|
)
|
||||||
|
# the other levels need to be lowered by 1 if a title was set
|
||||||
|
else:
|
||||||
|
level -= 1
|
||||||
|
if level > self.level:
|
||||||
|
# add invisible group
|
||||||
|
for i in range(self.level, level):
|
||||||
|
_log.debug(f"Adding invisible group to level {i}")
|
||||||
|
self.parents[i + 1] = doc.add_group(
|
||||||
|
name=f"header-{i + 1}",
|
||||||
|
label=GroupLabel.SECTION,
|
||||||
|
parent=self.parents[i],
|
||||||
|
content_layer=self.content_layer,
|
||||||
|
)
|
||||||
|
self.level = level
|
||||||
|
elif level < self.level:
|
||||||
# remove the tail
|
# remove the tail
|
||||||
for key in self.parents.keys():
|
for key in self.parents.keys():
|
||||||
if key > hlevel:
|
if key > level + 1:
|
||||||
|
_log.debug(f"Remove the tail of level {key}")
|
||||||
self.parents[key] = None
|
self.parents[key] = None
|
||||||
self.level = hlevel
|
self.level = level
|
||||||
|
self.parents[self.level + 1] = doc.add_heading(
|
||||||
self.parents[hlevel] = doc.add_heading(
|
|
||||||
parent=self.parents[hlevel - 1],
|
|
||||||
text=text,
|
|
||||||
level=hlevel - 1,
|
|
||||||
content_layer=self.content_layer,
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle_code(self, element: Tag, doc: DoclingDocument) -> None:
|
|
||||||
"""Handles monospace code snippets (pre)."""
|
|
||||||
if element.text is None:
|
|
||||||
return
|
|
||||||
text = element.text.strip()
|
|
||||||
if text:
|
|
||||||
doc.add_code(
|
|
||||||
parent=self.parents[self.level],
|
parent=self.parents[self.level],
|
||||||
text=text,
|
text=text,
|
||||||
|
level=self.level,
|
||||||
content_layer=self.content_layer,
|
content_layer=self.content_layer,
|
||||||
)
|
)
|
||||||
|
self.level += 1
|
||||||
|
for img_tag in tag("img"):
|
||||||
|
if isinstance(img_tag, Tag):
|
||||||
|
self._emit_image(img_tag, doc)
|
||||||
|
|
||||||
def handle_paragraph(self, element: Tag, doc: DoclingDocument) -> None:
|
def _handle_list(self, tag: Tag, doc: DoclingDocument) -> None:
|
||||||
"""Handles paragraph tags (p) or equivalent ones."""
|
tag_name = tag.name.lower()
|
||||||
if element.text is None:
|
|
||||||
return
|
|
||||||
text = element.text.strip()
|
|
||||||
if text:
|
|
||||||
doc.add_text(
|
|
||||||
parent=self.parents[self.level],
|
|
||||||
label=DocItemLabel.TEXT,
|
|
||||||
text=text,
|
|
||||||
content_layer=self.content_layer,
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle_list(self, element: Tag, doc: DoclingDocument) -> None:
|
|
||||||
"""Handles list tags (ul, ol) and their list items."""
|
|
||||||
|
|
||||||
start: Optional[int] = None
|
start: Optional[int] = None
|
||||||
if is_ordered := element.name == "ol":
|
name: str = ""
|
||||||
start_attr = element.get("start")
|
is_ordered = tag_name == "ol"
|
||||||
|
if is_ordered:
|
||||||
|
start_attr = tag.get("start")
|
||||||
if isinstance(start_attr, str) and start_attr.isnumeric():
|
if isinstance(start_attr, str) and start_attr.isnumeric():
|
||||||
start = int(start_attr)
|
start = int(start_attr)
|
||||||
name = "ordered list" + (f" start {start}" if start is not None else "")
|
name = "ordered list" + (f" start {start}" if start is not None else "")
|
||||||
else:
|
else:
|
||||||
name = "list"
|
name = "list"
|
||||||
# create a list group
|
# Create the list container
|
||||||
list_group = doc.add_list_group(
|
list_group = doc.add_list_group(
|
||||||
name=name,
|
name=name,
|
||||||
parent=self.parents[self.level],
|
parent=self.parents[self.level],
|
||||||
@ -320,64 +270,152 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self.ctx.list_ordered_flag_by_ref[list_group.self_ref] = is_ordered
|
self.ctx.list_ordered_flag_by_ref[list_group.self_ref] = is_ordered
|
||||||
if is_ordered and start is not None:
|
if is_ordered and start is not None:
|
||||||
self.ctx.list_start_by_ref[list_group.self_ref] = start
|
self.ctx.list_start_by_ref[list_group.self_ref] = start
|
||||||
|
|
||||||
self.level += 1
|
self.level += 1
|
||||||
|
|
||||||
self.walk(element, doc)
|
# For each top-level <li> in this list
|
||||||
|
for li in tag.find_all({"li", "ul", "ol"}, recursive=False):
|
||||||
|
if not isinstance(li, Tag):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# sub-list items should be indented under main list items, but temporarily
|
||||||
|
# addressing invalid HTML (docling-core/issues/357)
|
||||||
|
if li.name in {"ul", "ol"}:
|
||||||
|
self._handle_block(li, doc)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# 1) determine the marker
|
||||||
|
if is_ordered and start is not None:
|
||||||
|
marker = f"{start + len(list_group.children)}."
|
||||||
|
else:
|
||||||
|
marker = ""
|
||||||
|
|
||||||
|
# 2) extract only the "direct" text from this <li>
|
||||||
|
parts: list[str] = []
|
||||||
|
for child in li.contents:
|
||||||
|
if isinstance(child, NavigableString) and not isinstance(
|
||||||
|
child, PreformattedString
|
||||||
|
):
|
||||||
|
parts.append(child)
|
||||||
|
elif isinstance(child, Tag) and child.name not in ("ul", "ol"):
|
||||||
|
text_part = child.get_text()
|
||||||
|
if text_part:
|
||||||
|
parts.append(text_part)
|
||||||
|
li_text = re.sub(r"\s+|\n+", " ", "".join(parts)).strip()
|
||||||
|
|
||||||
|
# 3) add the list item
|
||||||
|
if li_text:
|
||||||
|
self.parents[self.level + 1] = doc.add_list_item(
|
||||||
|
text=li_text,
|
||||||
|
enumerated=is_ordered,
|
||||||
|
marker=marker,
|
||||||
|
parent=list_group,
|
||||||
|
content_layer=self.content_layer,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 4) recurse into any nested lists, attaching them to this <li> item
|
||||||
|
for sublist in li({"ul", "ol"}, recursive=False):
|
||||||
|
if isinstance(sublist, Tag):
|
||||||
|
self.level += 1
|
||||||
|
self._handle_block(sublist, doc)
|
||||||
|
self.parents[self.level + 1] = None
|
||||||
|
self.level -= 1
|
||||||
|
else:
|
||||||
|
for sublist in li({"ul", "ol"}, recursive=False):
|
||||||
|
if isinstance(sublist, Tag):
|
||||||
|
self._handle_block(sublist, doc)
|
||||||
|
|
||||||
|
# 5) extract any images under this <li>
|
||||||
|
for img_tag in li("img"):
|
||||||
|
if isinstance(img_tag, Tag):
|
||||||
|
self._emit_image(img_tag, doc)
|
||||||
|
|
||||||
self.parents[self.level + 1] = None
|
self.parents[self.level + 1] = None
|
||||||
self.level -= 1
|
self.level -= 1
|
||||||
|
|
||||||
def handle_list_item(self, element: Tag, doc: DoclingDocument) -> None:
|
def _handle_block(self, tag: Tag, doc: DoclingDocument) -> None:
|
||||||
"""Handles list item tags (li)."""
|
tag_name = tag.name.lower()
|
||||||
nested_list = element.find(["ul", "ol"])
|
|
||||||
|
|
||||||
parent = self.parents[self.level]
|
if tag_name == "figure":
|
||||||
if parent is None:
|
img_tag = tag.find("img")
|
||||||
_log.debug(f"list-item has no parent in DoclingDocument: {element}")
|
if isinstance(img_tag, Tag):
|
||||||
return
|
self._emit_image(img_tag, doc)
|
||||||
enumerated = self.ctx.list_ordered_flag_by_ref.get(parent.self_ref, False)
|
|
||||||
if enumerated and (start := self.ctx.list_start_by_ref.get(parent.self_ref)):
|
|
||||||
marker = f"{start + len(parent.children)}."
|
|
||||||
else:
|
|
||||||
marker = ""
|
|
||||||
|
|
||||||
if nested_list:
|
elif tag_name in {"h1", "h2", "h3", "h4", "h5", "h6"}:
|
||||||
# Text in list item can be hidden within hierarchy, hence
|
self._handle_heading(tag, doc)
|
||||||
# we need to extract it recursively
|
|
||||||
text: str = self.get_text(element)
|
|
||||||
# Flatten text, remove break lines:
|
|
||||||
text = text.replace("\n", "").replace("\r", "")
|
|
||||||
text = " ".join(text.split()).strip()
|
|
||||||
|
|
||||||
if len(text) > 0:
|
elif tag_name in {"ul", "ol"}:
|
||||||
# create a list-item
|
self._handle_list(tag, doc)
|
||||||
self.parents[self.level + 1] = doc.add_list_item(
|
|
||||||
text=text,
|
elif tag_name in {"p", "address", "summary"}:
|
||||||
enumerated=enumerated,
|
for part in tag.text.split("\n"):
|
||||||
marker=marker,
|
seg = part.strip()
|
||||||
parent=parent,
|
if seg:
|
||||||
|
doc.add_text(
|
||||||
|
parent=self.parents[self.level],
|
||||||
|
label=DocItemLabel.TEXT,
|
||||||
|
text=seg,
|
||||||
|
content_layer=self.content_layer,
|
||||||
|
)
|
||||||
|
for img_tag in tag("img"):
|
||||||
|
if isinstance(img_tag, Tag):
|
||||||
|
self._emit_image(img_tag, doc)
|
||||||
|
|
||||||
|
elif tag_name == "table":
|
||||||
|
data = HTMLDocumentBackend.parse_table_data(tag)
|
||||||
|
for img_tag in tag("img"):
|
||||||
|
if isinstance(img_tag, Tag):
|
||||||
|
self._emit_image(tag, doc)
|
||||||
|
if data is not None:
|
||||||
|
doc.add_table(
|
||||||
|
data=data,
|
||||||
|
parent=self.parents[self.level],
|
||||||
content_layer=self.content_layer,
|
content_layer=self.content_layer,
|
||||||
)
|
)
|
||||||
self.level += 1
|
|
||||||
self.walk(element, doc)
|
|
||||||
self.parents[self.level + 1] = None
|
|
||||||
self.level -= 1
|
|
||||||
else:
|
|
||||||
self.walk(element, doc)
|
|
||||||
|
|
||||||
elif element.text.strip():
|
elif tag_name in {"pre", "code"}:
|
||||||
text = element.text.strip()
|
# handle monospace code snippets (pre).
|
||||||
|
text = tag.get_text(strip=True)
|
||||||
|
if text:
|
||||||
|
doc.add_code(
|
||||||
|
parent=self.parents[self.level],
|
||||||
|
text=text,
|
||||||
|
content_layer=self.content_layer,
|
||||||
|
)
|
||||||
|
|
||||||
doc.add_list_item(
|
elif tag_name == "details":
|
||||||
text=text,
|
# handle details and its content.
|
||||||
enumerated=enumerated,
|
self.parents[self.level + 1] = doc.add_group(
|
||||||
marker=marker,
|
name="details",
|
||||||
parent=parent,
|
label=GroupLabel.SECTION,
|
||||||
|
parent=self.parents[self.level],
|
||||||
content_layer=self.content_layer,
|
content_layer=self.content_layer,
|
||||||
)
|
)
|
||||||
else:
|
self.level += 1
|
||||||
_log.debug(f"list-item has no text: {element}")
|
self._walk(tag, doc)
|
||||||
|
self.parents[self.level + 1] = None
|
||||||
|
self.level -= 1
|
||||||
|
|
||||||
|
def _emit_image(self, img_tag: Tag, doc: DoclingDocument) -> None:
|
||||||
|
figure = img_tag.find_parent("figure")
|
||||||
|
caption: str = ""
|
||||||
|
if isinstance(figure, Tag):
|
||||||
|
caption_tag = figure.find("figcaption", recursive=False)
|
||||||
|
if isinstance(caption_tag, Tag):
|
||||||
|
caption = caption_tag.get_text()
|
||||||
|
if not caption:
|
||||||
|
caption = str(img_tag.get("alt", "")).strip()
|
||||||
|
|
||||||
|
caption_item: Optional[TextItem] = None
|
||||||
|
if caption:
|
||||||
|
caption_item = doc.add_text(
|
||||||
|
DocItemLabel.CAPTION, text=caption, content_layer=self.content_layer
|
||||||
|
)
|
||||||
|
|
||||||
|
doc.add_picture(
|
||||||
|
caption=caption_item,
|
||||||
|
parent=self.parents[self.level],
|
||||||
|
content_layer=self.content_layer,
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_cell_spans(cell: Tag) -> tuple[int, int]:
|
def _get_cell_spans(cell: Tag) -> tuple[int, int]:
|
||||||
@ -502,84 +540,3 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
data.table_cells.append(table_cell)
|
data.table_cells.append(table_cell)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def handle_table(self, element: Tag, doc: DoclingDocument) -> None:
|
|
||||||
"""Handles table tags."""
|
|
||||||
|
|
||||||
table_data = HTMLDocumentBackend.parse_table_data(element)
|
|
||||||
|
|
||||||
if table_data is not None:
|
|
||||||
doc.add_table(
|
|
||||||
data=table_data,
|
|
||||||
parent=self.parents[self.level],
|
|
||||||
content_layer=self.content_layer,
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_list_text(self, list_element: Tag, level: int = 0) -> list[str]:
|
|
||||||
"""Recursively extract text from <ul> or <ol> with proper indentation."""
|
|
||||||
result = []
|
|
||||||
bullet_char = "*" # Default bullet character for unordered lists
|
|
||||||
|
|
||||||
if list_element.name == "ol": # For ordered lists, use numbers
|
|
||||||
for i, li in enumerate(list_element("li", recursive=False), 1):
|
|
||||||
if not isinstance(li, Tag):
|
|
||||||
continue
|
|
||||||
# Add numbering for ordered lists
|
|
||||||
result.append(f"{' ' * level}{i}. {li.get_text(strip=True)}")
|
|
||||||
# Handle nested lists
|
|
||||||
nested_list = li.find(["ul", "ol"])
|
|
||||||
if isinstance(nested_list, Tag):
|
|
||||||
result.extend(self.get_list_text(nested_list, level + 1))
|
|
||||||
elif list_element.name == "ul": # For unordered lists, use bullet points
|
|
||||||
for li in list_element("li", recursive=False):
|
|
||||||
if not isinstance(li, Tag):
|
|
||||||
continue
|
|
||||||
# Add bullet points for unordered lists
|
|
||||||
result.append(
|
|
||||||
f"{' ' * level}{bullet_char} {li.get_text(strip=True)}"
|
|
||||||
)
|
|
||||||
# Handle nested lists
|
|
||||||
nested_list = li.find(["ul", "ol"])
|
|
||||||
if isinstance(nested_list, Tag):
|
|
||||||
result.extend(self.get_list_text(nested_list, level + 1))
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def handle_figure(self, element: Tag, doc: DoclingDocument) -> None:
|
|
||||||
"""Handles image tags (img)."""
|
|
||||||
|
|
||||||
# Extract the image URI from the <img> tag
|
|
||||||
# image_uri = root.xpath('//figure//img/@src')[0]
|
|
||||||
|
|
||||||
contains_captions = element.find(["figcaption"])
|
|
||||||
if not isinstance(contains_captions, Tag):
|
|
||||||
doc.add_picture(
|
|
||||||
parent=self.parents[self.level],
|
|
||||||
caption=None,
|
|
||||||
content_layer=self.content_layer,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
texts = []
|
|
||||||
for item in contains_captions:
|
|
||||||
texts.append(item.text)
|
|
||||||
|
|
||||||
fig_caption = doc.add_text(
|
|
||||||
label=DocItemLabel.CAPTION,
|
|
||||||
text=("".join(texts)).strip(),
|
|
||||||
content_layer=self.content_layer,
|
|
||||||
)
|
|
||||||
doc.add_picture(
|
|
||||||
parent=self.parents[self.level],
|
|
||||||
caption=fig_caption,
|
|
||||||
content_layer=self.content_layer,
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle_image(self, element: Tag, doc: DoclingDocument) -> None:
|
|
||||||
"""Handles image tags (img)."""
|
|
||||||
_log.debug(f"ignoring <img> tags at the moment: {element}")
|
|
||||||
|
|
||||||
doc.add_picture(
|
|
||||||
parent=self.parents[self.level],
|
|
||||||
caption=None,
|
|
||||||
content_layer=self.content_layer,
|
|
||||||
)
|
|
||||||
|
@ -4,6 +4,7 @@ item-0 at level 0: unspecified: group _root_
|
|||||||
item-3 at level 2: section_header: Background
|
item-3 at level 2: section_header: Background
|
||||||
item-4 at level 3: text: Some background information here.
|
item-4 at level 3: text: Some background information here.
|
||||||
item-5 at level 3: picture
|
item-5 at level 3: picture
|
||||||
|
item-5 at level 4: caption: Example image
|
||||||
item-6 at level 3: list: group list
|
item-6 at level 3: list: group list
|
||||||
item-7 at level 4: list_item: First item in unordered list
|
item-7 at level 4: list_item: First item in unordered list
|
||||||
item-8 at level 4: list_item: Second item in unordered list
|
item-8 at level 4: list_item: Second item in unordered list
|
||||||
@ -12,4 +13,5 @@ item-0 at level 0: unspecified: group _root_
|
|||||||
item-11 at level 4: list_item: Second item in ordered list
|
item-11 at level 4: list_item: Second item in ordered list
|
||||||
item-12 at level 3: list: group ordered list start 42
|
item-12 at level 3: list: group ordered list start 42
|
||||||
item-13 at level 4: list_item: First item in ordered list with start
|
item-13 at level 4: list_item: First item in ordered list with start
|
||||||
item-14 at level 4: list_item: Second item in ordered list with start
|
item-14 at level 4: list_item: Second item in ordered list with start
|
||||||
|
item-15 at level 1: caption: Example image
|
@ -19,6 +19,9 @@
|
|||||||
"children": [
|
"children": [
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/0"
|
"$ref": "#/texts/0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/4"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
@ -33,10 +36,10 @@
|
|||||||
},
|
},
|
||||||
"children": [
|
"children": [
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/4"
|
"$ref": "#/texts/5"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/5"
|
"$ref": "#/texts/6"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
@ -50,10 +53,10 @@
|
|||||||
},
|
},
|
||||||
"children": [
|
"children": [
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/6"
|
"$ref": "#/texts/7"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/7"
|
"$ref": "#/texts/8"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
@ -67,10 +70,10 @@
|
|||||||
},
|
},
|
||||||
"children": [
|
"children": [
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/8"
|
"$ref": "#/texts/9"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/9"
|
"$ref": "#/texts/10"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
@ -153,6 +156,18 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/4",
|
"self_ref": "#/texts/4",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "caption",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Example image",
|
||||||
|
"text": "Example image"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/5",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/0"
|
"$ref": "#/groups/0"
|
||||||
},
|
},
|
||||||
@ -166,7 +181,7 @@
|
|||||||
"marker": ""
|
"marker": ""
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/5",
|
"self_ref": "#/texts/6",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/0"
|
"$ref": "#/groups/0"
|
||||||
},
|
},
|
||||||
@ -180,7 +195,7 @@
|
|||||||
"marker": ""
|
"marker": ""
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/6",
|
"self_ref": "#/texts/7",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/1"
|
"$ref": "#/groups/1"
|
||||||
},
|
},
|
||||||
@ -194,7 +209,7 @@
|
|||||||
"marker": ""
|
"marker": ""
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/7",
|
"self_ref": "#/texts/8",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/1"
|
"$ref": "#/groups/1"
|
||||||
},
|
},
|
||||||
@ -208,7 +223,7 @@
|
|||||||
"marker": ""
|
"marker": ""
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/8",
|
"self_ref": "#/texts/9",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/2"
|
"$ref": "#/groups/2"
|
||||||
},
|
},
|
||||||
@ -222,7 +237,7 @@
|
|||||||
"marker": "42."
|
"marker": "42."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/9",
|
"self_ref": "#/texts/10",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/2"
|
"$ref": "#/groups/2"
|
||||||
},
|
},
|
||||||
@ -246,7 +261,11 @@
|
|||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "picture",
|
"label": "picture",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"captions": [],
|
"captions": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/4"
|
||||||
|
}
|
||||||
|
],
|
||||||
"references": [],
|
"references": [],
|
||||||
"footnotes": [],
|
"footnotes": [],
|
||||||
"annotations": []
|
"annotations": []
|
||||||
|
@ -6,6 +6,8 @@ This is the first paragraph of the introduction.
|
|||||||
|
|
||||||
Some background information here.
|
Some background information here.
|
||||||
|
|
||||||
|
Example image
|
||||||
|
|
||||||
<!-- image -->
|
<!-- image -->
|
||||||
|
|
||||||
- First item in unordered list
|
- First item in unordered list
|
||||||
|
@ -3,8 +3,8 @@ item-0 at level 0: unspecified: group _root_
|
|||||||
item-2 at level 1: text: This is another div with text.
|
item-2 at level 1: text: This is another div with text.
|
||||||
item-3 at level 1: text: This is a regular paragraph.
|
item-3 at level 1: text: This is a regular paragraph.
|
||||||
item-4 at level 1: text: This is a third div
|
item-4 at level 1: text: This is a third div
|
||||||
with a new line.
|
item-5 at level 1: text: with a new line.
|
||||||
item-5 at level 1: section: group details
|
item-6 at level 1: section: group details
|
||||||
item-6 at level 2: text: Heading for the details element
|
item-7 at level 2: text: Heading for the details element
|
||||||
item-7 at level 2: text: Description of the details element.
|
item-8 at level 2: text: Description of the details element.
|
||||||
item-8 at level 1: text: This is a fourth div with a bold paragraph.
|
item-9 at level 1: text: This is a fourth div with a bold paragraph.
|
@ -29,11 +29,17 @@
|
|||||||
{
|
{
|
||||||
"$ref": "#/texts/3"
|
"$ref": "#/texts/3"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/4"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/5"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/groups/0"
|
"$ref": "#/groups/0"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/6"
|
"$ref": "#/texts/8"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
@ -48,10 +54,10 @@
|
|||||||
},
|
},
|
||||||
"children": [
|
"children": [
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/4"
|
"$ref": "#/texts/6"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/5"
|
"$ref": "#/texts/7"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
@ -66,6 +72,18 @@
|
|||||||
"$ref": "#/body"
|
"$ref": "#/body"
|
||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
|
"content_layer": "furniture",
|
||||||
|
"label": "title",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Sample HTML File",
|
||||||
|
"text": "Sample HTML File"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/1",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "text",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
@ -73,7 +91,7 @@
|
|||||||
"text": "This is a div with text."
|
"text": "This is a div with text."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/1",
|
"self_ref": "#/texts/2",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/body"
|
"$ref": "#/body"
|
||||||
},
|
},
|
||||||
@ -85,7 +103,7 @@
|
|||||||
"text": "This is another div with text."
|
"text": "This is another div with text."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/2",
|
"self_ref": "#/texts/3",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/body"
|
"$ref": "#/body"
|
||||||
},
|
},
|
||||||
@ -97,7 +115,7 @@
|
|||||||
"text": "This is a regular paragraph."
|
"text": "This is a regular paragraph."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/3",
|
"self_ref": "#/texts/4",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/body"
|
"$ref": "#/body"
|
||||||
},
|
},
|
||||||
@ -105,11 +123,23 @@
|
|||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "text",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "This is a third div\nwith a new line.",
|
"orig": "This is a third div",
|
||||||
"text": "This is a third div\nwith a new line."
|
"text": "This is a third div"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/4",
|
"self_ref": "#/texts/5",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "with a new line.",
|
||||||
|
"text": "with a new line."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/6",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/0"
|
"$ref": "#/groups/0"
|
||||||
},
|
},
|
||||||
@ -121,7 +151,7 @@
|
|||||||
"text": "Heading for the details element"
|
"text": "Heading for the details element"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/5",
|
"self_ref": "#/texts/7",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/0"
|
"$ref": "#/groups/0"
|
||||||
},
|
},
|
||||||
@ -133,7 +163,7 @@
|
|||||||
"text": "Description of the details element."
|
"text": "Description of the details element."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/6",
|
"self_ref": "#/texts/8",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/body"
|
"$ref": "#/body"
|
||||||
},
|
},
|
||||||
|
@ -5,6 +5,7 @@ This is another div with text.
|
|||||||
This is a regular paragraph.
|
This is a regular paragraph.
|
||||||
|
|
||||||
This is a third div
|
This is a third div
|
||||||
|
|
||||||
with a new line.
|
with a new line.
|
||||||
|
|
||||||
Heading for the details element
|
Heading for the details element
|
||||||
|
27
tests/data/groundtruth/docling_v2/example_09.html.itxt
vendored
Normal file
27
tests/data/groundtruth/docling_v2/example_09.html.itxt
vendored
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
item-0 at level 0: unspecified: group _root_
|
||||||
|
item-1 at level 1: title: Introduction to parsing HTML files with Docling
|
||||||
|
item-2 at level 2: picture
|
||||||
|
item-2 at level 3: caption: Docling
|
||||||
|
item-3 at level 2: text: Docling simplifies document proc ... ntegrations with the gen AI ecosystem.
|
||||||
|
item-4 at level 2: section_header: Supported file formats
|
||||||
|
item-5 at level 3: text: Docling supports multiple file formats..
|
||||||
|
item-6 at level 3: list: group list
|
||||||
|
item-7 at level 4: list_item: Advanced PDF understanding
|
||||||
|
item-8 at level 4: picture
|
||||||
|
item-8 at level 5: caption: PDF
|
||||||
|
item-9 at level 4: list_item: Microsoft Office DOCX
|
||||||
|
item-10 at level 4: picture
|
||||||
|
item-10 at level 5: caption: DOCX
|
||||||
|
item-11 at level 4: list_item: HTML files (with optional support for images)
|
||||||
|
item-12 at level 4: picture
|
||||||
|
item-12 at level 5: caption: HTML
|
||||||
|
item-13 at level 3: section_header: Three backends for handling HTML files
|
||||||
|
item-14 at level 4: text: Docling has three backends for parsing HTML files:
|
||||||
|
item-15 at level 4: list: group ordered list
|
||||||
|
item-16 at level 5: list_item: HTMLDocumentBackend Ignores images
|
||||||
|
item-17 at level 5: list_item: HTMLDocumentBackendImagesInline Extracts images inline
|
||||||
|
item-18 at level 5: list_item: HTMLDocumentBackendImagesReferenced Extracts images as references
|
||||||
|
item-19 at level 1: caption: Docling
|
||||||
|
item-20 at level 1: caption: PDF
|
||||||
|
item-21 at level 1: caption: DOCX
|
||||||
|
item-22 at level 1: caption: HTML
|
404
tests/data/groundtruth/docling_v2/example_09.html.json
vendored
Normal file
404
tests/data/groundtruth/docling_v2/example_09.html.json
vendored
Normal file
@ -0,0 +1,404 @@
|
|||||||
|
{
|
||||||
|
"schema_name": "DoclingDocument",
|
||||||
|
"version": "1.5.0",
|
||||||
|
"name": "example_09",
|
||||||
|
"origin": {
|
||||||
|
"mimetype": "text/html",
|
||||||
|
"binary_hash": 6785336133244366107,
|
||||||
|
"filename": "example_09.html"
|
||||||
|
},
|
||||||
|
"furniture": {
|
||||||
|
"self_ref": "#/furniture",
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "furniture",
|
||||||
|
"name": "_root_",
|
||||||
|
"label": "unspecified"
|
||||||
|
},
|
||||||
|
"body": {
|
||||||
|
"self_ref": "#/body",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/8"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/10"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"name": "_root_",
|
||||||
|
"label": "unspecified"
|
||||||
|
},
|
||||||
|
"groups": [
|
||||||
|
{
|
||||||
|
"self_ref": "#/groups/0",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/texts/3"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/5"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/pictures/1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/7"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/pictures/2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/9"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/pictures/3"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"name": "list",
|
||||||
|
"label": "list"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/groups/1",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/texts/11"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/13"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/14"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/15"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"name": "ordered list",
|
||||||
|
"label": "list"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"texts": [
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/0",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/pictures/0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/3"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "title",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Introduction to parsing HTML files with Docling",
|
||||||
|
"text": "Introduction to parsing HTML files with Docling"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/1",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "caption",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Docling",
|
||||||
|
"text": "Docling"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/2",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/texts/0"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Docling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem.",
|
||||||
|
"text": "Docling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/3",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/texts/0"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/4"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/11"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "section_header",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Supported file formats",
|
||||||
|
"text": "Supported file formats",
|
||||||
|
"level": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/4",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/texts/3"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Docling supports multiple file formats..",
|
||||||
|
"text": "Docling supports multiple file formats.."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/5",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "list_item",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Advanced PDF understanding",
|
||||||
|
"text": "Advanced PDF understanding",
|
||||||
|
"enumerated": false,
|
||||||
|
"marker": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/6",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "caption",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "PDF",
|
||||||
|
"text": "PDF"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/7",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "list_item",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Microsoft Office DOCX",
|
||||||
|
"text": "Microsoft Office DOCX",
|
||||||
|
"enumerated": false,
|
||||||
|
"marker": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/8",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "caption",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "DOCX",
|
||||||
|
"text": "DOCX"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/9",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "list_item",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "HTML files (with optional support for images)",
|
||||||
|
"text": "HTML files (with optional support for images)",
|
||||||
|
"enumerated": false,
|
||||||
|
"marker": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/10",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "caption",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "HTML",
|
||||||
|
"text": "HTML"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/11",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/texts/3"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/12"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/groups/1"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "section_header",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Three backends for handling HTML files",
|
||||||
|
"text": "Three backends for handling HTML files",
|
||||||
|
"level": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/12",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/texts/11"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Docling has three backends for parsing HTML files:",
|
||||||
|
"text": "Docling has three backends for parsing HTML files:"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/13",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/1"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "list_item",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "HTMLDocumentBackend Ignores images",
|
||||||
|
"text": "HTMLDocumentBackend Ignores images",
|
||||||
|
"enumerated": true,
|
||||||
|
"marker": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/14",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/1"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "list_item",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "HTMLDocumentBackendImagesInline Extracts images inline",
|
||||||
|
"text": "HTMLDocumentBackendImagesInline Extracts images inline",
|
||||||
|
"enumerated": true,
|
||||||
|
"marker": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/15",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/1"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "list_item",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "HTMLDocumentBackendImagesReferenced Extracts images as references",
|
||||||
|
"text": "HTMLDocumentBackendImagesReferenced Extracts images as references",
|
||||||
|
"enumerated": true,
|
||||||
|
"marker": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"pictures": [
|
||||||
|
{
|
||||||
|
"self_ref": "#/pictures/0",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/texts/0"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "picture",
|
||||||
|
"prov": [],
|
||||||
|
"captions": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/1"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"references": [],
|
||||||
|
"footnotes": [],
|
||||||
|
"annotations": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/pictures/1",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "picture",
|
||||||
|
"prov": [],
|
||||||
|
"captions": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"references": [],
|
||||||
|
"footnotes": [],
|
||||||
|
"annotations": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/pictures/2",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "picture",
|
||||||
|
"prov": [],
|
||||||
|
"captions": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/8"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"references": [],
|
||||||
|
"footnotes": [],
|
||||||
|
"annotations": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/pictures/3",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "picture",
|
||||||
|
"prov": [],
|
||||||
|
"captions": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/10"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"references": [],
|
||||||
|
"footnotes": [],
|
||||||
|
"annotations": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"tables": [],
|
||||||
|
"key_value_items": [],
|
||||||
|
"form_items": [],
|
||||||
|
"pages": {}
|
||||||
|
}
|
32
tests/data/groundtruth/docling_v2/example_09.html.md
vendored
Normal file
32
tests/data/groundtruth/docling_v2/example_09.html.md
vendored
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
# Introduction to parsing HTML files with Docling
|
||||||
|
|
||||||
|
Docling
|
||||||
|
|
||||||
|
<!-- image -->
|
||||||
|
|
||||||
|
Docling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem.
|
||||||
|
|
||||||
|
## Supported file formats
|
||||||
|
|
||||||
|
Docling supports multiple file formats..
|
||||||
|
|
||||||
|
- Advanced PDF understanding
|
||||||
|
PDF
|
||||||
|
|
||||||
|
<!-- image -->
|
||||||
|
- Microsoft Office DOCX
|
||||||
|
DOCX
|
||||||
|
|
||||||
|
<!-- image -->
|
||||||
|
- HTML files (with optional support for images)
|
||||||
|
HTML
|
||||||
|
|
||||||
|
<!-- image -->
|
||||||
|
|
||||||
|
### Three backends for handling HTML files
|
||||||
|
|
||||||
|
Docling has three backends for parsing HTML files:
|
||||||
|
|
||||||
|
1. HTMLDocumentBackend Ignores images
|
||||||
|
2. HTMLDocumentBackendImagesInline Extracts images inline
|
||||||
|
3. HTMLDocumentBackendImagesReferenced Extracts images as references
|
@ -1,458 +1,485 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: section: group header-1
|
item-1 at level 1: section: group header-1
|
||||||
item-2 at level 2: section_header: Contents
|
item-2 at level 2: section_header: Contents
|
||||||
item-3 at level 3: list: group list
|
item-3 at level 3: text: move to sidebar
|
||||||
item-4 at level 4: list_item: (Top)
|
item-4 at level 3: text: hide
|
||||||
item-5 at level 4: list_item: 1 Etymology
|
item-5 at level 3: list: group list
|
||||||
item-6 at level 5: list: group list
|
item-6 at level 4: list_item: (Top)
|
||||||
item-7 at level 4: list_item: 2 Taxonomy
|
item-7 at level 4: list_item: 1 Etymology
|
||||||
item-8 at level 5: list: group list
|
item-8 at level 5: list: group list
|
||||||
item-9 at level 4: list_item: 3 Morphology
|
item-9 at level 4: list_item: 2 Taxonomy
|
||||||
item-10 at level 5: list: group list
|
item-10 at level 5: list: group list
|
||||||
item-11 at level 4: list_item: 4 Distribution and habitat
|
item-11 at level 4: list_item: 3 Morphology
|
||||||
item-12 at level 5: list: group list
|
item-12 at level 5: list: group list
|
||||||
item-13 at level 4: list_item: 5 Behaviour Toggle Behaviour subsection
|
item-13 at level 4: list_item: 4 Distribution and habitat
|
||||||
item-14 at level 5: list: group list
|
item-14 at level 5: list: group list
|
||||||
item-15 at level 6: list_item: 5.1 Feeding
|
item-15 at level 4: list_item: 5 Behaviour Toggle Behaviour subsection
|
||||||
item-16 at level 7: list: group list
|
item-16 at level 5: list: group list
|
||||||
item-17 at level 6: list_item: 5.2 Breeding
|
item-17 at level 6: list_item: 5.1 Feeding
|
||||||
item-18 at level 7: list: group list
|
item-18 at level 7: list: group list
|
||||||
item-19 at level 6: list_item: 5.3 Communication
|
item-19 at level 6: list_item: 5.2 Breeding
|
||||||
item-20 at level 7: list: group list
|
item-20 at level 7: list: group list
|
||||||
item-21 at level 6: list_item: 5.4 Predators
|
item-21 at level 6: list_item: 5.3 Communication
|
||||||
item-22 at level 7: list: group list
|
item-22 at level 7: list: group list
|
||||||
item-23 at level 4: list_item: 6 Relationship with humans Toggle Relationship with humans subsection
|
item-23 at level 6: list_item: 5.4 Predators
|
||||||
item-24 at level 5: list: group list
|
item-24 at level 7: list: group list
|
||||||
item-25 at level 6: list_item: 6.1 Hunting
|
item-25 at level 4: list_item: 6 Relationship with humans Toggle Relationship with humans subsection
|
||||||
item-26 at level 7: list: group list
|
item-26 at level 5: list: group list
|
||||||
item-27 at level 6: list_item: 6.2 Domestication
|
item-27 at level 6: list_item: 6.1 Hunting
|
||||||
item-28 at level 7: list: group list
|
item-28 at level 7: list: group list
|
||||||
item-29 at level 6: list_item: 6.3 Heraldry
|
item-29 at level 6: list_item: 6.2 Domestication
|
||||||
item-30 at level 7: list: group list
|
item-30 at level 7: list: group list
|
||||||
item-31 at level 6: list_item: 6.4 Cultural references
|
item-31 at level 6: list_item: 6.3 Heraldry
|
||||||
item-32 at level 7: list: group list
|
item-32 at level 7: list: group list
|
||||||
item-33 at level 4: list_item: 7 See also
|
item-33 at level 6: list_item: 6.4 Cultural references
|
||||||
item-34 at level 5: list: group list
|
item-34 at level 7: list: group list
|
||||||
item-35 at level 4: list_item: 8 Notes Toggle Notes subsection
|
item-35 at level 4: list_item: 7 See also
|
||||||
item-36 at level 5: list: group list
|
item-36 at level 5: list: group list
|
||||||
item-37 at level 6: list_item: 8.1 Citations
|
item-37 at level 4: list_item: 8 Notes Toggle Notes subsection
|
||||||
item-38 at level 7: list: group list
|
item-38 at level 5: list: group list
|
||||||
item-39 at level 6: list_item: 8.2 Sources
|
item-39 at level 6: list_item: 8.1 Citations
|
||||||
item-40 at level 7: list: group list
|
item-40 at level 7: list: group list
|
||||||
item-41 at level 4: list_item: 9 External links
|
item-41 at level 6: list_item: 8.2 Sources
|
||||||
item-42 at level 5: list: group list
|
item-42 at level 7: list: group list
|
||||||
item-43 at level 1: title: Duck
|
item-43 at level 4: list_item: 9 External links
|
||||||
item-44 at level 2: list: group list
|
item-44 at level 5: list: group list
|
||||||
item-45 at level 3: list_item: Acèh
|
item-45 at level 3: text: Toggle the table of contents
|
||||||
item-46 at level 3: list_item: Afrikaans
|
item-46 at level 1: title: Duck
|
||||||
item-47 at level 3: list_item: Alemannisch
|
item-47 at level 2: text: 136 languages
|
||||||
item-48 at level 3: list_item: አማርኛ
|
item-48 at level 2: list: group list
|
||||||
item-49 at level 3: list_item: Ænglisc
|
item-49 at level 3: list_item: Acèh
|
||||||
item-50 at level 3: list_item: العربية
|
item-50 at level 3: list_item: Afrikaans
|
||||||
item-51 at level 3: list_item: Aragonés
|
item-51 at level 3: list_item: Alemannisch
|
||||||
item-52 at level 3: list_item: ܐܪܡܝܐ
|
item-52 at level 3: list_item: አማርኛ
|
||||||
item-53 at level 3: list_item: Armãneashti
|
item-53 at level 3: list_item: Ænglisc
|
||||||
item-54 at level 3: list_item: Asturianu
|
item-54 at level 3: list_item: العربية
|
||||||
item-55 at level 3: list_item: Atikamekw
|
item-55 at level 3: list_item: Aragonés
|
||||||
item-56 at level 3: list_item: Авар
|
item-56 at level 3: list_item: ܐܪܡܝܐ
|
||||||
item-57 at level 3: list_item: Aymar aru
|
item-57 at level 3: list_item: Armãneashti
|
||||||
item-58 at level 3: list_item: تۆرکجه
|
item-58 at level 3: list_item: Asturianu
|
||||||
item-59 at level 3: list_item: Basa Bali
|
item-59 at level 3: list_item: Atikamekw
|
||||||
item-60 at level 3: list_item: বাংলা
|
item-60 at level 3: list_item: Авар
|
||||||
item-61 at level 3: list_item: 閩南語 / Bân-lâm-gú
|
item-61 at level 3: list_item: Aymar aru
|
||||||
item-62 at level 3: list_item: Беларуская
|
item-62 at level 3: list_item: تۆرکجه
|
||||||
item-63 at level 3: list_item: Беларуская (тарашкевіца)
|
item-63 at level 3: list_item: Basa Bali
|
||||||
item-64 at level 3: list_item: Bikol Central
|
item-64 at level 3: list_item: বাংলা
|
||||||
item-65 at level 3: list_item: Български
|
item-65 at level 3: list_item: 閩南語 / Bân-lâm-gú
|
||||||
item-66 at level 3: list_item: Brezhoneg
|
item-66 at level 3: list_item: Беларуская
|
||||||
item-67 at level 3: list_item: Буряад
|
item-67 at level 3: list_item: Беларуская (тарашкевіца)
|
||||||
item-68 at level 3: list_item: Català
|
item-68 at level 3: list_item: Bikol Central
|
||||||
item-69 at level 3: list_item: Чӑвашла
|
item-69 at level 3: list_item: Български
|
||||||
item-70 at level 3: list_item: Čeština
|
item-70 at level 3: list_item: Brezhoneg
|
||||||
item-71 at level 3: list_item: ChiShona
|
item-71 at level 3: list_item: Буряад
|
||||||
item-72 at level 3: list_item: Cymraeg
|
item-72 at level 3: list_item: Català
|
||||||
item-73 at level 3: list_item: Dagbanli
|
item-73 at level 3: list_item: Чӑвашла
|
||||||
item-74 at level 3: list_item: Dansk
|
item-74 at level 3: list_item: Čeština
|
||||||
item-75 at level 3: list_item: Deitsch
|
item-75 at level 3: list_item: ChiShona
|
||||||
item-76 at level 3: list_item: Deutsch
|
item-76 at level 3: list_item: Cymraeg
|
||||||
item-77 at level 3: list_item: डोटेली
|
item-77 at level 3: list_item: Dagbanli
|
||||||
item-78 at level 3: list_item: Ελληνικά
|
item-78 at level 3: list_item: Dansk
|
||||||
item-79 at level 3: list_item: Emiliàn e rumagnòl
|
item-79 at level 3: list_item: Deitsch
|
||||||
item-80 at level 3: list_item: Español
|
item-80 at level 3: list_item: Deutsch
|
||||||
item-81 at level 3: list_item: Esperanto
|
item-81 at level 3: list_item: डोटेली
|
||||||
item-82 at level 3: list_item: Euskara
|
item-82 at level 3: list_item: Ελληνικά
|
||||||
item-83 at level 3: list_item: فارسی
|
item-83 at level 3: list_item: Emiliàn e rumagnòl
|
||||||
item-84 at level 3: list_item: Français
|
item-84 at level 3: list_item: Español
|
||||||
item-85 at level 3: list_item: Gaeilge
|
item-85 at level 3: list_item: Esperanto
|
||||||
item-86 at level 3: list_item: Galego
|
item-86 at level 3: list_item: Euskara
|
||||||
item-87 at level 3: list_item: ГӀалгӀай
|
item-87 at level 3: list_item: فارسی
|
||||||
item-88 at level 3: list_item: 贛語
|
item-88 at level 3: list_item: Français
|
||||||
item-89 at level 3: list_item: گیلکی
|
item-89 at level 3: list_item: Gaeilge
|
||||||
item-90 at level 3: list_item: 𐌲𐌿𐍄𐌹𐍃𐌺
|
item-90 at level 3: list_item: Galego
|
||||||
item-91 at level 3: list_item: गोंयची कोंकणी / Gõychi Konknni
|
item-91 at level 3: list_item: ГӀалгӀай
|
||||||
item-92 at level 3: list_item: 客家語 / Hak-kâ-ngî
|
item-92 at level 3: list_item: 贛語
|
||||||
item-93 at level 3: list_item: 한국어
|
item-93 at level 3: list_item: گیلکی
|
||||||
item-94 at level 3: list_item: Hausa
|
item-94 at level 3: list_item: 𐌲𐌿𐍄𐌹𐍃𐌺
|
||||||
item-95 at level 3: list_item: Հայերեն
|
item-95 at level 3: list_item: गोंयची कोंकणी / Gõychi Konknni
|
||||||
item-96 at level 3: list_item: हिन्दी
|
item-96 at level 3: list_item: 客家語 / Hak-kâ-ngî
|
||||||
item-97 at level 3: list_item: Hrvatski
|
item-97 at level 3: list_item: 한국어
|
||||||
item-98 at level 3: list_item: Ido
|
item-98 at level 3: list_item: Hausa
|
||||||
item-99 at level 3: list_item: Bahasa Indonesia
|
item-99 at level 3: list_item: Հայերեն
|
||||||
item-100 at level 3: list_item: Iñupiatun
|
item-100 at level 3: list_item: हिन्दी
|
||||||
item-101 at level 3: list_item: Íslenska
|
item-101 at level 3: list_item: Hrvatski
|
||||||
item-102 at level 3: list_item: Italiano
|
item-102 at level 3: list_item: Ido
|
||||||
item-103 at level 3: list_item: עברית
|
item-103 at level 3: list_item: Bahasa Indonesia
|
||||||
item-104 at level 3: list_item: Jawa
|
item-104 at level 3: list_item: Iñupiatun
|
||||||
item-105 at level 3: list_item: ಕನ್ನಡ
|
item-105 at level 3: list_item: Íslenska
|
||||||
item-106 at level 3: list_item: Kapampangan
|
item-106 at level 3: list_item: Italiano
|
||||||
item-107 at level 3: list_item: ქართული
|
item-107 at level 3: list_item: עברית
|
||||||
item-108 at level 3: list_item: कॉशुर / کٲشُر
|
item-108 at level 3: list_item: Jawa
|
||||||
item-109 at level 3: list_item: Қазақша
|
item-109 at level 3: list_item: ಕನ್ನಡ
|
||||||
item-110 at level 3: list_item: Ikirundi
|
item-110 at level 3: list_item: Kapampangan
|
||||||
item-111 at level 3: list_item: Kongo
|
item-111 at level 3: list_item: ქართული
|
||||||
item-112 at level 3: list_item: Kreyòl ayisyen
|
item-112 at level 3: list_item: कॉशुर / کٲشُر
|
||||||
item-113 at level 3: list_item: Кырык мары
|
item-113 at level 3: list_item: Қазақша
|
||||||
item-114 at level 3: list_item: ລາວ
|
item-114 at level 3: list_item: Ikirundi
|
||||||
item-115 at level 3: list_item: Latina
|
item-115 at level 3: list_item: Kongo
|
||||||
item-116 at level 3: list_item: Latviešu
|
item-116 at level 3: list_item: Kreyòl ayisyen
|
||||||
item-117 at level 3: list_item: Lietuvių
|
item-117 at level 3: list_item: Кырык мары
|
||||||
item-118 at level 3: list_item: Li Niha
|
item-118 at level 3: list_item: ລາວ
|
||||||
item-119 at level 3: list_item: Ligure
|
item-119 at level 3: list_item: Latina
|
||||||
item-120 at level 3: list_item: Limburgs
|
item-120 at level 3: list_item: Latviešu
|
||||||
item-121 at level 3: list_item: Lingála
|
item-121 at level 3: list_item: Lietuvių
|
||||||
item-122 at level 3: list_item: Malagasy
|
item-122 at level 3: list_item: Li Niha
|
||||||
item-123 at level 3: list_item: മലയാളം
|
item-123 at level 3: list_item: Ligure
|
||||||
item-124 at level 3: list_item: मराठी
|
item-124 at level 3: list_item: Limburgs
|
||||||
item-125 at level 3: list_item: مازِرونی
|
item-125 at level 3: list_item: Lingála
|
||||||
item-126 at level 3: list_item: Bahasa Melayu
|
item-126 at level 3: list_item: Malagasy
|
||||||
item-127 at level 3: list_item: ꯃꯤꯇꯩ ꯂꯣꯟ
|
item-127 at level 3: list_item: മലയാളം
|
||||||
item-128 at level 3: list_item: 閩東語 / Mìng-dĕ̤ng-ngṳ̄
|
item-128 at level 3: list_item: मराठी
|
||||||
item-129 at level 3: list_item: Мокшень
|
item-129 at level 3: list_item: مازِرونی
|
||||||
item-130 at level 3: list_item: Монгол
|
item-130 at level 3: list_item: Bahasa Melayu
|
||||||
item-131 at level 3: list_item: မြန်မာဘာသာ
|
item-131 at level 3: list_item: ꯃꯤꯇꯩ ꯂꯣꯟ
|
||||||
item-132 at level 3: list_item: Nederlands
|
item-132 at level 3: list_item: 閩東語 / Mìng-dĕ̤ng-ngṳ̄
|
||||||
item-133 at level 3: list_item: Nedersaksies
|
item-133 at level 3: list_item: Мокшень
|
||||||
item-134 at level 3: list_item: नेपाली
|
item-134 at level 3: list_item: Монгол
|
||||||
item-135 at level 3: list_item: नेपाल भाषा
|
item-135 at level 3: list_item: မြန်မာဘာသာ
|
||||||
item-136 at level 3: list_item: 日本語
|
item-136 at level 3: list_item: Nederlands
|
||||||
item-137 at level 3: list_item: Нохчийн
|
item-137 at level 3: list_item: Nedersaksies
|
||||||
item-138 at level 3: list_item: Norsk nynorsk
|
item-138 at level 3: list_item: नेपाली
|
||||||
item-139 at level 3: list_item: Occitan
|
item-139 at level 3: list_item: नेपाल भाषा
|
||||||
item-140 at level 3: list_item: Oromoo
|
item-140 at level 3: list_item: 日本語
|
||||||
item-141 at level 3: list_item: ਪੰਜਾਬੀ
|
item-141 at level 3: list_item: Нохчийн
|
||||||
item-142 at level 3: list_item: Picard
|
item-142 at level 3: list_item: Norsk nynorsk
|
||||||
item-143 at level 3: list_item: Plattdüütsch
|
item-143 at level 3: list_item: Occitan
|
||||||
item-144 at level 3: list_item: Polski
|
item-144 at level 3: list_item: Oromoo
|
||||||
item-145 at level 3: list_item: Português
|
item-145 at level 3: list_item: ਪੰਜਾਬੀ
|
||||||
item-146 at level 3: list_item: Qırımtatarca
|
item-146 at level 3: list_item: Picard
|
||||||
item-147 at level 3: list_item: Română
|
item-147 at level 3: list_item: Plattdüütsch
|
||||||
item-148 at level 3: list_item: Русский
|
item-148 at level 3: list_item: Polski
|
||||||
item-149 at level 3: list_item: Саха тыла
|
item-149 at level 3: list_item: Português
|
||||||
item-150 at level 3: list_item: ᱥᱟᱱᱛᱟᱲᱤ
|
item-150 at level 3: list_item: Qırımtatarca
|
||||||
item-151 at level 3: list_item: Sardu
|
item-151 at level 3: list_item: Română
|
||||||
item-152 at level 3: list_item: Scots
|
item-152 at level 3: list_item: Русский
|
||||||
item-153 at level 3: list_item: Seeltersk
|
item-153 at level 3: list_item: Саха тыла
|
||||||
item-154 at level 3: list_item: Shqip
|
item-154 at level 3: list_item: ᱥᱟᱱᱛᱟᱲᱤ
|
||||||
item-155 at level 3: list_item: Sicilianu
|
item-155 at level 3: list_item: Sardu
|
||||||
item-156 at level 3: list_item: සිංහල
|
item-156 at level 3: list_item: Scots
|
||||||
item-157 at level 3: list_item: Simple English
|
item-157 at level 3: list_item: Seeltersk
|
||||||
item-158 at level 3: list_item: سنڌي
|
item-158 at level 3: list_item: Shqip
|
||||||
item-159 at level 3: list_item: کوردی
|
item-159 at level 3: list_item: Sicilianu
|
||||||
item-160 at level 3: list_item: Српски / srpski
|
item-160 at level 3: list_item: සිංහල
|
||||||
item-161 at level 3: list_item: Srpskohrvatski / српскохрватски
|
item-161 at level 3: list_item: Simple English
|
||||||
item-162 at level 3: list_item: Sunda
|
item-162 at level 3: list_item: سنڌي
|
||||||
item-163 at level 3: list_item: Svenska
|
item-163 at level 3: list_item: کوردی
|
||||||
item-164 at level 3: list_item: Tagalog
|
item-164 at level 3: list_item: Српски / srpski
|
||||||
item-165 at level 3: list_item: தமிழ்
|
item-165 at level 3: list_item: Srpskohrvatski / српскохрватски
|
||||||
item-166 at level 3: list_item: Taqbaylit
|
item-166 at level 3: list_item: Sunda
|
||||||
item-167 at level 3: list_item: Татарча / tatarça
|
item-167 at level 3: list_item: Svenska
|
||||||
item-168 at level 3: list_item: ไทย
|
item-168 at level 3: list_item: Tagalog
|
||||||
item-169 at level 3: list_item: Türkçe
|
item-169 at level 3: list_item: தமிழ்
|
||||||
item-170 at level 3: list_item: Українська
|
item-170 at level 3: list_item: Taqbaylit
|
||||||
item-171 at level 3: list_item: ئۇيغۇرچە / Uyghurche
|
item-171 at level 3: list_item: Татарча / tatarça
|
||||||
item-172 at level 3: list_item: Vahcuengh
|
item-172 at level 3: list_item: ไทย
|
||||||
item-173 at level 3: list_item: Tiếng Việt
|
item-173 at level 3: list_item: Türkçe
|
||||||
item-174 at level 3: list_item: Walon
|
item-174 at level 3: list_item: Українська
|
||||||
item-175 at level 3: list_item: 文言
|
item-175 at level 3: list_item: ئۇيغۇرچە / Uyghurche
|
||||||
item-176 at level 3: list_item: Winaray
|
item-176 at level 3: list_item: Vahcuengh
|
||||||
item-177 at level 3: list_item: 吴语
|
item-177 at level 3: list_item: Tiếng Việt
|
||||||
item-178 at level 3: list_item: 粵語
|
item-178 at level 3: list_item: Walon
|
||||||
item-179 at level 3: list_item: Žemaitėška
|
item-179 at level 3: list_item: 文言
|
||||||
item-180 at level 3: list_item: 中文
|
item-180 at level 3: list_item: Winaray
|
||||||
item-181 at level 2: list: group list
|
item-181 at level 3: list_item: 吴语
|
||||||
item-182 at level 3: list_item: Article
|
item-182 at level 3: list_item: 粵語
|
||||||
item-183 at level 3: list_item: Talk
|
item-183 at level 3: list_item: Žemaitėška
|
||||||
item-184 at level 2: list: group list
|
item-184 at level 3: list_item: 中文
|
||||||
item-185 at level 2: list: group list
|
item-185 at level 2: text: Edit links
|
||||||
item-186 at level 3: list_item: Read
|
item-186 at level 2: list: group list
|
||||||
item-187 at level 3: list_item: View source
|
item-187 at level 3: list_item: Article
|
||||||
item-188 at level 3: list_item: View history
|
item-188 at level 3: list_item: Talk
|
||||||
item-189 at level 2: text: Tools
|
item-189 at level 2: text: English
|
||||||
item-190 at level 2: text: Actions
|
item-190 at level 2: list: group list
|
||||||
item-191 at level 2: list: group list
|
item-191 at level 2: list: group list
|
||||||
item-192 at level 3: list_item: Read
|
item-192 at level 3: list_item: Read
|
||||||
item-193 at level 3: list_item: View source
|
item-193 at level 3: list_item: View source
|
||||||
item-194 at level 3: list_item: View history
|
item-194 at level 3: list_item: View history
|
||||||
item-195 at level 2: text: General
|
item-195 at level 2: text: Tools
|
||||||
item-196 at level 2: list: group list
|
item-196 at level 2: text: Tools
|
||||||
item-197 at level 3: list_item: What links here
|
item-197 at level 2: text: move to sidebar
|
||||||
item-198 at level 3: list_item: Related changes
|
item-198 at level 2: text: hide
|
||||||
item-199 at level 3: list_item: Upload file
|
item-199 at level 2: text: Actions
|
||||||
item-200 at level 3: list_item: Special pages
|
item-200 at level 2: list: group list
|
||||||
item-201 at level 3: list_item: Permanent link
|
item-201 at level 3: list_item: Read
|
||||||
item-202 at level 3: list_item: Page information
|
item-202 at level 3: list_item: View source
|
||||||
item-203 at level 3: list_item: Cite this page
|
item-203 at level 3: list_item: View history
|
||||||
item-204 at level 3: list_item: Get shortened URL
|
item-204 at level 2: text: General
|
||||||
item-205 at level 3: list_item: Download QR code
|
item-205 at level 2: list: group list
|
||||||
item-206 at level 3: list_item: Wikidata item
|
item-206 at level 3: list_item: What links here
|
||||||
item-207 at level 2: text: Print/export
|
item-207 at level 3: list_item: Related changes
|
||||||
item-208 at level 2: list: group list
|
item-208 at level 3: list_item: Upload file
|
||||||
item-209 at level 3: list_item: Download as PDF
|
item-209 at level 3: list_item: Special pages
|
||||||
item-210 at level 3: list_item: Printable version
|
item-210 at level 3: list_item: Permanent link
|
||||||
item-211 at level 2: text: In other projects
|
item-211 at level 3: list_item: Page information
|
||||||
item-212 at level 2: list: group list
|
item-212 at level 3: list_item: Cite this page
|
||||||
item-213 at level 3: list_item: Wikimedia Commons
|
item-213 at level 3: list_item: Get shortened URL
|
||||||
item-214 at level 3: list_item: Wikiquote
|
item-214 at level 3: list_item: Download QR code
|
||||||
item-215 at level 2: text: Appearance
|
item-215 at level 3: list_item: Wikidata item
|
||||||
item-216 at level 2: picture
|
item-216 at level 2: text: Print/export
|
||||||
item-217 at level 2: text: From Wikipedia, the free encyclopedia
|
item-217 at level 2: list: group list
|
||||||
item-218 at level 2: text: Common name for many species of bird
|
item-218 at level 3: list_item: Download as PDF
|
||||||
item-219 at level 2: text: This article is about the bird. ... as a food, see . For other uses, see .
|
item-219 at level 3: list_item: Printable version
|
||||||
item-220 at level 2: text: "Duckling" redirects here. For other uses, see .
|
item-220 at level 2: text: In other projects
|
||||||
item-221 at level 2: table with [13x2]
|
item-221 at level 2: list: group list
|
||||||
item-222 at level 2: text: Duck is the common name for nume ... und in both fresh water and sea water.
|
item-222 at level 3: list_item: Wikimedia Commons
|
||||||
item-223 at level 2: text: Ducks are sometimes confused wit ... divers, grebes, gallinules and coots.
|
item-223 at level 3: list_item: Wikiquote
|
||||||
item-224 at level 2: section_header: Etymology
|
item-224 at level 2: text: Appearance
|
||||||
item-225 at level 3: text: The word duck comes from Old Eng ... h duiken and German tauchen 'to dive'.
|
item-225 at level 2: text: move to sidebar
|
||||||
item-226 at level 3: picture
|
item-226 at level 2: text: hide
|
||||||
item-226 at level 4: caption: Pacific black duck displaying the characteristic upending "duck"
|
item-227 at level 2: text: From Wikipedia, the free encyclopedia
|
||||||
item-227 at level 3: text: This word replaced Old English e ... nskrit ātí 'water bird', among others.
|
item-228 at level 2: text: (Redirected from Duckling)
|
||||||
item-228 at level 3: text: A duckling is a young duck in do ... , is sometimes labelled as a duckling.
|
item-229 at level 2: text: Common name for many species of bird
|
||||||
item-229 at level 3: text: A male is called a drake and the ... a duck, or in ornithology a hen.[3][4]
|
item-230 at level 2: text: This article is about the bird. ... other uses, see Duck (disambiguation).
|
||||||
item-230 at level 3: picture
|
item-231 at level 2: text: "Duckling" redirects here. For other uses, see Duckling (disambiguation).
|
||||||
item-230 at level 4: caption: Male mallard.
|
item-232 at level 2: picture
|
||||||
item-231 at level 3: picture
|
item-233 at level 2: picture
|
||||||
item-231 at level 4: caption: Wood ducks.
|
item-234 at level 2: table with [13x2]
|
||||||
item-232 at level 2: section_header: Taxonomy
|
item-235 at level 2: text: Duck is the common name for nume ... und in both fresh water and sea water.
|
||||||
item-233 at level 3: text: All ducks belong to the biologic ... ationships between various species.[9]
|
item-236 at level 2: text: Ducks are sometimes confused wit ... divers, grebes, gallinules and coots.
|
||||||
item-234 at level 3: picture
|
item-237 at level 2: section_header: Etymology
|
||||||
item-234 at level 4: caption: Mallard landing in approach
|
item-238 at level 3: text: The word duck comes from Old Eng ... h duiken and German tauchen 'to dive'.
|
||||||
item-235 at level 3: text: In most modern classifications, ... all size and stiff, upright tails.[14]
|
item-239 at level 3: picture
|
||||||
item-236 at level 3: text: A number of other species called ... shelducks in the tribe Tadornini.[15]
|
item-239 at level 4: caption: Pacific black duck displaying the characteristic upending "duck"
|
||||||
item-237 at level 2: section_header: Morphology
|
item-240 at level 3: text: This word replaced Old English e ... nskrit ātí 'water bird', among others.
|
||||||
item-238 at level 3: picture
|
item-241 at level 3: text: A duckling is a young duck in do ... , is sometimes labelled as a duckling.
|
||||||
item-238 at level 4: caption: Male Mandarin duck
|
item-242 at level 3: text: A male is called a drake and the ... a duck, or in ornithology a hen.[3][4]
|
||||||
item-239 at level 3: text: The overall body plan of ducks i ... is moult typically precedes migration.
|
item-243 at level 3: picture
|
||||||
item-240 at level 3: text: The drakes of northern species o ... rkscrew shaped vagina to prevent rape.
|
item-243 at level 4: caption: Male mallard.
|
||||||
item-241 at level 2: section_header: Distribution and habitat
|
|
||||||
item-242 at level 3: picture
|
|
||||||
item-242 at level 4: caption: Flying steamer ducks in Ushuaia, Argentina
|
|
||||||
item-243 at level 3: text: Ducks have a cosmopolitan distri ... endemic to such far-flung islands.[21]
|
|
||||||
item-244 at level 3: picture
|
item-244 at level 3: picture
|
||||||
item-244 at level 4: caption: Female mallard in Cornwall, England
|
item-244 at level 4: caption: Wood ducks.
|
||||||
item-245 at level 3: text: Some duck species, mainly those ... t form after localised heavy rain.[23]
|
item-245 at level 2: section_header: Taxonomy
|
||||||
item-246 at level 2: section_header: Behaviour
|
item-246 at level 3: text: All ducks belong to the biologic ... ationships between various species.[9]
|
||||||
item-247 at level 3: section_header: Feeding
|
item-247 at level 3: picture
|
||||||
item-248 at level 4: picture
|
item-247 at level 4: caption: Mallard landing in approach
|
||||||
item-248 at level 5: caption: Pecten along the bill
|
item-248 at level 3: text: In most modern classifications, ... all size and stiff, upright tails.[14]
|
||||||
item-249 at level 4: picture
|
item-249 at level 3: text: A number of other species called ... shelducks in the tribe Tadornini.[15]
|
||||||
item-249 at level 5: caption: Mallard duckling preening
|
item-250 at level 2: section_header: Morphology
|
||||||
item-250 at level 4: text: Ducks eat food sources such as g ... amphibians, worms, and small molluscs.
|
item-251 at level 3: picture
|
||||||
item-251 at level 4: text: Dabbling ducks feed on the surfa ... thers and to hold slippery food items.
|
item-251 at level 4: caption: Male Mandarin duck
|
||||||
item-252 at level 4: text: Diving ducks and sea ducks forag ... ave more difficulty taking off to fly.
|
item-252 at level 3: text: The overall body plan of ducks i ... is moult typically precedes migration.
|
||||||
item-253 at level 4: text: A few specialized species such a ... apted to catch and swallow large fish.
|
item-253 at level 3: text: The drakes of northern species o ... rkscrew shaped vagina to prevent rape.
|
||||||
item-254 at level 4: text: The others have the characterist ... e nostrils come out through hard horn.
|
item-254 at level 2: section_header: Distribution and habitat
|
||||||
item-255 at level 4: text: The Guardian published an articl ... the ducks and pollutes waterways.[25]
|
item-255 at level 3: text: See also: List of Anseriformes by population
|
||||||
item-256 at level 3: section_header: Breeding
|
item-256 at level 3: picture
|
||||||
item-257 at level 4: picture
|
item-256 at level 4: caption: Flying steamer ducks in Ushuaia, Argentina
|
||||||
item-257 at level 5: caption: A Muscovy duckling
|
item-257 at level 3: text: Ducks have a cosmopolitan distri ... endemic to such far-flung islands.[21]
|
||||||
item-258 at level 4: text: Ducks generally only have one pa ... st and led her ducklings to water.[28]
|
item-258 at level 3: picture
|
||||||
item-259 at level 3: section_header: Communication
|
item-258 at level 4: caption: Female mallard in Cornwall, England
|
||||||
item-260 at level 4: text: Female mallard ducks (as well as ... laying calls or quieter contact calls.
|
item-259 at level 3: text: Some duck species, mainly those ... t form after localised heavy rain.[23]
|
||||||
item-261 at level 4: text: A common urban legend claims tha ... annel television show MythBusters.[32]
|
item-260 at level 2: section_header: Behaviour
|
||||||
item-262 at level 3: section_header: Predators
|
item-261 at level 3: section_header: Feeding
|
||||||
item-263 at level 4: picture
|
item-262 at level 4: picture
|
||||||
item-263 at level 5: caption: Ringed teal
|
item-262 at level 5: caption: Pecten along the bill
|
||||||
item-264 at level 4: text: Ducks have many predators. Duckl ... or large birds, such as hawks or owls.
|
item-263 at level 4: text: Ducks eat food sources such as g ... amphibians, worms, and small molluscs.
|
||||||
item-265 at level 4: text: Adult ducks are fast fliers, but ... its speed and strength to catch ducks.
|
item-264 at level 4: text: Dabbling ducks feed on the surfa ... thers and to hold slippery food items.
|
||||||
item-266 at level 2: section_header: Relationship with humans
|
item-265 at level 4: text: Diving ducks and sea ducks forag ... ave more difficulty taking off to fly.
|
||||||
item-267 at level 3: section_header: Hunting
|
item-266 at level 4: text: A few specialized species such a ... apted to catch and swallow large fish.
|
||||||
item-268 at level 4: text: Humans have hunted ducks since p ... evidence of this is uncommon.[35][42]
|
item-267 at level 4: text: The others have the characterist ... e nostrils come out through hard horn.
|
||||||
item-269 at level 4: text: In many areas, wild ducks (inclu ... inated by pollutants such as PCBs.[44]
|
item-268 at level 4: text: The Guardian published an articl ... the ducks and pollutes waterways.[25]
|
||||||
item-270 at level 3: section_header: Domestication
|
item-269 at level 3: section_header: Breeding
|
||||||
item-271 at level 4: picture
|
item-270 at level 4: picture
|
||||||
item-271 at level 5: caption: Indian Runner ducks, a common breed of domestic ducks
|
item-270 at level 5: caption: A Muscovy duckling
|
||||||
item-272 at level 4: text: Ducks have many economic uses, b ... it weighs less than 1 kg (2.2 lb).[48]
|
item-271 at level 4: text: Ducks generally only have one pa ... st and led her ducklings to water.[28]
|
||||||
item-273 at level 3: section_header: Heraldry
|
item-272 at level 3: section_header: Communication
|
||||||
item-274 at level 4: picture
|
item-273 at level 4: text: Female mallard ducks (as well as ... laying calls or quieter contact calls.
|
||||||
item-274 at level 5: caption: Three black-colored ducks in the coat of arms of Maaninka[49]
|
item-274 at level 4: text: A common urban legend claims tha ... annel television show MythBusters.[32]
|
||||||
item-275 at level 4: text: Ducks appear on several coats of ... the coat of arms of Föglö (Åland).[51]
|
item-275 at level 3: section_header: Predators
|
||||||
item-276 at level 3: section_header: Cultural references
|
item-276 at level 4: picture
|
||||||
item-277 at level 4: text: In 2002, psychologist Richard Wi ... 54] and was made into a movie in 1986.
|
item-276 at level 5: caption: Ringed teal
|
||||||
item-278 at level 4: text: The 1992 Disney film The Mighty ... Ducks minor league baseball team.[55]
|
item-277 at level 4: text: Ducks have many predators. Duckl ... or large birds, such as hawks or owls.
|
||||||
item-279 at level 2: section_header: See also
|
item-278 at level 4: text: Adult ducks are fast fliers, but ... its speed and strength to catch ducks.
|
||||||
item-280 at level 3: list: group list
|
item-279 at level 2: section_header: Relationship with humans
|
||||||
item-281 at level 4: list_item: Birds portal
|
item-280 at level 3: section_header: Hunting
|
||||||
item-282 at level 3: list: group list
|
item-281 at level 4: text: Main article: Waterfowl hunting
|
||||||
item-283 at level 4: list_item: Domestic duck
|
item-282 at level 4: text: Humans have hunted ducks since p ... evidence of this is uncommon.[35][42]
|
||||||
item-284 at level 4: list_item: Duck as food
|
item-283 at level 4: text: In many areas, wild ducks (inclu ... inated by pollutants such as PCBs.[44]
|
||||||
item-285 at level 4: list_item: Duck test
|
item-284 at level 3: section_header: Domestication
|
||||||
item-286 at level 4: list_item: Duck breeds
|
item-285 at level 4: text: Main article: Domestic duck
|
||||||
item-287 at level 4: list_item: Fictional ducks
|
item-286 at level 4: picture
|
||||||
item-288 at level 4: list_item: Rubber duck
|
item-286 at level 5: caption: Indian Runner ducks, a common breed of domestic ducks
|
||||||
item-289 at level 2: section_header: Notes
|
item-287 at level 4: text: Ducks have many economic uses, b ... it weighs less than 1 kg (2.2 lb).[48]
|
||||||
item-290 at level 3: section_header: Citations
|
item-288 at level 3: section_header: Heraldry
|
||||||
item-291 at level 4: list: group ordered list
|
item-289 at level 4: picture
|
||||||
item-292 at level 5: list_item: ^ "Duckling". The American Herit ... n Company. 2006. Retrieved 2015-05-22.
|
item-289 at level 5: caption: Three black-colored ducks in the coat of arms of Maaninka[49]
|
||||||
item-293 at level 5: list_item: ^ "Duckling". Kernerman English ... Ltd. 2000–2006. Retrieved 2015-05-22.
|
item-290 at level 4: text: Ducks appear on several coats of ... the coat of arms of Föglö (Åland).[51]
|
||||||
item-294 at level 5: list_item: ^ Dohner, Janet Vorwald (2001). ... University Press. ISBN 978-0300138139.
|
item-291 at level 3: section_header: Cultural references
|
||||||
item-295 at level 5: list_item: ^ Visca, Curt; Visca, Kelley (20 ... Publishing Group. ISBN 9780823961566.
|
item-292 at level 4: text: In 2002, psychologist Richard Wi ... 54] and was made into a movie in 1986.
|
||||||
item-296 at level 5: list_item: ^ a b c d Carboneras 1992, p. 536.
|
item-293 at level 4: text: The 1992 Disney film The Mighty ... Ducks minor league baseball team.[55]
|
||||||
item-297 at level 5: list_item: ^ Livezey 1986, pp. 737–738.
|
item-294 at level 2: section_header: See also
|
||||||
item-298 at level 5: list_item: ^ Madsen, McHugh & de Kloet 1988, p. 452.
|
item-295 at level 3: list: group list
|
||||||
item-299 at level 5: list_item: ^ Donne-Goussé, Laudet & Hänni 2002, pp. 353–354.
|
item-296 at level 4: list_item: Birds portal
|
||||||
item-300 at level 5: list_item: ^ a b c d e f Carboneras 1992, p. 540.
|
item-297 at level 4: picture
|
||||||
item-301 at level 5: list_item: ^ Elphick, Dunning & Sibley 2001, p. 191.
|
item-298 at level 3: list: group list
|
||||||
item-302 at level 5: list_item: ^ Kear 2005, p. 448.
|
item-299 at level 4: list_item: Domestic duck
|
||||||
item-303 at level 5: list_item: ^ Kear 2005, p. 622–623.
|
item-300 at level 4: list_item: Duck as food
|
||||||
item-304 at level 5: list_item: ^ Kear 2005, p. 686.
|
item-301 at level 4: list_item: Duck test
|
||||||
item-305 at level 5: list_item: ^ Elphick, Dunning & Sibley 2001, p. 193.
|
item-302 at level 4: list_item: Duck breeds
|
||||||
item-306 at level 5: list_item: ^ a b c d e f g Carboneras 1992, p. 537.
|
item-303 at level 4: list_item: Fictional ducks
|
||||||
item-307 at level 5: list_item: ^ American Ornithologists' Union 1998, p. xix.
|
item-304 at level 4: list_item: Rubber duck
|
||||||
item-308 at level 5: list_item: ^ American Ornithologists' Union 1998.
|
item-305 at level 2: section_header: Notes
|
||||||
item-309 at level 5: list_item: ^ Carboneras 1992, p. 538.
|
item-306 at level 3: section_header: Citations
|
||||||
item-310 at level 5: list_item: ^ Christidis & Boles 2008, p. 62.
|
item-307 at level 4: list: group ordered list
|
||||||
item-311 at level 5: list_item: ^ Shirihai 2008, pp. 239, 245.
|
item-308 at level 5: list_item: ^ "Duckling". The American Herit ... n Company. 2006. Retrieved 2015-05-22.
|
||||||
item-312 at level 5: list_item: ^ a b Pratt, Bruner & Berrett 1987, pp. 98–107.
|
item-309 at level 5: list_item: ^ "Duckling". Kernerman English ... Ltd. 2000–2006. Retrieved 2015-05-22.
|
||||||
item-313 at level 5: list_item: ^ Fitter, Fitter & Hosking 2000, pp. 52–3.
|
item-310 at level 5: list_item: ^ Dohner, Janet Vorwald (2001). ... University Press. ISBN 978-0300138139.
|
||||||
item-314 at level 5: list_item: ^ "Pacific Black Duck". www.wiresnr.org. Retrieved 2018-04-27.
|
item-311 at level 5: list_item: ^ Visca, Curt; Visca, Kelley (20 ... Publishing Group. ISBN 9780823961566.
|
||||||
item-315 at level 5: list_item: ^ Ogden, Evans. "Dabbling Ducks". CWE. Retrieved 2006-11-02.
|
item-312 at level 5: list_item: ^ a b c d Carboneras 1992, p. 536.
|
||||||
item-316 at level 5: list_item: ^ Karl Mathiesen (16 March 2015) ... Guardian. Retrieved 13 November 2016.
|
item-313 at level 5: list_item: ^ Livezey 1986, pp. 737–738.
|
||||||
item-317 at level 5: list_item: ^ Rohwer, Frank C.; Anderson, Mi ... 4615-6787-5_4. ISBN 978-1-4615-6789-9.
|
item-314 at level 5: list_item: ^ Madsen, McHugh & de Kloet 1988, p. 452.
|
||||||
item-318 at level 5: list_item: ^ Smith, Cyndi M.; Cooke, Fred; ... 093/condor/102.1.201. hdl:10315/13797.
|
item-315 at level 5: list_item: ^ Donne-Goussé, Laudet & Hänni 2002, pp. 353–354.
|
||||||
item-319 at level 5: list_item: ^ "If You Find An Orphaned Duckl ... l on 2018-09-23. Retrieved 2018-12-22.
|
item-316 at level 5: list_item: ^ a b c d e f Carboneras 1992, p. 540.
|
||||||
item-320 at level 5: list_item: ^ Carver, Heather (2011). The Du ... 9780557901562.[self-published source]
|
item-317 at level 5: list_item: ^ Elphick, Dunning & Sibley 2001, p. 191.
|
||||||
item-321 at level 5: list_item: ^ Titlow, Budd (2013-09-03). Bir ... man & Littlefield. ISBN 9780762797707.
|
item-318 at level 5: list_item: ^ Kear 2005, p. 448.
|
||||||
item-322 at level 5: list_item: ^ Amos, Jonathan (2003-09-08). " ... kers". BBC News. Retrieved 2006-11-02.
|
item-319 at level 5: list_item: ^ Kear 2005, p. 622–623.
|
||||||
item-323 at level 5: list_item: ^ "Mythbusters Episode 8". 12 December 2003.
|
item-320 at level 5: list_item: ^ Kear 2005, p. 686.
|
||||||
item-324 at level 5: list_item: ^ Erlandson 1994, p. 171.
|
item-321 at level 5: list_item: ^ Elphick, Dunning & Sibley 2001, p. 193.
|
||||||
item-325 at level 5: list_item: ^ Jeffries 2008, pp. 168, 243.
|
item-322 at level 5: list_item: ^ a b c d e f g Carboneras 1992, p. 537.
|
||||||
item-326 at level 5: list_item: ^ a b Sued-Badillo 2003, p. 65.
|
item-323 at level 5: list_item: ^ American Ornithologists' Union 1998, p. xix.
|
||||||
item-327 at level 5: list_item: ^ Thorpe 1996, p. 68.
|
item-324 at level 5: list_item: ^ American Ornithologists' Union 1998.
|
||||||
item-328 at level 5: list_item: ^ Maisels 1999, p. 42.
|
item-325 at level 5: list_item: ^ Carboneras 1992, p. 538.
|
||||||
item-329 at level 5: list_item: ^ Rau 1876, p. 133.
|
item-326 at level 5: list_item: ^ Christidis & Boles 2008, p. 62.
|
||||||
item-330 at level 5: list_item: ^ Higman 2012, p. 23.
|
item-327 at level 5: list_item: ^ Shirihai 2008, pp. 239, 245.
|
||||||
item-331 at level 5: list_item: ^ Hume 2012, p. 53.
|
item-328 at level 5: list_item: ^ a b Pratt, Bruner & Berrett 1987, pp. 98–107.
|
||||||
item-332 at level 5: list_item: ^ Hume 2012, p. 52.
|
item-329 at level 5: list_item: ^ Fitter, Fitter & Hosking 2000, pp. 52–3.
|
||||||
item-333 at level 5: list_item: ^ Fieldhouse 2002, p. 167.
|
item-330 at level 5: list_item: ^ "Pacific Black Duck". www.wiresnr.org. Retrieved 2018-04-27.
|
||||||
item-334 at level 5: list_item: ^ Livingston, A. D. (1998-01-01) ... Editions, Limited. ISBN 9781853263774.
|
item-331 at level 5: list_item: ^ Ogden, Evans. "Dabbling Ducks". CWE. Retrieved 2006-11-02.
|
||||||
item-335 at level 5: list_item: ^ "Study plan for waterfowl inju ... on 2022-10-09. Retrieved 2 July 2019.
|
item-332 at level 5: list_item: ^ Karl Mathiesen (16 March 2015) ... Guardian. Retrieved 13 November 2016.
|
||||||
item-336 at level 5: list_item: ^ "FAOSTAT". www.fao.org. Retrieved 2019-10-25.
|
item-333 at level 5: list_item: ^ Rohwer, Frank C.; Anderson, Mi ... 4615-6787-5_4. ISBN 978-1-4615-6789-9.
|
||||||
item-337 at level 5: list_item: ^ "Anas platyrhynchos, Domestic ... . Digimorph.org. Retrieved 2012-12-23.
|
item-334 at level 5: list_item: ^ Smith, Cyndi M.; Cooke, Fred; ... 093/condor/102.1.201. hdl:10315/13797.
|
||||||
item-338 at level 5: list_item: ^ Sy Montgomery. "Mallard; Encyc ... Britannica.com. Retrieved 2012-12-23.
|
item-335 at level 5: list_item: ^ "If You Find An Orphaned Duckl ... l on 2018-09-23. Retrieved 2018-12-22.
|
||||||
item-339 at level 5: list_item: ^ Glenday, Craig (2014). Guinnes ... ited. pp. 135. ISBN 978-1-908843-15-9.
|
item-336 at level 5: list_item: ^ Carver, Heather (2011). The Du ... 9780557901562.[self-published source]
|
||||||
item-340 at level 5: list_item: ^ Suomen kunnallisvaakunat (in F ... tto. 1982. p. 147. ISBN 951-773-085-3.
|
item-337 at level 5: list_item: ^ Titlow, Budd (2013-09-03). Bir ... man & Littlefield. ISBN 9780762797707.
|
||||||
item-341 at level 5: list_item: ^ "Lubānas simbolika" (in Latvian). Retrieved September 9, 2021.
|
item-338 at level 5: list_item: ^ Amos, Jonathan (2003-09-08). " ... kers". BBC News. Retrieved 2006-11-02.
|
||||||
item-342 at level 5: list_item: ^ "Föglö" (in Swedish). Retrieved September 9, 2021.
|
item-339 at level 5: list_item: ^ "Mythbusters Episode 8". 12 December 2003.
|
||||||
item-343 at level 5: list_item: ^ Young, Emma. "World's funniest ... w Scientist. Retrieved 7 January 2019.
|
item-340 at level 5: list_item: ^ Erlandson 1994, p. 171.
|
||||||
item-344 at level 5: list_item: ^ "Howard the Duck (character)". Grand Comics Database.
|
item-341 at level 5: list_item: ^ Jeffries 2008, pp. 168, 243.
|
||||||
item-345 at level 5: list_item: ^ Sanderson, Peter; Gilbert, Lau ... luding this bad-tempered talking duck.
|
item-342 at level 5: list_item: ^ a b Sued-Badillo 2003, p. 65.
|
||||||
item-346 at level 5: list_item: ^ "The Duck". University of Oregon Athletics. Retrieved 2022-01-20.
|
item-343 at level 5: list_item: ^ Thorpe 1996, p. 68.
|
||||||
item-347 at level 3: section_header: Sources
|
item-344 at level 5: list_item: ^ Maisels 1999, p. 42.
|
||||||
item-348 at level 4: list: group list
|
item-345 at level 5: list_item: ^ Rau 1876, p. 133.
|
||||||
item-349 at level 5: list_item: American Ornithologists' Union ( ... (PDF) from the original on 2022-10-09.
|
item-346 at level 5: list_item: ^ Higman 2012, p. 23.
|
||||||
item-350 at level 5: list_item: Carboneras, Carlos (1992). del H ... Lynx Edicions. ISBN 978-84-87334-10-8.
|
item-347 at level 5: list_item: ^ Hume 2012, p. 53.
|
||||||
item-351 at level 5: list_item: Christidis, Les; Boles, Walter E ... ro Publishing. ISBN 978-0-643-06511-6.
|
item-348 at level 5: list_item: ^ Hume 2012, p. 52.
|
||||||
item-352 at level 5: list_item: Donne-Goussé, Carole; Laudet, Vi ... /S1055-7903(02)00019-2. PMID 12099792.
|
item-349 at level 5: list_item: ^ Fieldhouse 2002, p. 167.
|
||||||
item-353 at level 5: list_item: Elphick, Chris; Dunning, John B. ... istopher Helm. ISBN 978-0-7136-6250-4.
|
item-350 at level 5: list_item: ^ Livingston, A. D. (1998-01-01) ... Editions, Limited. ISBN 9781853263774.
|
||||||
item-354 at level 5: list_item: Erlandson, Jon M. (1994). Early ... usiness Media. ISBN 978-1-4419-3231-0.
|
item-351 at level 5: list_item: ^ "Study plan for waterfowl inju ... on 2022-10-09. Retrieved 2 July 2019.
|
||||||
item-355 at level 5: list_item: Fieldhouse, Paul (2002). Food, F ... ara: ABC-CLIO. ISBN 978-1-61069-412-4.
|
item-352 at level 5: list_item: ^ "FAOSTAT". www.fao.org. Retrieved 2019-10-25.
|
||||||
item-356 at level 5: list_item: Fitter, Julian; Fitter, Daniel; ... versity Press. ISBN 978-0-691-10295-5.
|
item-353 at level 5: list_item: ^ "Anas platyrhynchos, Domestic ... . Digimorph.org. Retrieved 2012-12-23.
|
||||||
item-357 at level 5: list_item: Higman, B. W. (2012). How Food M ... Wiley & Sons. ISBN 978-1-4051-8947-7.
|
item-354 at level 5: list_item: ^ Sy Montgomery. "Mallard; Encyc ... Britannica.com. Retrieved 2012-12-23.
|
||||||
item-358 at level 5: list_item: Hume, Julian H. (2012). Extinct ... istopher Helm. ISBN 978-1-4729-3744-5.
|
item-355 at level 5: list_item: ^ Glenday, Craig (2014). Guinnes ... ited. pp. 135. ISBN 978-1-908843-15-9.
|
||||||
item-359 at level 5: list_item: Jeffries, Richard (2008). Holoce ... Alabama Press. ISBN 978-0-8173-1658-7.
|
item-356 at level 5: list_item: ^ Suomen kunnallisvaakunat (in F ... tto. 1982. p. 147. ISBN 951-773-085-3.
|
||||||
item-360 at level 5: list_item: Kear, Janet, ed. (2005). Ducks, ... versity Press. ISBN 978-0-19-861009-0.
|
item-357 at level 5: list_item: ^ "Lubānas simbolika" (in Latvian). Retrieved September 9, 2021.
|
||||||
item-361 at level 5: list_item: Livezey, Bradley C. (October 198 ... (PDF) from the original on 2022-10-09.
|
item-358 at level 5: list_item: ^ "Föglö" (in Swedish). Retrieved September 9, 2021.
|
||||||
item-362 at level 5: list_item: Madsen, Cort S.; McHugh, Kevin P ... (PDF) from the original on 2022-10-09.
|
item-359 at level 5: list_item: ^ Young, Emma. "World's funniest ... w Scientist. Retrieved 7 January 2019.
|
||||||
item-363 at level 5: list_item: Maisels, Charles Keith (1999). E ... on: Routledge. ISBN 978-0-415-10975-8.
|
item-360 at level 5: list_item: ^ "Howard the Duck (character)". Grand Comics Database.
|
||||||
item-364 at level 5: list_item: Pratt, H. Douglas; Bruner, Phill ... University Press. ISBN 0-691-02399-9.
|
item-361 at level 5: list_item: ^ Sanderson, Peter; Gilbert, Lau ... luding this bad-tempered talking duck.
|
||||||
item-365 at level 5: list_item: Rau, Charles (1876). Early Man i ... ork: Harper & Brothers. LCCN 05040168.
|
item-362 at level 5: list_item: ^ "The Duck". University of Oregon Athletics. Retrieved 2022-01-20.
|
||||||
item-366 at level 5: list_item: Shirihai, Hadoram (2008). A Comp ... versity Press. ISBN 978-0-691-13666-0.
|
item-363 at level 3: section_header: Sources
|
||||||
item-367 at level 5: list_item: Sued-Badillo, Jalil (2003). Auto ... Paris: UNESCO. ISBN 978-92-3-103832-7.
|
item-364 at level 4: list: group list
|
||||||
item-368 at level 5: list_item: Thorpe, I. J. (1996). The Origin ... rk: Routledge. ISBN 978-0-415-08009-5.
|
item-365 at level 5: list_item: American Ornithologists' Union ( ... (PDF) from the original on 2022-10-09.
|
||||||
item-369 at level 2: section_header: External links
|
item-366 at level 5: list_item: Carboneras, Carlos (1992). del H ... Lynx Edicions. ISBN 978-84-87334-10-8.
|
||||||
item-370 at level 3: list: group list
|
item-367 at level 5: list_item: Christidis, Les; Boles, Walter E ... ro Publishing. ISBN 978-0-643-06511-6.
|
||||||
item-371 at level 4: list_item: Definitions from Wiktionary
|
item-368 at level 5: list_item: Donne-Goussé, Carole; Laudet, Vi ... /S1055-7903(02)00019-2. PMID 12099792.
|
||||||
item-372 at level 4: list_item: Media from Commons
|
item-369 at level 5: list_item: Elphick, Chris; Dunning, John B. ... istopher Helm. ISBN 978-0-7136-6250-4.
|
||||||
item-373 at level 4: list_item: Quotations from Wikiquote
|
item-370 at level 5: list_item: Erlandson, Jon M. (1994). Early ... usiness Media. ISBN 978-1-4419-3231-0.
|
||||||
item-374 at level 4: list_item: Recipes from Wikibooks
|
item-371 at level 5: list_item: Fieldhouse, Paul (2002). Food, F ... ara: ABC-CLIO. ISBN 978-1-61069-412-4.
|
||||||
item-375 at level 4: list_item: Taxa from Wikispecies
|
item-372 at level 5: list_item: Fitter, Julian; Fitter, Daniel; ... versity Press. ISBN 978-0-691-10295-5.
|
||||||
item-376 at level 4: list_item: Data from Wikidata
|
item-373 at level 5: list_item: Higman, B. W. (2012). How Food M ... Wiley & Sons. ISBN 978-1-4051-8947-7.
|
||||||
item-377 at level 3: list: group list
|
item-374 at level 5: list_item: Hume, Julian H. (2012). Extinct ... istopher Helm. ISBN 978-1-4729-3744-5.
|
||||||
item-378 at level 4: list_item: list of books (useful looking abstracts)
|
item-375 at level 5: list_item: Jeffries, Richard (2008). Holoce ... Alabama Press. ISBN 978-0-8173-1658-7.
|
||||||
item-379 at level 4: list_item: Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine
|
item-376 at level 5: list_item: Kear, Janet, ed. (2005). Ducks, ... versity Press. ISBN 978-0-19-861009-0.
|
||||||
item-380 at level 4: list_item: Ducks at a Distance, by Rob Hine ... uide to identification of US waterfowl
|
item-377 at level 5: list_item: Livezey, Bradley C. (October 198 ... (PDF) from the original on 2022-10-09.
|
||||||
item-381 at level 3: table with [3x2]
|
item-378 at level 5: list_item: Madsen, Cort S.; McHugh, Kevin P ... (PDF) from the original on 2022-10-09.
|
||||||
item-382 at level 3: picture
|
item-379 at level 5: list_item: Maisels, Charles Keith (1999). E ... on: Routledge. ISBN 978-0-415-10975-8.
|
||||||
item-383 at level 3: text: Retrieved from ""
|
item-380 at level 5: list_item: Pratt, H. Douglas; Bruner, Phill ... University Press. ISBN 0-691-02399-9.
|
||||||
item-384 at level 3: text: :
|
item-381 at level 5: list_item: Rau, Charles (1876). Early Man i ... ork: Harper & Brothers. LCCN 05040168.
|
||||||
item-385 at level 3: list: group list
|
item-382 at level 5: list_item: Shirihai, Hadoram (2008). A Comp ... versity Press. ISBN 978-0-691-13666-0.
|
||||||
item-386 at level 4: list_item: Ducks
|
item-383 at level 5: list_item: Sued-Badillo, Jalil (2003). Auto ... Paris: UNESCO. ISBN 978-92-3-103832-7.
|
||||||
item-387 at level 4: list_item: Game birds
|
item-384 at level 5: list_item: Thorpe, I. J. (1996). The Origin ... rk: Routledge. ISBN 978-0-415-08009-5.
|
||||||
item-388 at level 4: list_item: Bird common names
|
item-385 at level 2: section_header: External links
|
||||||
item-389 at level 3: text: Hidden categories:
|
item-386 at level 3: text: Duck at Wikipedia's sister projects
|
||||||
item-390 at level 3: list: group list
|
item-387 at level 3: list: group list
|
||||||
item-391 at level 4: list_item: All accuracy disputes
|
item-388 at level 4: list_item: Definitions from Wiktionary
|
||||||
item-392 at level 4: list_item: Accuracy disputes from February 2020
|
item-389 at level 4: picture
|
||||||
item-393 at level 4: list_item: CS1 Finnish-language sources (fi)
|
item-390 at level 4: list_item: Media from Commons
|
||||||
item-394 at level 4: list_item: CS1 Latvian-language sources (lv)
|
item-391 at level 4: picture
|
||||||
item-395 at level 4: list_item: CS1 Swedish-language sources (sv)
|
item-392 at level 4: list_item: Quotations from Wikiquote
|
||||||
item-396 at level 4: list_item: Articles with short description
|
item-393 at level 4: picture
|
||||||
item-397 at level 4: list_item: Short description is different from Wikidata
|
item-394 at level 4: list_item: Recipes from Wikibooks
|
||||||
item-398 at level 4: list_item: Wikipedia indefinitely move-protected pages
|
item-395 at level 4: picture
|
||||||
item-399 at level 4: list_item: Wikipedia indefinitely semi-protected pages
|
item-396 at level 4: list_item: Taxa from Wikispecies
|
||||||
item-400 at level 4: list_item: Articles with 'species' microformats
|
item-397 at level 4: picture
|
||||||
item-401 at level 4: list_item: Articles containing Old English (ca. 450-1100)-language text
|
item-398 at level 4: list_item: Data from Wikidata
|
||||||
item-402 at level 4: list_item: Articles containing Dutch-language text
|
item-399 at level 4: picture
|
||||||
item-403 at level 4: list_item: Articles containing German-language text
|
item-400 at level 3: list: group list
|
||||||
item-404 at level 4: list_item: Articles containing Norwegian-language text
|
item-401 at level 4: list_item: list of books (useful looking abstracts)
|
||||||
item-405 at level 4: list_item: Articles containing Lithuanian-language text
|
item-402 at level 4: list_item: Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine
|
||||||
item-406 at level 4: list_item: Articles containing Ancient Greek (to 1453)-language text
|
item-403 at level 4: list_item: Ducks at a Distance, by Rob Hine ... uide to identification of US waterfowl
|
||||||
item-407 at level 4: list_item: All articles with self-published sources
|
item-404 at level 3: picture
|
||||||
item-408 at level 4: list_item: Articles with self-published sources from February 2020
|
item-405 at level 3: table with [3x2]
|
||||||
item-409 at level 4: list_item: All articles with unsourced statements
|
item-406 at level 3: text: Retrieved from "https://en.wikip ... index.php?title=Duck&oldid=1246843351"
|
||||||
item-410 at level 4: list_item: Articles with unsourced statements from January 2022
|
item-407 at level 3: text: Categories:
|
||||||
item-411 at level 4: list_item: CS1: long volume value
|
item-408 at level 3: list: group list
|
||||||
item-412 at level 4: list_item: Pages using Sister project links with wikidata mismatch
|
item-409 at level 4: list_item: Ducks
|
||||||
item-413 at level 4: list_item: Pages using Sister project links with hidden wikidata
|
item-410 at level 4: list_item: Game birds
|
||||||
item-414 at level 4: list_item: Webarchive template wayback links
|
item-411 at level 4: list_item: Bird common names
|
||||||
item-415 at level 4: list_item: Articles with Project Gutenberg links
|
item-412 at level 3: text: Hidden categories:
|
||||||
item-416 at level 4: list_item: Articles containing video clips
|
item-413 at level 3: list: group list
|
||||||
item-417 at level 3: list: group list
|
item-414 at level 4: list_item: All accuracy disputes
|
||||||
item-418 at level 4: list_item: This page was last edited on 21 September 2024, at 12:11 (UTC).
|
item-415 at level 4: list_item: Accuracy disputes from February 2020
|
||||||
item-419 at level 4: list_item: Text is available under the Crea ... tion, Inc., a non-profit organization.
|
item-416 at level 4: list_item: CS1 Finnish-language sources (fi)
|
||||||
item-420 at level 3: list: group list
|
item-417 at level 4: list_item: CS1 Latvian-language sources (lv)
|
||||||
item-421 at level 4: list_item: Privacy policy
|
item-418 at level 4: list_item: CS1 Swedish-language sources (sv)
|
||||||
item-422 at level 4: list_item: About Wikipedia
|
item-419 at level 4: list_item: Articles with short description
|
||||||
item-423 at level 4: list_item: Disclaimers
|
item-420 at level 4: list_item: Short description is different from Wikidata
|
||||||
item-424 at level 4: list_item: Contact Wikipedia
|
item-421 at level 4: list_item: Wikipedia indefinitely move-protected pages
|
||||||
item-425 at level 4: list_item: Code of Conduct
|
item-422 at level 4: list_item: Wikipedia indefinitely semi-protected pages
|
||||||
item-426 at level 4: list_item: Developers
|
item-423 at level 4: list_item: Articles with 'species' microformats
|
||||||
item-427 at level 4: list_item: Statistics
|
item-424 at level 4: list_item: Articles containing Old English (ca. 450-1100)-language text
|
||||||
item-428 at level 4: list_item: Cookie statement
|
item-425 at level 4: list_item: Articles containing Dutch-language text
|
||||||
item-429 at level 4: list_item: Mobile view
|
item-426 at level 4: list_item: Articles containing German-language text
|
||||||
item-430 at level 3: list: group list
|
item-427 at level 4: list_item: Articles containing Norwegian-language text
|
||||||
item-431 at level 3: list: group list
|
item-428 at level 4: list_item: Articles containing Lithuanian-language text
|
||||||
item-432 at level 1: caption: Pacific black duck displaying the characteristic upending "duck"
|
item-429 at level 4: list_item: Articles containing Ancient Greek (to 1453)-language text
|
||||||
item-433 at level 1: caption: Male mallard.
|
item-430 at level 4: list_item: All articles with self-published sources
|
||||||
item-434 at level 1: caption: Wood ducks.
|
item-431 at level 4: list_item: Articles with self-published sources from February 2020
|
||||||
item-435 at level 1: caption: Mallard landing in approach
|
item-432 at level 4: list_item: All articles with unsourced statements
|
||||||
item-436 at level 1: caption: Male Mandarin duck
|
item-433 at level 4: list_item: Articles with unsourced statements from January 2022
|
||||||
item-437 at level 1: caption: Flying steamer ducks in Ushuaia, Argentina
|
item-434 at level 4: list_item: CS1: long volume value
|
||||||
item-438 at level 1: caption: Female mallard in Cornwall, England
|
item-435 at level 4: list_item: Pages using Sister project links with wikidata mismatch
|
||||||
item-439 at level 1: caption: Pecten along the bill
|
item-436 at level 4: list_item: Pages using Sister project links with hidden wikidata
|
||||||
item-440 at level 1: caption: Mallard duckling preening
|
item-437 at level 4: list_item: Webarchive template wayback links
|
||||||
item-441 at level 1: caption: A Muscovy duckling
|
item-438 at level 4: list_item: Articles with Project Gutenberg links
|
||||||
item-442 at level 1: caption: Ringed teal
|
item-439 at level 4: list_item: Articles containing video clips
|
||||||
item-443 at level 1: caption: Indian Runner ducks, a common breed of domestic ducks
|
item-440 at level 3: list: group list
|
||||||
item-444 at level 1: caption: Three black-colored ducks in the coat of arms of Maaninka[49]
|
item-441 at level 4: list_item: This page was last edited on 21 September 2024, at 12:11 (UTC).
|
||||||
|
item-442 at level 4: list_item: Text is available under the Crea ... tion, Inc., a non-profit organization.
|
||||||
|
item-443 at level 3: list: group list
|
||||||
|
item-444 at level 4: list_item: Privacy policy
|
||||||
|
item-445 at level 4: list_item: About Wikipedia
|
||||||
|
item-446 at level 4: list_item: Disclaimers
|
||||||
|
item-447 at level 4: list_item: Contact Wikipedia
|
||||||
|
item-448 at level 4: list_item: Code of Conduct
|
||||||
|
item-449 at level 4: list_item: Developers
|
||||||
|
item-450 at level 4: list_item: Statistics
|
||||||
|
item-451 at level 4: list_item: Cookie statement
|
||||||
|
item-452 at level 4: list_item: Mobile view
|
||||||
|
item-453 at level 3: list: group list
|
||||||
|
item-454 at level 4: picture
|
||||||
|
item-454 at level 5: caption: Wikimedia Foundation
|
||||||
|
item-455 at level 4: picture
|
||||||
|
item-455 at level 5: caption: Powered by MediaWiki
|
||||||
|
item-456 at level 3: list: group list
|
||||||
|
item-457 at level 1: caption: Pacific black duck displaying the characteristic upending "duck"
|
||||||
|
item-458 at level 1: caption: Male mallard.
|
||||||
|
item-459 at level 1: caption: Wood ducks.
|
||||||
|
item-460 at level 1: caption: Mallard landing in approach
|
||||||
|
item-461 at level 1: caption: Male Mandarin duck
|
||||||
|
item-462 at level 1: caption: Flying steamer ducks in Ushuaia, Argentina
|
||||||
|
item-463 at level 1: caption: Female mallard in Cornwall, England
|
||||||
|
item-464 at level 1: caption: Pecten along the bill
|
||||||
|
item-465 at level 1: caption: A Muscovy duckling
|
||||||
|
item-466 at level 1: caption: Ringed teal
|
||||||
|
item-467 at level 1: caption: Indian Runner ducks, a common breed of domestic ducks
|
||||||
|
item-468 at level 1: caption: Three black-colored ducks in the coat of arms of Maaninka[49]
|
||||||
|
item-469 at level 1: caption: Wikimedia Foundation
|
||||||
|
item-470 at level 1: caption: Powered by MediaWiki
|
3143
tests/data/groundtruth/docling_v2/wiki_duck.html.json
vendored
3143
tests/data/groundtruth/docling_v2/wiki_duck.html.json
vendored
File diff suppressed because it is too large
Load Diff
183
tests/data/groundtruth/docling_v2/wiki_duck.html.md
vendored
183
tests/data/groundtruth/docling_v2/wiki_duck.html.md
vendored
@ -1,5 +1,9 @@
|
|||||||
## Contents
|
## Contents
|
||||||
|
|
||||||
|
move to sidebar
|
||||||
|
|
||||||
|
hide
|
||||||
|
|
||||||
- (Top)
|
- (Top)
|
||||||
- 1 Etymology
|
- 1 Etymology
|
||||||
- 2 Taxonomy
|
- 2 Taxonomy
|
||||||
@ -21,8 +25,12 @@
|
|||||||
- 8.2 Sources
|
- 8.2 Sources
|
||||||
- 9 External links
|
- 9 External links
|
||||||
|
|
||||||
|
Toggle the table of contents
|
||||||
|
|
||||||
# Duck
|
# Duck
|
||||||
|
|
||||||
|
136 languages
|
||||||
|
|
||||||
- Acèh
|
- Acèh
|
||||||
- Afrikaans
|
- Afrikaans
|
||||||
- Alemannisch
|
- Alemannisch
|
||||||
@ -160,15 +168,25 @@
|
|||||||
- Žemaitėška
|
- Žemaitėška
|
||||||
- 中文
|
- 中文
|
||||||
|
|
||||||
|
Edit links
|
||||||
|
|
||||||
- Article
|
- Article
|
||||||
- Talk
|
- Talk
|
||||||
|
|
||||||
|
English
|
||||||
|
|
||||||
- Read
|
- Read
|
||||||
- View source
|
- View source
|
||||||
- View history
|
- View history
|
||||||
|
|
||||||
Tools
|
Tools
|
||||||
|
|
||||||
|
Tools
|
||||||
|
|
||||||
|
move to sidebar
|
||||||
|
|
||||||
|
hide
|
||||||
|
|
||||||
Actions
|
Actions
|
||||||
|
|
||||||
- Read
|
- Read
|
||||||
@ -200,15 +218,23 @@ In other projects
|
|||||||
|
|
||||||
Appearance
|
Appearance
|
||||||
|
|
||||||
<!-- image -->
|
move to sidebar
|
||||||
|
|
||||||
|
hide
|
||||||
|
|
||||||
From Wikipedia, the free encyclopedia
|
From Wikipedia, the free encyclopedia
|
||||||
|
|
||||||
|
(Redirected from Duckling)
|
||||||
|
|
||||||
Common name for many species of bird
|
Common name for many species of bird
|
||||||
|
|
||||||
This article is about the bird. For duck as a food, see . For other uses, see .
|
This article is about the bird. For duck as a food, see Duck as food. For other uses, see Duck (disambiguation).
|
||||||
|
|
||||||
"Duckling" redirects here. For other uses, see .
|
"Duckling" redirects here. For other uses, see Duckling (disambiguation).
|
||||||
|
|
||||||
|
<!-- image -->
|
||||||
|
|
||||||
|
<!-- image -->
|
||||||
|
|
||||||
| Duck | Duck |
|
| Duck | Duck |
|
||||||
|--------------------------------|--------------------------------|
|
|--------------------------------|--------------------------------|
|
||||||
@ -275,6 +301,8 @@ The drakes of northern species often have extravagant plumage, but that is moult
|
|||||||
|
|
||||||
## Distribution and habitat
|
## Distribution and habitat
|
||||||
|
|
||||||
|
See also: List of Anseriformes by population
|
||||||
|
|
||||||
Flying steamer ducks in Ushuaia, Argentina
|
Flying steamer ducks in Ushuaia, Argentina
|
||||||
|
|
||||||
<!-- image -->
|
<!-- image -->
|
||||||
@ -295,10 +323,6 @@ Pecten along the bill
|
|||||||
|
|
||||||
<!-- image -->
|
<!-- image -->
|
||||||
|
|
||||||
Mallard duckling preening
|
|
||||||
|
|
||||||
<!-- image -->
|
|
||||||
|
|
||||||
Ducks eat food sources such as grasses, aquatic plants, fish, insects, small amphibians, worms, and small molluscs.
|
Ducks eat food sources such as grasses, aquatic plants, fish, insects, small amphibians, worms, and small molluscs.
|
||||||
|
|
||||||
Dabbling ducks feed on the surface of water or on land, or as deep as they can reach by up-ending without completely submerging.[24] Along the edge of the bill, there is a comb-like structure called a pecten. This strains the water squirting from the side of the bill and traps any food. The pecten is also used to preen feathers and to hold slippery food items.
|
Dabbling ducks feed on the surface of water or on land, or as deep as they can reach by up-ending without completely submerging.[24] Along the edge of the bill, there is a comb-like structure called a pecten. This strains the water squirting from the side of the bill and traps any food. The pecten is also used to preen feathers and to hold slippery food items.
|
||||||
@ -339,12 +363,16 @@ Adult ducks are fast fliers, but may be caught on the water by large aquatic pre
|
|||||||
|
|
||||||
### Hunting
|
### Hunting
|
||||||
|
|
||||||
|
Main article: Waterfowl hunting
|
||||||
|
|
||||||
Humans have hunted ducks since prehistoric times. Excavations of middens in California dating to 7800 – 6400 BP have turned up bones of ducks, including at least one now-extinct flightless species.[33] Ducks were captured in "significant numbers" by Holocene inhabitants of the lower Ohio River valley, suggesting they took advantage of the seasonal bounty provided by migrating waterfowl.[34] Neolithic hunters in locations as far apart as the Caribbean,[35] Scandinavia,[36] Egypt,[37] Switzerland,[38] and China relied on ducks as a source of protein for some or all of the year.[39] Archeological evidence shows that Māori people in New Zealand hunted the flightless Finsch's duck, possibly to extinction, though rat predation may also have contributed to its fate.[40] A similar end awaited the Chatham duck, a species with reduced flying capabilities which went extinct shortly after its island was colonised by Polynesian settlers.[41] It is probable that duck eggs were gathered by Neolithic hunter-gathers as well, though hard evidence of this is uncommon.[35][42]
|
Humans have hunted ducks since prehistoric times. Excavations of middens in California dating to 7800 – 6400 BP have turned up bones of ducks, including at least one now-extinct flightless species.[33] Ducks were captured in "significant numbers" by Holocene inhabitants of the lower Ohio River valley, suggesting they took advantage of the seasonal bounty provided by migrating waterfowl.[34] Neolithic hunters in locations as far apart as the Caribbean,[35] Scandinavia,[36] Egypt,[37] Switzerland,[38] and China relied on ducks as a source of protein for some or all of the year.[39] Archeological evidence shows that Māori people in New Zealand hunted the flightless Finsch's duck, possibly to extinction, though rat predation may also have contributed to its fate.[40] A similar end awaited the Chatham duck, a species with reduced flying capabilities which went extinct shortly after its island was colonised by Polynesian settlers.[41] It is probable that duck eggs were gathered by Neolithic hunter-gathers as well, though hard evidence of this is uncommon.[35][42]
|
||||||
|
|
||||||
In many areas, wild ducks (including ducks farmed and released into the wild) are hunted for food or sport,[43] by shooting, or by being trapped using duck decoys. Because an idle floating duck or a duck squatting on land cannot react to fly or move quickly, "a sitting duck" has come to mean "an easy target". These ducks may be contaminated by pollutants such as PCBs.[44]
|
In many areas, wild ducks (including ducks farmed and released into the wild) are hunted for food or sport,[43] by shooting, or by being trapped using duck decoys. Because an idle floating duck or a duck squatting on land cannot react to fly or move quickly, "a sitting duck" has come to mean "an easy target". These ducks may be contaminated by pollutants such as PCBs.[44]
|
||||||
|
|
||||||
### Domestication
|
### Domestication
|
||||||
|
|
||||||
|
Main article: Domestic duck
|
||||||
|
|
||||||
Indian Runner ducks, a common breed of domestic ducks
|
Indian Runner ducks, a common breed of domestic ducks
|
||||||
|
|
||||||
<!-- image -->
|
<!-- image -->
|
||||||
@ -368,6 +396,7 @@ The 1992 Disney film The Mighty Ducks, starring Emilio Estevez, chose the duck a
|
|||||||
## See also
|
## See also
|
||||||
|
|
||||||
- Birds portal
|
- Birds portal
|
||||||
|
<!-- image -->
|
||||||
|
|
||||||
- Domestic duck
|
- Domestic duck
|
||||||
- Duck as food
|
- Duck as food
|
||||||
@ -382,106 +411,114 @@ The 1992 Disney film The Mighty Ducks, starring Emilio Estevez, chose the duck a
|
|||||||
|
|
||||||
1. ^ "Duckling". The American Heritage Dictionary of the English Language, Fourth Edition. Houghton Mifflin Company. 2006. Retrieved 2015-05-22.
|
1. ^ "Duckling". The American Heritage Dictionary of the English Language, Fourth Edition. Houghton Mifflin Company. 2006. Retrieved 2015-05-22.
|
||||||
2. ^ "Duckling". Kernerman English Multilingual Dictionary (Beta Version). K. Dictionaries Ltd. 2000–2006. Retrieved 2015-05-22.
|
2. ^ "Duckling". Kernerman English Multilingual Dictionary (Beta Version). K. Dictionaries Ltd. 2000–2006. Retrieved 2015-05-22.
|
||||||
3. ^ Dohner, Janet Vorwald (2001). The Encyclopedia of Historic and Endangered Livestock and Poultry Breeds. Yale University Press. ISBN 978-0300138139.
|
3. ^ Dohner, Janet Vorwald (2001). The Encyclopedia of Historic and Endangered Livestock and Poultry Breeds. Yale University Press. ISBN 978-0300138139.
|
||||||
4. ^ Visca, Curt; Visca, Kelley (2003). How to Draw Cartoon Birds. The Rosen Publishing Group. ISBN 9780823961566.
|
4. ^ Visca, Curt; Visca, Kelley (2003). How to Draw Cartoon Birds. The Rosen Publishing Group. ISBN 9780823961566.
|
||||||
5. ^ a b c d Carboneras 1992, p. 536.
|
5. ^ a b c d Carboneras 1992, p. 536.
|
||||||
6. ^ Livezey 1986, pp. 737–738.
|
6. ^ Livezey 1986, pp. 737–738.
|
||||||
7. ^ Madsen, McHugh & de Kloet 1988, p. 452.
|
7. ^ Madsen, McHugh & de Kloet 1988, p. 452.
|
||||||
8. ^ Donne-Goussé, Laudet & Hänni 2002, pp. 353–354.
|
8. ^ Donne-Goussé, Laudet & Hänni 2002, pp. 353–354.
|
||||||
9. ^ a b c d e f Carboneras 1992, p. 540.
|
9. ^ a b c d e f Carboneras 1992, p. 540.
|
||||||
10. ^ Elphick, Dunning & Sibley 2001, p. 191.
|
10. ^ Elphick, Dunning & Sibley 2001, p. 191.
|
||||||
11. ^ Kear 2005, p. 448.
|
11. ^ Kear 2005, p. 448.
|
||||||
12. ^ Kear 2005, p. 622–623.
|
12. ^ Kear 2005, p. 622–623.
|
||||||
13. ^ Kear 2005, p. 686.
|
13. ^ Kear 2005, p. 686.
|
||||||
14. ^ Elphick, Dunning & Sibley 2001, p. 193.
|
14. ^ Elphick, Dunning & Sibley 2001, p. 193.
|
||||||
15. ^ a b c d e f g Carboneras 1992, p. 537.
|
15. ^ a b c d e f g Carboneras 1992, p. 537.
|
||||||
16. ^ American Ornithologists' Union 1998, p. xix.
|
16. ^ American Ornithologists' Union 1998, p. xix.
|
||||||
17. ^ American Ornithologists' Union 1998.
|
17. ^ American Ornithologists' Union 1998.
|
||||||
18. ^ Carboneras 1992, p. 538.
|
18. ^ Carboneras 1992, p. 538.
|
||||||
19. ^ Christidis & Boles 2008, p. 62.
|
19. ^ Christidis & Boles 2008, p. 62.
|
||||||
20. ^ Shirihai 2008, pp. 239, 245.
|
20. ^ Shirihai 2008, pp. 239, 245.
|
||||||
21. ^ a b Pratt, Bruner & Berrett 1987, pp. 98–107.
|
21. ^ a b Pratt, Bruner & Berrett 1987, pp. 98–107.
|
||||||
22. ^ Fitter, Fitter & Hosking 2000, pp. 52–3.
|
22. ^ Fitter, Fitter & Hosking 2000, pp. 52–3.
|
||||||
23. ^ "Pacific Black Duck". www.wiresnr.org. Retrieved 2018-04-27.
|
23. ^ "Pacific Black Duck". www.wiresnr.org. Retrieved 2018-04-27.
|
||||||
24. ^ Ogden, Evans. "Dabbling Ducks". CWE. Retrieved 2006-11-02.
|
24. ^ Ogden, Evans. "Dabbling Ducks". CWE. Retrieved 2006-11-02.
|
||||||
25. ^ Karl Mathiesen (16 March 2015). "Don't feed the ducks bread, say conservationists". The Guardian. Retrieved 13 November 2016.
|
25. ^ Karl Mathiesen (16 March 2015). "Don't feed the ducks bread, say conservationists". The Guardian. Retrieved 13 November 2016.
|
||||||
26. ^ Rohwer, Frank C.; Anderson, Michael G. (1988). "Female-Biased Philopatry, Monogamy, and the Timing of Pair Formation in Migratory Waterfowl". Current Ornithology. pp. 187–221. doi:10.1007/978-1-4615-6787-5\_4. ISBN 978-1-4615-6789-9.
|
26. ^ Rohwer, Frank C.; Anderson, Michael G. (1988). "Female-Biased Philopatry, Monogamy, and the Timing of Pair Formation in Migratory Waterfowl". Current Ornithology. pp. 187–221. doi:10.1007/978-1-4615-6787-5\_4. ISBN 978-1-4615-6789-9.
|
||||||
27. ^ Smith, Cyndi M.; Cooke, Fred; Robertson, Gregory J.; Goudie, R. Ian; Boyd, W. Sean (2000). "Long-Term Pair Bonds in Harlequin Ducks". The Condor. 102 (1): 201–205. doi:10.1093/condor/102.1.201. hdl:10315/13797.
|
27. ^ Smith, Cyndi M.; Cooke, Fred; Robertson, Gregory J.; Goudie, R. Ian; Boyd, W. Sean (2000). "Long-Term Pair Bonds in Harlequin Ducks". The Condor. 102 (1): 201–205. doi:10.1093/condor/102.1.201. hdl:10315/13797.
|
||||||
28. ^ "If You Find An Orphaned Duckling - Wildlife Rehabber". wildliferehabber.com. Archived from the original on 2018-09-23. Retrieved 2018-12-22.
|
28. ^ "If You Find An Orphaned Duckling - Wildlife Rehabber". wildliferehabber.com. Archived from the original on 2018-09-23. Retrieved 2018-12-22.
|
||||||
29. ^ Carver, Heather (2011). The Duck Bible. Lulu.com. ISBN 9780557901562.[self-published source]
|
29. ^ Carver, Heather (2011). The Duck Bible. Lulu.com. ISBN 9780557901562.[self-published source]
|
||||||
30. ^ Titlow, Budd (2013-09-03). Bird Brains: Inside the Strange Minds of Our Fine Feathered Friends. Rowman & Littlefield. ISBN 9780762797707.
|
30. ^ Titlow, Budd (2013-09-03). Bird Brains: Inside the Strange Minds of Our Fine Feathered Friends. Rowman & Littlefield. ISBN 9780762797707.
|
||||||
31. ^ Amos, Jonathan (2003-09-08). "Sound science is quackers". BBC News. Retrieved 2006-11-02.
|
31. ^ Amos, Jonathan (2003-09-08). "Sound science is quackers". BBC News. Retrieved 2006-11-02.
|
||||||
32. ^ "Mythbusters Episode 8". 12 December 2003.
|
32. ^ "Mythbusters Episode 8". 12 December 2003.
|
||||||
33. ^ Erlandson 1994, p. 171.
|
33. ^ Erlandson 1994, p. 171.
|
||||||
34. ^ Jeffries 2008, pp. 168, 243.
|
34. ^ Jeffries 2008, pp. 168, 243.
|
||||||
35. ^ a b Sued-Badillo 2003, p. 65.
|
35. ^ a b Sued-Badillo 2003, p. 65.
|
||||||
36. ^ Thorpe 1996, p. 68.
|
36. ^ Thorpe 1996, p. 68.
|
||||||
37. ^ Maisels 1999, p. 42.
|
37. ^ Maisels 1999, p. 42.
|
||||||
38. ^ Rau 1876, p. 133.
|
38. ^ Rau 1876, p. 133.
|
||||||
39. ^ Higman 2012, p. 23.
|
39. ^ Higman 2012, p. 23.
|
||||||
40. ^ Hume 2012, p. 53.
|
40. ^ Hume 2012, p. 53.
|
||||||
41. ^ Hume 2012, p. 52.
|
41. ^ Hume 2012, p. 52.
|
||||||
42. ^ Fieldhouse 2002, p. 167.
|
42. ^ Fieldhouse 2002, p. 167.
|
||||||
43. ^ Livingston, A. D. (1998-01-01). Guide to Edible Plants and Animals. Wordsworth Editions, Limited. ISBN 9781853263774.
|
43. ^ Livingston, A. D. (1998-01-01). Guide to Edible Plants and Animals. Wordsworth Editions, Limited. ISBN 9781853263774.
|
||||||
44. ^ "Study plan for waterfowl injury assessment: Determining PCB concentrations in Hudson river resident waterfowl" (PDF). New York State Department of Environmental Conservation. US Department of Commerce. December 2008. p. 3. Archived (PDF) from the original on 2022-10-09. Retrieved 2 July 2019.
|
44. ^ "Study plan for waterfowl injury assessment: Determining PCB concentrations in Hudson river resident waterfowl" (PDF). New York State Department of Environmental Conservation. US Department of Commerce. December 2008. p. 3. Archived (PDF) from the original on 2022-10-09. Retrieved 2 July 2019.
|
||||||
45. ^ "FAOSTAT". www.fao.org. Retrieved 2019-10-25.
|
45. ^ "FAOSTAT". www.fao.org. Retrieved 2019-10-25.
|
||||||
46. ^ "Anas platyrhynchos, Domestic Duck; DigiMorph Staff - The University of Texas at Austin". Digimorph.org. Retrieved 2012-12-23.
|
46. ^ "Anas platyrhynchos, Domestic Duck; DigiMorph Staff - The University of Texas at Austin". Digimorph.org. Retrieved 2012-12-23.
|
||||||
47. ^ Sy Montgomery. "Mallard; Encyclopædia Britannica". Britannica.com. Retrieved 2012-12-23.
|
47. ^ Sy Montgomery. "Mallard; Encyclopædia Britannica". Britannica.com. Retrieved 2012-12-23.
|
||||||
48. ^ Glenday, Craig (2014). Guinness World Records. Guinness World Records Limited. pp. 135. ISBN 978-1-908843-15-9.
|
48. ^ Glenday, Craig (2014). Guinness World Records. Guinness World Records Limited. pp. 135. ISBN 978-1-908843-15-9.
|
||||||
49. ^ Suomen kunnallisvaakunat (in Finnish). Suomen Kunnallisliitto. 1982. p. 147. ISBN 951-773-085-3.
|
49. ^ Suomen kunnallisvaakunat (in Finnish). Suomen Kunnallisliitto. 1982. p. 147. ISBN 951-773-085-3.
|
||||||
50. ^ "Lubānas simbolika" (in Latvian). Retrieved September 9, 2021.
|
50. ^ "Lubānas simbolika" (in Latvian). Retrieved September 9, 2021.
|
||||||
51. ^ "Föglö" (in Swedish). Retrieved September 9, 2021.
|
51. ^ "Föglö" (in Swedish). Retrieved September 9, 2021.
|
||||||
52. ^ Young, Emma. "World's funniest joke revealed". New Scientist. Retrieved 7 January 2019.
|
52. ^ Young, Emma. "World's funniest joke revealed". New Scientist. Retrieved 7 January 2019.
|
||||||
53. ^ "Howard the Duck (character)". Grand Comics Database.
|
53. ^ "Howard the Duck (character)". Grand Comics Database.
|
||||||
54. ^ Sanderson, Peter; Gilbert, Laura (2008). "1970s". Marvel Chronicle A Year by Year History. London, United Kingdom: Dorling Kindersley. p. 161. ISBN 978-0756641238. December saw the debut of the cigar-smoking Howard the Duck. In this story by writer Steve Gerber and artist Val Mayerik, various beings from different realities had begun turning up in the Man-Thing's Florida swamp, including this bad-tempered talking duck.
|
54. ^ Sanderson, Peter; Gilbert, Laura (2008). "1970s". Marvel Chronicle A Year by Year History. London, United Kingdom: Dorling Kindersley. p. 161. ISBN 978-0756641238. December saw the debut of the cigar-smoking Howard the Duck. In this story by writer Steve Gerber and artist Val Mayerik, various beings from different realities had begun turning up in the Man-Thing's Florida swamp, including this bad-tempered talking duck.
|
||||||
55. ^ "The Duck". University of Oregon Athletics. Retrieved 2022-01-20.
|
55. ^ "The Duck". University of Oregon Athletics. Retrieved 2022-01-20.
|
||||||
|
|
||||||
### Sources
|
### Sources
|
||||||
|
|
||||||
- American Ornithologists' Union (1998). Checklist of North American Birds (PDF). Washington, DC: American Ornithologists' Union. ISBN 978-1-891276-00-2. Archived (PDF) from the original on 2022-10-09.
|
- American Ornithologists' Union (1998). Checklist of North American Birds (PDF). Washington, DC: American Ornithologists' Union. ISBN 978-1-891276-00-2. Archived (PDF) from the original on 2022-10-09.
|
||||||
- Carboneras, Carlos (1992). del Hoyo, Josep; Elliott, Andrew; Sargatal, Jordi (eds.). Handbook of the Birds of the World. Vol. 1: Ostrich to Ducks. Barcelona: Lynx Edicions. ISBN 978-84-87334-10-8.
|
- Carboneras, Carlos (1992). del Hoyo, Josep; Elliott, Andrew; Sargatal, Jordi (eds.). Handbook of the Birds of the World. Vol. 1: Ostrich to Ducks. Barcelona: Lynx Edicions. ISBN 978-84-87334-10-8.
|
||||||
- Christidis, Les; Boles, Walter E., eds. (2008). Systematics and Taxonomy of Australian Birds. Collingwood, VIC: Csiro Publishing. ISBN 978-0-643-06511-6.
|
- Christidis, Les; Boles, Walter E., eds. (2008). Systematics and Taxonomy of Australian Birds. Collingwood, VIC: Csiro Publishing. ISBN 978-0-643-06511-6.
|
||||||
- Donne-Goussé, Carole; Laudet, Vincent; Hänni, Catherine (July 2002). "A molecular phylogeny of Anseriformes based on mitochondrial DNA analysis". Molecular Phylogenetics and Evolution. 23 (3): 339–356. Bibcode:2002MolPE..23..339D. doi:10.1016/S1055-7903(02)00019-2. PMID 12099792.
|
- Donne-Goussé, Carole; Laudet, Vincent; Hänni, Catherine (July 2002). "A molecular phylogeny of Anseriformes based on mitochondrial DNA analysis". Molecular Phylogenetics and Evolution. 23 (3): 339–356. Bibcode:2002MolPE..23..339D. doi:10.1016/S1055-7903(02)00019-2. PMID 12099792.
|
||||||
- Elphick, Chris; Dunning, John B. Jr.; Sibley, David, eds. (2001). The Sibley Guide to Bird Life and Behaviour. London: Christopher Helm. ISBN 978-0-7136-6250-4.
|
- Elphick, Chris; Dunning, John B. Jr.; Sibley, David, eds. (2001). The Sibley Guide to Bird Life and Behaviour. London: Christopher Helm. ISBN 978-0-7136-6250-4.
|
||||||
- Erlandson, Jon M. (1994). Early Hunter-Gatherers of the California Coast. New York, NY: Springer Science & Business Media. ISBN 978-1-4419-3231-0.
|
- Erlandson, Jon M. (1994). Early Hunter-Gatherers of the California Coast. New York, NY: Springer Science & Business Media. ISBN 978-1-4419-3231-0.
|
||||||
- Fieldhouse, Paul (2002). Food, Feasts, and Faith: An Encyclopedia of Food Culture in World Religions. Vol. I: A–K. Santa Barbara: ABC-CLIO. ISBN 978-1-61069-412-4.
|
- Fieldhouse, Paul (2002). Food, Feasts, and Faith: An Encyclopedia of Food Culture in World Religions. Vol. I: A–K. Santa Barbara: ABC-CLIO. ISBN 978-1-61069-412-4.
|
||||||
- Fitter, Julian; Fitter, Daniel; Hosking, David (2000). Wildlife of the Galápagos. Princeton, NJ: Princeton University Press. ISBN 978-0-691-10295-5.
|
- Fitter, Julian; Fitter, Daniel; Hosking, David (2000). Wildlife of the Galápagos. Princeton, NJ: Princeton University Press. ISBN 978-0-691-10295-5.
|
||||||
- Higman, B. W. (2012). How Food Made History. Chichester, UK: John Wiley & Sons. ISBN 978-1-4051-8947-7.
|
- Higman, B. W. (2012). How Food Made History. Chichester, UK: John Wiley & Sons. ISBN 978-1-4051-8947-7.
|
||||||
- Hume, Julian H. (2012). Extinct Birds. London: Christopher Helm. ISBN 978-1-4729-3744-5.
|
- Hume, Julian H. (2012). Extinct Birds. London: Christopher Helm. ISBN 978-1-4729-3744-5.
|
||||||
- Jeffries, Richard (2008). Holocene Hunter-Gatherers of the Lower Ohio River Valley. Tuscaloosa: University of Alabama Press. ISBN 978-0-8173-1658-7.
|
- Jeffries, Richard (2008). Holocene Hunter-Gatherers of the Lower Ohio River Valley. Tuscaloosa: University of Alabama Press. ISBN 978-0-8173-1658-7.
|
||||||
- Kear, Janet, ed. (2005). Ducks, Geese and Swans: Species Accounts (Cairina to Mergus). Bird Families of the World. Oxford: Oxford University Press. ISBN 978-0-19-861009-0.
|
- Kear, Janet, ed. (2005). Ducks, Geese and Swans: Species Accounts (Cairina to Mergus). Bird Families of the World. Oxford: Oxford University Press. ISBN 978-0-19-861009-0.
|
||||||
- Livezey, Bradley C. (October 1986). "A phylogenetic analysis of recent Anseriform genera using morphological characters" (PDF). The Auk. 103 (4): 737–754. doi:10.1093/auk/103.4.737. Archived (PDF) from the original on 2022-10-09.
|
- Livezey, Bradley C. (October 1986). "A phylogenetic analysis of recent Anseriform genera using morphological characters" (PDF). The Auk. 103 (4): 737–754. doi:10.1093/auk/103.4.737. Archived (PDF) from the original on 2022-10-09.
|
||||||
- Madsen, Cort S.; McHugh, Kevin P.; de Kloet, Siwo R. (July 1988). "A partial classification of waterfowl (Anatidae) based on single-copy DNA" (PDF). The Auk. 105 (3): 452–459. doi:10.1093/auk/105.3.452. Archived (PDF) from the original on 2022-10-09.
|
- Madsen, Cort S.; McHugh, Kevin P.; de Kloet, Siwo R. (July 1988). "A partial classification of waterfowl (Anatidae) based on single-copy DNA" (PDF). The Auk. 105 (3): 452–459. doi:10.1093/auk/105.3.452. Archived (PDF) from the original on 2022-10-09.
|
||||||
- Maisels, Charles Keith (1999). Early Civilizations of the Old World. London: Routledge. ISBN 978-0-415-10975-8.
|
- Maisels, Charles Keith (1999). Early Civilizations of the Old World. London: Routledge. ISBN 978-0-415-10975-8.
|
||||||
- Pratt, H. Douglas; Bruner, Phillip L.; Berrett, Delwyn G. (1987). A Field Guide to the Birds of Hawaii and the Tropical Pacific. Princeton, NJ: Princeton University Press. ISBN 0-691-02399-9.
|
- Pratt, H. Douglas; Bruner, Phillip L.; Berrett, Delwyn G. (1987). A Field Guide to the Birds of Hawaii and the Tropical Pacific. Princeton, NJ: Princeton University Press. ISBN 0-691-02399-9.
|
||||||
- Rau, Charles (1876). Early Man in Europe. New York: Harper & Brothers. LCCN 05040168.
|
- Rau, Charles (1876). Early Man in Europe. New York: Harper & Brothers. LCCN 05040168.
|
||||||
- Shirihai, Hadoram (2008). A Complete Guide to Antarctic Wildlife. Princeton, NJ, US: Princeton University Press. ISBN 978-0-691-13666-0.
|
- Shirihai, Hadoram (2008). A Complete Guide to Antarctic Wildlife. Princeton, NJ, US: Princeton University Press. ISBN 978-0-691-13666-0.
|
||||||
- Sued-Badillo, Jalil (2003). Autochthonous Societies. General History of the Caribbean. Paris: UNESCO. ISBN 978-92-3-103832-7.
|
- Sued-Badillo, Jalil (2003). Autochthonous Societies. General History of the Caribbean. Paris: UNESCO. ISBN 978-92-3-103832-7.
|
||||||
- Thorpe, I. J. (1996). The Origins of Agriculture in Europe. New York: Routledge. ISBN 978-0-415-08009-5.
|
- Thorpe, I. J. (1996). The Origins of Agriculture in Europe. New York: Routledge. ISBN 978-0-415-08009-5.
|
||||||
|
|
||||||
## External links
|
## External links
|
||||||
|
|
||||||
|
Duck at Wikipedia's sister projects
|
||||||
|
|
||||||
- Definitions from Wiktionary
|
- Definitions from Wiktionary
|
||||||
|
<!-- image -->
|
||||||
- Media from Commons
|
- Media from Commons
|
||||||
|
<!-- image -->
|
||||||
- Quotations from Wikiquote
|
- Quotations from Wikiquote
|
||||||
|
<!-- image -->
|
||||||
- Recipes from Wikibooks
|
- Recipes from Wikibooks
|
||||||
|
<!-- image -->
|
||||||
- Taxa from Wikispecies
|
- Taxa from Wikispecies
|
||||||
|
<!-- image -->
|
||||||
- Data from Wikidata
|
- Data from Wikidata
|
||||||
|
<!-- image -->
|
||||||
|
|
||||||
- list of books (useful looking abstracts)
|
- list of books (useful looking abstracts)
|
||||||
- Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine
|
- Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine
|
||||||
- Ducks at a Distance, by Rob Hines at Project Gutenberg - A modern illustrated guide to identification of US waterfowl
|
- Ducks at a Distance, by Rob Hines at Project Gutenberg - A modern illustrated guide to identification of US waterfowl
|
||||||
|
|
||||||
|
<!-- image -->
|
||||||
|
|
||||||
| Authority control databases | Authority control databases |
|
| Authority control databases | Authority control databases |
|
||||||
|--------------------------------|----------------------------------------------|
|
|--------------------------------|----------------------------------------------|
|
||||||
| National | United StatesFranceBnF dataJapanLatviaIsrael |
|
| National | United StatesFranceBnF dataJapanLatviaIsrael |
|
||||||
| Other | IdRef |
|
| Other | IdRef |
|
||||||
|
|
||||||
<!-- image -->
|
Retrieved from "https://en.wikipedia.org/w/index.php?title=Duck&oldid=1246843351"
|
||||||
|
|
||||||
Retrieved from ""
|
Categories:
|
||||||
|
|
||||||
:
|
|
||||||
|
|
||||||
- Ducks
|
- Ducks
|
||||||
- Game birds
|
- Game birds
|
||||||
@ -516,9 +553,8 @@ Hidden categories:
|
|||||||
- Articles with Project Gutenberg links
|
- Articles with Project Gutenberg links
|
||||||
- Articles containing video clips
|
- Articles containing video clips
|
||||||
|
|
||||||
- This page was last edited on 21 September 2024, at 12:11 (UTC).
|
- This page was last edited on 21 September 2024, at 12:11 (UTC).
|
||||||
- Text is available under the Creative Commons Attribution-ShareAlike License 4.0;
|
- Text is available under the Creative Commons Attribution-ShareAlike License 4.0; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
|
||||||
additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
|
|
||||||
|
|
||||||
- Privacy policy
|
- Privacy policy
|
||||||
- About Wikipedia
|
- About Wikipedia
|
||||||
@ -528,4 +564,11 @@ additional terms may apply. By using this site, you agree to the Terms of Use an
|
|||||||
- Developers
|
- Developers
|
||||||
- Statistics
|
- Statistics
|
||||||
- Cookie statement
|
- Cookie statement
|
||||||
- Mobile view
|
- Mobile view
|
||||||
|
|
||||||
|
Wikimedia Foundation
|
||||||
|
|
||||||
|
<!-- image -->
|
||||||
|
Powered by MediaWiki
|
||||||
|
|
||||||
|
<!-- image -->
|
21
tests/data/html/example_09.html
vendored
Normal file
21
tests/data/html/example_09.html
vendored
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<h1>Introduction to parsing HTML files with <img src="https://docling-project.github.io/docling/assets/logo.png" alt="Docling" height="64"> Docling</h1>
|
||||||
|
<p>Docling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem.</p>
|
||||||
|
<h2>Supported file formats</h2>
|
||||||
|
<p>Docling supports multiple file formats..</p>
|
||||||
|
<ul>
|
||||||
|
<li><img src="https://github.com/docling-project/docling/tree/main/docs/assets/pdf.png" height="32" alt="PDF">Advanced PDF understanding</li>
|
||||||
|
<li><img src="https://github.com/docling-project/docling/tree/main/docs/assets/docx.png" height="32" alt="DOCX">Microsoft Office DOCX</li>
|
||||||
|
<li><img src="https://github.com/docling-project/docling/tree/main/docs/assets/html.png" height="32" alt="HTML">HTML files (with optional support for images)</li>
|
||||||
|
</ul>
|
||||||
|
<h3>Three backends for handling HTML files</h3>
|
||||||
|
<p>Docling has three backends for parsing HTML files:</p>
|
||||||
|
<ol>
|
||||||
|
<li><b>HTMLDocumentBackend</b> Ignores images</li>
|
||||||
|
<li><b>HTMLDocumentBackendImagesInline</b> Extracts images inline</li>
|
||||||
|
<li><b>HTMLDocumentBackendImagesReferenced</b> Extracts images as references</li>
|
||||||
|
</ol>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
@ -1,8 +1,6 @@
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from docling.backend.html_backend import HTMLDocumentBackend
|
from docling.backend.html_backend import HTMLDocumentBackend
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.document import (
|
from docling.datamodel.document import (
|
||||||
@ -37,17 +35,15 @@ def test_heading_levels():
|
|||||||
if isinstance(item, SectionHeaderItem):
|
if isinstance(item, SectionHeaderItem):
|
||||||
if item.text == "Etymology":
|
if item.text == "Etymology":
|
||||||
found_lvl_1 = True
|
found_lvl_1 = True
|
||||||
|
# h2 becomes level 1 because of h1 as title
|
||||||
assert item.level == 1
|
assert item.level == 1
|
||||||
elif item.text == "Feeding":
|
elif item.text == "Feeding":
|
||||||
found_lvl_2 = True
|
found_lvl_2 = True
|
||||||
|
# h3 becomes level 2 because of h1 as title
|
||||||
assert item.level == 2
|
assert item.level == 2
|
||||||
assert found_lvl_1 and found_lvl_2
|
assert found_lvl_1 and found_lvl_2
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(
|
|
||||||
"Temporarily disabled since docling-core>=2.21.0 does not support ordered lists "
|
|
||||||
"with custom start value"
|
|
||||||
)
|
|
||||||
def test_ordered_lists():
|
def test_ordered_lists():
|
||||||
test_set: list[tuple[bytes, str]] = []
|
test_set: list[tuple[bytes, str]] = []
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user