mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-23 18:45:00 +00:00
Merge branch 'main' of github.com:DS4SD/docling into copilot/fix-1903
This commit is contained in:
commit
d571f36299
28
CHANGELOG.md
28
CHANGELOG.md
@ -1,3 +1,31 @@
|
||||
## [v2.42.1](https://github.com/docling-project/docling/releases/tag/v2.42.1) - 2025-07-22
|
||||
|
||||
### Fix
|
||||
|
||||
* Keep formula clusters also when empty ([#1970](https://github.com/docling-project/docling/issues/1970)) ([`67441ca`](https://github.com/docling-project/docling/commit/67441ca4188d532c79df788c461e7f6f7d2f8170))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Enrich existing DoclingDocument ([#1969](https://github.com/docling-project/docling/issues/1969)) ([`90a7cc4`](https://github.com/docling-project/docling/commit/90a7cc4bdda7272cd87d6f4ab3c0b7966f6e9c73))
|
||||
* Add documentation for confidence scores ([#1912](https://github.com/docling-project/docling/issues/1912)) ([`5d98bce`](https://github.com/docling-project/docling/commit/5d98bcea1bd03aff426f903211c931620ff8fcc1))
|
||||
|
||||
## [v2.42.0](https://github.com/docling-project/docling/releases/tag/v2.42.0) - 2025-07-18
|
||||
|
||||
### Feature
|
||||
|
||||
* Add option to control empty clusters in layout postprocessing ([#1940](https://github.com/docling-project/docling/issues/1940)) ([`a436be7`](https://github.com/docling-project/docling/commit/a436be73676101cc9461a17ae7a9ae72316a5096))
|
||||
|
||||
### Fix
|
||||
|
||||
* Safe pipeline init, use device_map in transformers models ([#1917](https://github.com/docling-project/docling/issues/1917)) ([`cca05c4`](https://github.com/docling-project/docling/commit/cca05c45eaec154ae8470f9eb3577852d17773cd))
|
||||
* Fix HTML table parser and JATS backend bugs ([#1948](https://github.com/docling-project/docling/issues/1948)) ([`e1e3053`](https://github.com/docling-project/docling/commit/e1e305369552b82d3f09f0c113ea8b54d5c90658))
|
||||
* KeyError: 'fPr' when processing latex fractions in DOCX files ([#1926](https://github.com/docling-project/docling/issues/1926)) ([`95e7096`](https://github.com/docling-project/docling/commit/95e70962f1d7cf1f339a88fde9c907111e194726))
|
||||
* Change granite vision model URL from preview to stable version ([#1925](https://github.com/docling-project/docling/issues/1925)) ([`c5fb353`](https://github.com/docling-project/docling/commit/c5fb353f109dfe79b51c201ebb1ff33fceeae34a))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Fix typos ([#1943](https://github.com/docling-project/docling/issues/1943)) ([`d6d2dbe`](https://github.com/docling-project/docling/commit/d6d2dbe2f99bd965c1bc8eec3d332d0acf731189))
|
||||
|
||||
## [v2.41.0](https://github.com/docling-project/docling/releases/tag/v2.41.0) - 2025-07-10
|
||||
|
||||
### Feature
|
||||
|
@ -1,10 +1,11 @@
|
||||
import logging
|
||||
import re
|
||||
import traceback
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Final, Optional, Union, cast
|
||||
|
||||
from bs4 import BeautifulSoup, NavigableString, PageElement, Tag
|
||||
from bs4 import BeautifulSoup, NavigableString, Tag
|
||||
from bs4.element import PreformattedString
|
||||
from docling_core.types.doc import (
|
||||
DocItem,
|
||||
@ -15,6 +16,7 @@ from docling_core.types.doc import (
|
||||
GroupLabel,
|
||||
TableCell,
|
||||
TableData,
|
||||
TextItem,
|
||||
)
|
||||
from docling_core.types.doc.document import ContentLayer
|
||||
from pydantic import BaseModel
|
||||
@ -26,10 +28,14 @@ from docling.datamodel.document import InputDocument
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
# tags that generate NodeItem elements
|
||||
TAGS_FOR_NODE_ITEMS: Final = [
|
||||
DEFAULT_IMAGE_WIDTH = 128
|
||||
DEFAULT_IMAGE_HEIGHT = 128
|
||||
|
||||
# Tags that initiate distinct Docling items
|
||||
_BLOCK_TAGS: Final = {
|
||||
"address",
|
||||
"details",
|
||||
"figure",
|
||||
"h1",
|
||||
"h2",
|
||||
"h3",
|
||||
@ -41,12 +47,9 @@ TAGS_FOR_NODE_ITEMS: Final = [
|
||||
"code",
|
||||
"ul",
|
||||
"ol",
|
||||
"li",
|
||||
"summary",
|
||||
"table",
|
||||
"figure",
|
||||
"img",
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
class _Context(BaseModel):
|
||||
@ -56,12 +59,16 @@ class _Context(BaseModel):
|
||||
|
||||
class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
@override
|
||||
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
|
||||
def __init__(
|
||||
self,
|
||||
in_doc: InputDocument,
|
||||
path_or_stream: Union[BytesIO, Path],
|
||||
):
|
||||
super().__init__(in_doc, path_or_stream)
|
||||
self.soup: Optional[Tag] = None
|
||||
# HTML file:
|
||||
self.path_or_stream = path_or_stream
|
||||
# Initialise the parents for the hierarchy
|
||||
|
||||
# Initialize the parents for the hierarchy
|
||||
self.max_levels = 10
|
||||
self.level = 0
|
||||
self.parents: dict[int, Optional[Union[DocItem, GroupItem]]] = {}
|
||||
@ -70,13 +77,12 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
self.parents[i] = None
|
||||
|
||||
try:
|
||||
if isinstance(self.path_or_stream, BytesIO):
|
||||
text_stream = self.path_or_stream.getvalue()
|
||||
self.soup = BeautifulSoup(text_stream, "html.parser")
|
||||
if isinstance(self.path_or_stream, Path):
|
||||
with open(self.path_or_stream, "rb") as f:
|
||||
html_content = f.read()
|
||||
self.soup = BeautifulSoup(html_content, "html.parser")
|
||||
raw = (
|
||||
path_or_stream.getvalue()
|
||||
if isinstance(path_or_stream, BytesIO)
|
||||
else Path(path_or_stream).read_bytes()
|
||||
)
|
||||
self.soup = BeautifulSoup(raw, "html.parser")
|
||||
except Exception as e:
|
||||
raise RuntimeError(
|
||||
"Could not initialize HTML backend for file with "
|
||||
@ -96,7 +102,6 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
def unload(self):
|
||||
if isinstance(self.path_or_stream, BytesIO):
|
||||
self.path_or_stream.close()
|
||||
|
||||
self.path_or_stream = None
|
||||
|
||||
@classmethod
|
||||
@ -106,211 +111,156 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
|
||||
@override
|
||||
def convert(self) -> DoclingDocument:
|
||||
# access self.path_or_stream to load stuff
|
||||
_log.debug("Starting HTML conversion...")
|
||||
if not self.is_valid():
|
||||
raise RuntimeError("Invalid HTML document.")
|
||||
|
||||
origin = DocumentOrigin(
|
||||
filename=self.file.name or "file",
|
||||
mimetype="text/html",
|
||||
binary_hash=self.document_hash,
|
||||
)
|
||||
|
||||
doc = DoclingDocument(name=self.file.stem or "file", origin=origin)
|
||||
_log.debug("Trying to convert HTML...")
|
||||
|
||||
if self.is_valid():
|
||||
assert self.soup is not None
|
||||
content = self.soup.body or self.soup
|
||||
# Replace <br> tags with newline characters
|
||||
# TODO: remove style to avoid losing text from tags like i, b, span, ...
|
||||
for br in content("br"):
|
||||
br.replace_with(NavigableString("\n"))
|
||||
assert self.soup is not None
|
||||
# set the title as furniture, since it is part of the document metadata
|
||||
title = self.soup.title
|
||||
if title:
|
||||
doc.add_title(
|
||||
text=title.get_text(separator=" ", strip=True),
|
||||
content_layer=ContentLayer.FURNITURE,
|
||||
)
|
||||
# remove scripts/styles
|
||||
for tag in self.soup(["script", "style"]):
|
||||
tag.decompose()
|
||||
content = self.soup.body or self.soup
|
||||
# normalize <br> tags
|
||||
for br in content("br"):
|
||||
br.replace_with(NavigableString("\n"))
|
||||
# set default content layer
|
||||
headers = content.find(["h1", "h2", "h3", "h4", "h5", "h6"])
|
||||
self.content_layer = (
|
||||
ContentLayer.BODY if headers is None else ContentLayer.FURNITURE
|
||||
)
|
||||
# reset context
|
||||
self.ctx = _Context()
|
||||
|
||||
try:
|
||||
self._walk(content, doc)
|
||||
except Exception:
|
||||
print(traceback.format_exc())
|
||||
|
||||
headers = content.find(["h1", "h2", "h3", "h4", "h5", "h6"])
|
||||
self.content_layer = (
|
||||
ContentLayer.BODY if headers is None else ContentLayer.FURNITURE
|
||||
)
|
||||
self.ctx = _Context() # reset context
|
||||
self.walk(content, doc)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f"Cannot convert doc with {self.document_hash} because the backend "
|
||||
"failed to init."
|
||||
)
|
||||
return doc
|
||||
|
||||
def walk(self, tag: Tag, doc: DoclingDocument) -> None:
|
||||
# Iterate over elements in the body of the document
|
||||
text: str = ""
|
||||
for element in tag.children:
|
||||
if isinstance(element, Tag):
|
||||
try:
|
||||
self.analyze_tag(cast(Tag, element), doc)
|
||||
except Exception as exc_child:
|
||||
_log.error(
|
||||
f"Error processing child from tag {tag.name}:\n{traceback.format_exc()}"
|
||||
)
|
||||
raise exc_child
|
||||
elif isinstance(element, NavigableString) and not isinstance(
|
||||
element, PreformattedString
|
||||
):
|
||||
# Floating text outside paragraphs or analyzed tags
|
||||
text += element
|
||||
siblings: list[Tag] = [
|
||||
item for item in element.next_siblings if isinstance(item, Tag)
|
||||
]
|
||||
if element.next_sibling is None or any(
|
||||
item.name in TAGS_FOR_NODE_ITEMS for item in siblings
|
||||
):
|
||||
text = text.strip()
|
||||
if text and tag.name in ["div"]:
|
||||
doc.add_text(
|
||||
parent=self.parents[self.level],
|
||||
label=DocItemLabel.TEXT,
|
||||
text=text,
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
text = ""
|
||||
def _walk(self, element: Tag, doc: DoclingDocument) -> None:
|
||||
"""Parse an XML tag by recursively walking its content.
|
||||
|
||||
return
|
||||
While walking, the method buffers inline text across tags like <b> or <span>,
|
||||
emitting text nodes only at block boundaries.
|
||||
|
||||
def analyze_tag(self, tag: Tag, doc: DoclingDocument) -> None:
|
||||
if tag.name in ["h1", "h2", "h3", "h4", "h5", "h6"]:
|
||||
self.handle_header(tag, doc)
|
||||
elif tag.name in ["p", "address", "summary"]:
|
||||
self.handle_paragraph(tag, doc)
|
||||
elif tag.name in ["pre", "code"]:
|
||||
self.handle_code(tag, doc)
|
||||
elif tag.name in ["ul", "ol"]:
|
||||
self.handle_list(tag, doc)
|
||||
elif tag.name in ["li"]:
|
||||
self.handle_list_item(tag, doc)
|
||||
elif tag.name == "table":
|
||||
self.handle_table(tag, doc)
|
||||
elif tag.name == "figure":
|
||||
self.handle_figure(tag, doc)
|
||||
elif tag.name == "img":
|
||||
self.handle_image(tag, doc)
|
||||
elif tag.name == "details":
|
||||
self.handle_details(tag, doc)
|
||||
else:
|
||||
self.walk(tag, doc)
|
||||
Args:
|
||||
element: The XML tag to parse.
|
||||
doc: The Docling document to be updated with the parsed content.
|
||||
"""
|
||||
buffer: list[str] = []
|
||||
|
||||
def get_text(self, item: PageElement) -> str:
|
||||
"""Get the text content of a tag."""
|
||||
parts: list[str] = self.extract_text_recursively(item)
|
||||
|
||||
return "".join(parts) + " "
|
||||
|
||||
# Function to recursively extract text from all child nodes
|
||||
def extract_text_recursively(self, item: PageElement) -> list[str]:
|
||||
result: list[str] = []
|
||||
|
||||
if isinstance(item, NavigableString):
|
||||
return [item]
|
||||
|
||||
tag = cast(Tag, item)
|
||||
if tag.name not in ["ul", "ol"]:
|
||||
for child in tag:
|
||||
# Recursively get the child's text content
|
||||
result.extend(self.extract_text_recursively(child))
|
||||
|
||||
return ["".join(result) + " "]
|
||||
|
||||
def handle_details(self, element: Tag, doc: DoclingDocument) -> None:
|
||||
"""Handle details tag (details) and its content."""
|
||||
|
||||
self.parents[self.level + 1] = doc.add_group(
|
||||
name="details",
|
||||
label=GroupLabel.SECTION,
|
||||
parent=self.parents[self.level],
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
|
||||
self.level += 1
|
||||
self.walk(element, doc)
|
||||
self.parents[self.level + 1] = None
|
||||
self.level -= 1
|
||||
|
||||
def handle_header(self, element: Tag, doc: DoclingDocument) -> None:
|
||||
"""Handles header tags (h1, h2, etc.)."""
|
||||
hlevel = int(element.name.replace("h", ""))
|
||||
text = element.text.strip()
|
||||
|
||||
self.content_layer = ContentLayer.BODY
|
||||
|
||||
if hlevel == 1:
|
||||
for key in self.parents.keys():
|
||||
self.parents[key] = None
|
||||
|
||||
self.level = 1
|
||||
self.parents[self.level] = doc.add_text(
|
||||
parent=self.parents[0],
|
||||
label=DocItemLabel.TITLE,
|
||||
text=text,
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
else:
|
||||
if hlevel > self.level:
|
||||
# add invisible group
|
||||
for i in range(self.level + 1, hlevel):
|
||||
self.parents[i] = doc.add_group(
|
||||
name=f"header-{i}",
|
||||
label=GroupLabel.SECTION,
|
||||
parent=self.parents[i - 1],
|
||||
def flush_buffer():
|
||||
if not buffer:
|
||||
return
|
||||
text = "".join(buffer).strip()
|
||||
buffer.clear()
|
||||
if not text:
|
||||
return
|
||||
for part in text.split("\n"):
|
||||
seg = part.strip()
|
||||
if seg:
|
||||
doc.add_text(
|
||||
DocItemLabel.TEXT,
|
||||
seg,
|
||||
parent=self.parents[self.level],
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
self.level = hlevel
|
||||
|
||||
elif hlevel < self.level:
|
||||
for node in element.contents:
|
||||
if isinstance(node, Tag):
|
||||
name = node.name.lower()
|
||||
if name == "img":
|
||||
flush_buffer()
|
||||
self._emit_image(node, doc)
|
||||
elif name in _BLOCK_TAGS:
|
||||
flush_buffer()
|
||||
self._handle_block(node, doc)
|
||||
elif node.find(_BLOCK_TAGS):
|
||||
flush_buffer()
|
||||
self._walk(node, doc)
|
||||
else:
|
||||
buffer.append(node.text)
|
||||
elif isinstance(node, NavigableString) and not isinstance(
|
||||
node, PreformattedString
|
||||
):
|
||||
buffer.append(str(node))
|
||||
|
||||
flush_buffer()
|
||||
|
||||
def _handle_heading(self, tag: Tag, doc: DoclingDocument) -> None:
|
||||
tag_name = tag.name.lower()
|
||||
# set default content layer to BODY as soon as we encounter a heading
|
||||
self.content_layer = ContentLayer.BODY
|
||||
level = int(tag_name[1])
|
||||
text = tag.get_text(strip=True, separator=" ")
|
||||
# the first level is for the title item
|
||||
if level == 1:
|
||||
for key in self.parents.keys():
|
||||
self.parents[key] = None
|
||||
self.level = 0
|
||||
self.parents[self.level + 1] = doc.add_title(
|
||||
text, content_layer=self.content_layer
|
||||
)
|
||||
# the other levels need to be lowered by 1 if a title was set
|
||||
else:
|
||||
level -= 1
|
||||
if level > self.level:
|
||||
# add invisible group
|
||||
for i in range(self.level, level):
|
||||
_log.debug(f"Adding invisible group to level {i}")
|
||||
self.parents[i + 1] = doc.add_group(
|
||||
name=f"header-{i + 1}",
|
||||
label=GroupLabel.SECTION,
|
||||
parent=self.parents[i],
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
self.level = level
|
||||
elif level < self.level:
|
||||
# remove the tail
|
||||
for key in self.parents.keys():
|
||||
if key > hlevel:
|
||||
if key > level + 1:
|
||||
_log.debug(f"Remove the tail of level {key}")
|
||||
self.parents[key] = None
|
||||
self.level = hlevel
|
||||
|
||||
self.parents[hlevel] = doc.add_heading(
|
||||
parent=self.parents[hlevel - 1],
|
||||
text=text,
|
||||
level=hlevel - 1,
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
|
||||
def handle_code(self, element: Tag, doc: DoclingDocument) -> None:
|
||||
"""Handles monospace code snippets (pre)."""
|
||||
if element.text is None:
|
||||
return
|
||||
text = element.text.strip()
|
||||
if text:
|
||||
doc.add_code(
|
||||
self.level = level
|
||||
self.parents[self.level + 1] = doc.add_heading(
|
||||
parent=self.parents[self.level],
|
||||
text=text,
|
||||
level=self.level,
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
self.level += 1
|
||||
for img_tag in tag("img"):
|
||||
if isinstance(img_tag, Tag):
|
||||
self._emit_image(img_tag, doc)
|
||||
|
||||
def handle_paragraph(self, element: Tag, doc: DoclingDocument) -> None:
|
||||
"""Handles paragraph tags (p) or equivalent ones."""
|
||||
if element.text is None:
|
||||
return
|
||||
text = element.text.strip()
|
||||
if text:
|
||||
doc.add_text(
|
||||
parent=self.parents[self.level],
|
||||
label=DocItemLabel.TEXT,
|
||||
text=text,
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
|
||||
def handle_list(self, element: Tag, doc: DoclingDocument) -> None:
|
||||
"""Handles list tags (ul, ol) and their list items."""
|
||||
|
||||
def _handle_list(self, tag: Tag, doc: DoclingDocument) -> None:
|
||||
tag_name = tag.name.lower()
|
||||
start: Optional[int] = None
|
||||
if is_ordered := element.name == "ol":
|
||||
start_attr = element.get("start")
|
||||
name: str = ""
|
||||
is_ordered = tag_name == "ol"
|
||||
if is_ordered:
|
||||
start_attr = tag.get("start")
|
||||
if isinstance(start_attr, str) and start_attr.isnumeric():
|
||||
start = int(start_attr)
|
||||
name = "ordered list" + (f" start {start}" if start is not None else "")
|
||||
else:
|
||||
name = "list"
|
||||
# create a list group
|
||||
# Create the list container
|
||||
list_group = doc.add_list_group(
|
||||
name=name,
|
||||
parent=self.parents[self.level],
|
||||
@ -320,64 +270,171 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
self.ctx.list_ordered_flag_by_ref[list_group.self_ref] = is_ordered
|
||||
if is_ordered and start is not None:
|
||||
self.ctx.list_start_by_ref[list_group.self_ref] = start
|
||||
|
||||
self.level += 1
|
||||
|
||||
self.walk(element, doc)
|
||||
# For each top-level <li> in this list
|
||||
for li in tag.find_all({"li", "ul", "ol"}, recursive=False):
|
||||
if not isinstance(li, Tag):
|
||||
continue
|
||||
|
||||
# sub-list items should be indented under main list items, but temporarily
|
||||
# addressing invalid HTML (docling-core/issues/357)
|
||||
if li.name in {"ul", "ol"}:
|
||||
self._handle_block(li, doc)
|
||||
|
||||
else:
|
||||
# 1) determine the marker
|
||||
if is_ordered and start is not None:
|
||||
marker = f"{start + len(list_group.children)}."
|
||||
else:
|
||||
marker = ""
|
||||
|
||||
# 2) extract only the "direct" text from this <li>
|
||||
parts: list[str] = []
|
||||
for child in li.contents:
|
||||
if isinstance(child, NavigableString) and not isinstance(
|
||||
child, PreformattedString
|
||||
):
|
||||
parts.append(child)
|
||||
elif isinstance(child, Tag) and child.name not in ("ul", "ol"):
|
||||
text_part = child.get_text()
|
||||
if text_part:
|
||||
parts.append(text_part)
|
||||
li_text = re.sub(r"\s+|\n+", " ", "".join(parts)).strip()
|
||||
|
||||
# 3) add the list item
|
||||
if li_text:
|
||||
self.parents[self.level + 1] = doc.add_list_item(
|
||||
text=li_text,
|
||||
enumerated=is_ordered,
|
||||
marker=marker,
|
||||
parent=list_group,
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
|
||||
# 4) recurse into any nested lists, attaching them to this <li> item
|
||||
for sublist in li({"ul", "ol"}, recursive=False):
|
||||
if isinstance(sublist, Tag):
|
||||
self.level += 1
|
||||
self._handle_block(sublist, doc)
|
||||
self.parents[self.level + 1] = None
|
||||
self.level -= 1
|
||||
else:
|
||||
for sublist in li({"ul", "ol"}, recursive=False):
|
||||
if isinstance(sublist, Tag):
|
||||
self._handle_block(sublist, doc)
|
||||
|
||||
# 5) extract any images under this <li>
|
||||
for img_tag in li("img"):
|
||||
if isinstance(img_tag, Tag):
|
||||
self._emit_image(img_tag, doc)
|
||||
|
||||
self.parents[self.level + 1] = None
|
||||
self.level -= 1
|
||||
|
||||
def handle_list_item(self, element: Tag, doc: DoclingDocument) -> None:
|
||||
"""Handles list item tags (li)."""
|
||||
nested_list = element.find(["ul", "ol"])
|
||||
def _handle_block(self, tag: Tag, doc: DoclingDocument) -> None:
|
||||
tag_name = tag.name.lower()
|
||||
|
||||
parent = self.parents[self.level]
|
||||
if parent is None:
|
||||
_log.debug(f"list-item has no parent in DoclingDocument: {element}")
|
||||
return
|
||||
enumerated = self.ctx.list_ordered_flag_by_ref.get(parent.self_ref, False)
|
||||
if enumerated and (start := self.ctx.list_start_by_ref.get(parent.self_ref)):
|
||||
marker = f"{start + len(parent.children)}."
|
||||
else:
|
||||
marker = ""
|
||||
if tag_name == "figure":
|
||||
img_tag = tag.find("img")
|
||||
if isinstance(img_tag, Tag):
|
||||
self._emit_image(img_tag, doc)
|
||||
|
||||
if nested_list:
|
||||
# Text in list item can be hidden within hierarchy, hence
|
||||
# we need to extract it recursively
|
||||
text: str = self.get_text(element)
|
||||
# Flatten text, remove break lines:
|
||||
text = text.replace("\n", "").replace("\r", "")
|
||||
text = " ".join(text.split()).strip()
|
||||
elif tag_name in {"h1", "h2", "h3", "h4", "h5", "h6"}:
|
||||
self._handle_heading(tag, doc)
|
||||
|
||||
if len(text) > 0:
|
||||
# create a list-item
|
||||
self.parents[self.level + 1] = doc.add_list_item(
|
||||
text=text,
|
||||
enumerated=enumerated,
|
||||
marker=marker,
|
||||
parent=parent,
|
||||
elif tag_name in {"ul", "ol"}:
|
||||
self._handle_list(tag, doc)
|
||||
|
||||
elif tag_name in {"p", "address", "summary"}:
|
||||
for part in tag.text.split("\n"):
|
||||
seg = part.strip()
|
||||
if seg:
|
||||
doc.add_text(
|
||||
parent=self.parents[self.level],
|
||||
label=DocItemLabel.TEXT,
|
||||
text=seg,
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
for img_tag in tag("img"):
|
||||
if isinstance(img_tag, Tag):
|
||||
self._emit_image(img_tag, doc)
|
||||
|
||||
elif tag_name == "table":
|
||||
data = HTMLDocumentBackend.parse_table_data(tag)
|
||||
for img_tag in tag("img"):
|
||||
if isinstance(img_tag, Tag):
|
||||
self._emit_image(tag, doc)
|
||||
if data is not None:
|
||||
doc.add_table(
|
||||
data=data,
|
||||
parent=self.parents[self.level],
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
self.level += 1
|
||||
self.walk(element, doc)
|
||||
self.parents[self.level + 1] = None
|
||||
self.level -= 1
|
||||
else:
|
||||
self.walk(element, doc)
|
||||
|
||||
elif element.text.strip():
|
||||
text = element.text.strip()
|
||||
elif tag_name in {"pre", "code"}:
|
||||
# handle monospace code snippets (pre).
|
||||
text = tag.get_text(strip=True)
|
||||
if text:
|
||||
doc.add_code(
|
||||
parent=self.parents[self.level],
|
||||
text=text,
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
|
||||
doc.add_list_item(
|
||||
text=text,
|
||||
enumerated=enumerated,
|
||||
marker=marker,
|
||||
parent=parent,
|
||||
elif tag_name == "details":
|
||||
# handle details and its content.
|
||||
self.parents[self.level + 1] = doc.add_group(
|
||||
name="details",
|
||||
label=GroupLabel.SECTION,
|
||||
parent=self.parents[self.level],
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
else:
|
||||
_log.debug(f"list-item has no text: {element}")
|
||||
self.level += 1
|
||||
self._walk(tag, doc)
|
||||
self.parents[self.level + 1] = None
|
||||
self.level -= 1
|
||||
|
||||
def _emit_image(self, img_tag: Tag, doc: DoclingDocument) -> None:
    """Add a picture item for an ``<img>`` tag, with a caption when one exists.

    The caption text comes from a ``<figcaption>`` that is a direct child of
    the nearest enclosing ``<figure>``. When that yields no text, the stripped
    ``alt`` attribute of the image is used instead. A non-empty caption is
    first added to the document as a CAPTION text item and then attached to
    the picture.

    Args:
        img_tag: The image tag to convert.
        doc: The Docling document that receives the picture (and caption).
    """
    caption_text: str = ""

    # Prefer the <figcaption> of the surrounding <figure>, if any.
    enclosing_figure = img_tag.find_parent("figure")
    if isinstance(enclosing_figure, Tag):
        figcaption = enclosing_figure.find("figcaption", recursive=False)
        if isinstance(figcaption, Tag):
            caption_text = figcaption.get_text()

    # Fall back to the alternative text of the image.
    if not caption_text:
        caption_text = str(img_tag.get("alt", "")).strip()

    caption_item: Optional[TextItem] = (
        doc.add_text(
            DocItemLabel.CAPTION,
            text=caption_text,
            content_layer=self.content_layer,
        )
        if caption_text
        else None
    )

    doc.add_picture(
        caption=caption_item,
        parent=self.parents[self.level],
        content_layer=self.content_layer,
    )
|
||||
|
||||
@staticmethod
|
||||
def _get_cell_spans(cell: Tag) -> tuple[int, int]:
|
||||
"""Extract colspan and rowspan values from a table cell tag.
|
||||
|
||||
This function retrieves the 'colspan' and 'rowspan' attributes from a given
|
||||
table cell tag.
|
||||
If the attribute does not exist or it is not numeric, it defaults to 1.
|
||||
"""
|
||||
raw_spans: tuple[str, str] = (
|
||||
str(cell.get("colspan", "1")),
|
||||
str(cell.get("rowspan", "1")),
|
||||
)
|
||||
int_spans: tuple[int, int] = (
|
||||
int(raw_spans[0]) if raw_spans[0].isnumeric() else 1,
|
||||
int(raw_spans[1]) if raw_spans[0].isnumeric() else 1,
|
||||
)
|
||||
|
||||
return int_spans
|
||||
|
||||
@staticmethod
|
||||
def parse_table_data(element: Tag) -> Optional[TableData]: # noqa: C901
|
||||
@ -398,10 +455,9 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
if not isinstance(row, Tag):
|
||||
continue
|
||||
cell_tag = cast(Tag, cell)
|
||||
val = cell_tag.get("colspan", "1")
|
||||
colspan = int(val) if (isinstance(val, str) and val.isnumeric()) else 1
|
||||
col_count += colspan
|
||||
if cell_tag.name == "td" or cell_tag.get("rowspan") is None:
|
||||
col_span, row_span = HTMLDocumentBackend._get_cell_spans(cell_tag)
|
||||
col_count += col_span
|
||||
if cell_tag.name == "td" or row_span == 1:
|
||||
is_row_header = False
|
||||
num_cols = max(num_cols, col_count)
|
||||
if not is_row_header:
|
||||
@ -428,10 +484,11 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
row_header = True
|
||||
for html_cell in cells:
|
||||
if isinstance(html_cell, Tag):
|
||||
_, row_span = HTMLDocumentBackend._get_cell_spans(html_cell)
|
||||
if html_cell.name == "td":
|
||||
col_header = False
|
||||
row_header = False
|
||||
elif html_cell.get("rowspan") is None:
|
||||
elif row_span == 1:
|
||||
row_header = False
|
||||
if not row_header:
|
||||
row_idx += 1
|
||||
@ -456,18 +513,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
text = html_cell.text
|
||||
|
||||
# label = html_cell.name
|
||||
col_val = html_cell.get("colspan", "1")
|
||||
col_span = (
|
||||
int(col_val)
|
||||
if isinstance(col_val, str) and col_val.isnumeric()
|
||||
else 1
|
||||
)
|
||||
row_val = html_cell.get("rowspan", "1")
|
||||
row_span = (
|
||||
int(row_val)
|
||||
if isinstance(row_val, str) and row_val.isnumeric()
|
||||
else 1
|
||||
)
|
||||
col_span, row_span = HTMLDocumentBackend._get_cell_spans(html_cell)
|
||||
if row_header:
|
||||
row_span -= 1
|
||||
while (
|
||||
@ -494,84 +540,3 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||
data.table_cells.append(table_cell)
|
||||
|
||||
return data
|
||||
|
||||
def handle_table(self, element: Tag, doc: DoclingDocument) -> None:
|
||||
"""Handles table tags."""
|
||||
|
||||
table_data = HTMLDocumentBackend.parse_table_data(element)
|
||||
|
||||
if table_data is not None:
|
||||
doc.add_table(
|
||||
data=table_data,
|
||||
parent=self.parents[self.level],
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
|
||||
def get_list_text(self, list_element: Tag, level: int = 0) -> list[str]:
|
||||
"""Recursively extract text from <ul> or <ol> with proper indentation."""
|
||||
result = []
|
||||
bullet_char = "*" # Default bullet character for unordered lists
|
||||
|
||||
if list_element.name == "ol": # For ordered lists, use numbers
|
||||
for i, li in enumerate(list_element("li", recursive=False), 1):
|
||||
if not isinstance(li, Tag):
|
||||
continue
|
||||
# Add numbering for ordered lists
|
||||
result.append(f"{' ' * level}{i}. {li.get_text(strip=True)}")
|
||||
# Handle nested lists
|
||||
nested_list = li.find(["ul", "ol"])
|
||||
if isinstance(nested_list, Tag):
|
||||
result.extend(self.get_list_text(nested_list, level + 1))
|
||||
elif list_element.name == "ul": # For unordered lists, use bullet points
|
||||
for li in list_element("li", recursive=False):
|
||||
if not isinstance(li, Tag):
|
||||
continue
|
||||
# Add bullet points for unordered lists
|
||||
result.append(
|
||||
f"{' ' * level}{bullet_char} {li.get_text(strip=True)}"
|
||||
)
|
||||
# Handle nested lists
|
||||
nested_list = li.find(["ul", "ol"])
|
||||
if isinstance(nested_list, Tag):
|
||||
result.extend(self.get_list_text(nested_list, level + 1))
|
||||
|
||||
return result
|
||||
|
||||
def handle_figure(self, element: Tag, doc: DoclingDocument) -> None:
|
||||
"""Handles image tags (img)."""
|
||||
|
||||
# Extract the image URI from the <img> tag
|
||||
# image_uri = root.xpath('//figure//img/@src')[0]
|
||||
|
||||
contains_captions = element.find(["figcaption"])
|
||||
if not isinstance(contains_captions, Tag):
|
||||
doc.add_picture(
|
||||
parent=self.parents[self.level],
|
||||
caption=None,
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
else:
|
||||
texts = []
|
||||
for item in contains_captions:
|
||||
texts.append(item.text)
|
||||
|
||||
fig_caption = doc.add_text(
|
||||
label=DocItemLabel.CAPTION,
|
||||
text=("".join(texts)).strip(),
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
doc.add_picture(
|
||||
parent=self.parents[self.level],
|
||||
caption=fig_caption,
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
|
||||
def handle_image(self, element: Tag, doc: DoclingDocument) -> None:
|
||||
"""Handles image tags (img)."""
|
||||
_log.debug(f"ignoring <img> tags at the moment: {element}")
|
||||
|
||||
doc.add_picture(
|
||||
parent=self.parents[self.level],
|
||||
caption=None,
|
||||
content_layer=self.content_layer,
|
||||
)
|
||||
|
@ -93,8 +93,8 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
|
||||
|
||||
# Initialize the root of the document hierarchy
|
||||
self.root: Optional[NodeItem] = None
|
||||
|
||||
self.valid = False
|
||||
self.hlevel: int = 0
|
||||
self.valid: bool = False
|
||||
try:
|
||||
if isinstance(self.path_or_stream, BytesIO):
|
||||
self.path_or_stream.seek(0)
|
||||
@ -147,6 +147,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
|
||||
binary_hash=self.document_hash,
|
||||
)
|
||||
doc = DoclingDocument(name=self.file.stem or "file", origin=origin)
|
||||
self.hlevel = 0
|
||||
|
||||
# Get metadata XML components
|
||||
xml_components: XMLComponents = self._parse_metadata()
|
||||
@ -304,7 +305,9 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
|
||||
title: str = abstract["label"] or DEFAULT_HEADER_ABSTRACT
|
||||
if not text:
|
||||
continue
|
||||
parent = doc.add_heading(parent=self.root, text=title)
|
||||
parent = doc.add_heading(
|
||||
parent=self.root, text=title, level=self.hlevel + 1
|
||||
)
|
||||
doc.add_text(
|
||||
parent=parent,
|
||||
text=text,
|
||||
@ -637,7 +640,10 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
|
||||
elif child.tag == "ack":
|
||||
text = DEFAULT_HEADER_ACKNOWLEDGMENTS
|
||||
if text:
|
||||
new_parent = doc.add_heading(text=text, parent=parent)
|
||||
self.hlevel += 1
|
||||
new_parent = doc.add_heading(
|
||||
text=text, parent=parent, level=self.hlevel
|
||||
)
|
||||
elif child.tag == "list":
|
||||
new_parent = doc.add_group(
|
||||
label=GroupLabel.LIST, name="list", parent=parent
|
||||
@ -694,6 +700,8 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
|
||||
new_text = self._walk_linear(doc, new_parent, child)
|
||||
if not (node.getparent().tag == "p" and node.tag in flush_tags):
|
||||
node_text += new_text
|
||||
if child.tag in ("sec", "ack") and text:
|
||||
self.hlevel -= 1
|
||||
|
||||
# pick up the tail text
|
||||
node_text += child.tail.replace("\n", " ") if child.tail else ""
|
||||
|
@ -279,6 +279,9 @@ class LayoutOptions(BaseModel):
|
||||
"""Options for layout processing."""
|
||||
|
||||
create_orphan_clusters: bool = True # Whether to create clusters for orphaned cells
|
||||
keep_empty_clusters: bool = (
|
||||
False # Whether to keep clusters that contain no text cells
|
||||
)
|
||||
model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2
|
||||
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
import hashlib
|
||||
import logging
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from collections.abc import Iterable, Iterator
|
||||
from functools import partial
|
||||
@ -49,6 +50,7 @@ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
|
||||
from docling.utils.utils import chunkify
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
_PIPELINE_CACHE_LOCK = threading.Lock()
|
||||
|
||||
|
||||
class FormatOption(BaseModel):
|
||||
@ -315,17 +317,18 @@ class DocumentConverter:
|
||||
# Use a composite key to cache pipelines
|
||||
cache_key = (pipeline_class, options_hash)
|
||||
|
||||
if cache_key not in self.initialized_pipelines:
|
||||
_log.info(
|
||||
f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}"
|
||||
)
|
||||
self.initialized_pipelines[cache_key] = pipeline_class(
|
||||
pipeline_options=pipeline_options
|
||||
)
|
||||
else:
|
||||
_log.debug(
|
||||
f"Reusing cached pipeline for {pipeline_class.__name__} with options hash {options_hash}"
|
||||
)
|
||||
with _PIPELINE_CACHE_LOCK:
|
||||
if cache_key not in self.initialized_pipelines:
|
||||
_log.info(
|
||||
f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}"
|
||||
)
|
||||
self.initialized_pipelines[cache_key] = pipeline_class(
|
||||
pipeline_options=pipeline_options
|
||||
)
|
||||
else:
|
||||
_log.debug(
|
||||
f"Reusing cached pipeline for {pipeline_class.__name__} with options hash {options_hash}"
|
||||
)
|
||||
|
||||
return self.initialized_pipelines[cache_key]
|
||||
|
||||
|
@ -65,6 +65,7 @@ class PictureDescriptionVlmModel(
|
||||
self.processor = AutoProcessor.from_pretrained(artifacts_path)
|
||||
self.model = AutoModelForVision2Seq.from_pretrained(
|
||||
artifacts_path,
|
||||
device_map=self.device,
|
||||
torch_dtype=torch.bfloat16,
|
||||
_attn_implementation=(
|
||||
"flash_attention_2"
|
||||
@ -72,7 +73,7 @@ class PictureDescriptionVlmModel(
|
||||
and accelerator_options.cuda_use_flash_attention2
|
||||
else "eager"
|
||||
),
|
||||
).to(self.device)
|
||||
)
|
||||
|
||||
self.provenance = f"{self.options.repo_id}"
|
||||
|
||||
|
@ -267,8 +267,14 @@ class LayoutPostprocessor:
|
||||
# Initial cell assignment
|
||||
clusters = self._assign_cells_to_clusters(clusters)
|
||||
|
||||
# Remove clusters with no cells
|
||||
clusters = [cluster for cluster in clusters if cluster.cells]
|
||||
# Remove clusters with no cells (if keep_empty_clusters is False),
|
||||
# but always keep clusters with label DocItemLabel.FORMULA
|
||||
if not self.options.keep_empty_clusters:
|
||||
clusters = [
|
||||
cluster
|
||||
for cluster in clusters
|
||||
if cluster.cells or cluster.label == DocItemLabel.FORMULA
|
||||
]
|
||||
|
||||
# Handle orphaned cells
|
||||
unassigned = self._find_unassigned_cells(clusters)
|
||||
|
BIN
docs/assets/confidence_scores.png
vendored
Normal file
BIN
docs/assets/confidence_scores.png
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 139 KiB |
61
docs/concepts/confidence_scores.md
vendored
Normal file
61
docs/concepts/confidence_scores.md
vendored
Normal file
@ -0,0 +1,61 @@
|
||||
## Introduction
|
||||
|
||||
**Confidence grades** were introduced in [v2.34.0](https://github.com/docling-project/docling/releases/tag/v2.34.0) to help users understand how well a conversion performed and guide decisions about post-processing workflows. They are available in the [`confidence`](../../reference/document_converter/#docling.document_converter.ConversionResult.confidence) field of the [`ConversionResult`](../../reference/document_converter/#docling.document_converter.ConversionResult) object returned by the document converter.
|
||||
|
||||
## Purpose
|
||||
|
||||
Complex layouts, poor scan quality, or challenging formatting can lead to suboptimal document conversion results that may require additional attention or alternative conversion pipelines.
|
||||
|
||||
Confidence scores provide a quantitative assessment of document conversion quality. Each confidence report includes a **numerical score** (0.0 to 1.0) measuring conversion accuracy, and a **quality grade** (poor, fair, good, excellent) for quick interpretation.
|
||||
|
||||
!!! note "Focus on quality grades!"
|
||||
|
||||
Users can and should safely focus on the document-level grade fields — `mean_grade` and `low_grade` — to assess overall conversion quality. Numerical scores are used internally and are for informational purposes only; their computation and weighting may change in the future.
|
||||
|
||||
Use cases for confidence grades include:
|
||||
|
||||
- Identify documents requiring manual review after the conversion
|
||||
- Adjust conversion pipelines to the most appropriate one for each document type
|
||||
- Set confidence thresholds for unattended batch conversions
|
||||
- Catch potential conversion issues early in your workflow.
|
||||
|
||||
## Concepts
|
||||
|
||||
### Scores and grades
|
||||
|
||||
A confidence report contains *scores* and *grades*:
|
||||
|
||||
- **Scores**: Numerical values between 0.0 and 1.0, where higher values indicate better conversion quality, for internal use only
|
||||
- **Grades**: Categorical quality assessments based on score thresholds, used to assess the overall conversion confidence:
|
||||
- `POOR`
|
||||
- `FAIR`
|
||||
- `GOOD`
|
||||
- `EXCELLENT`
|
||||
|
||||
### Types of confidence calculated
|
||||
|
||||
Each confidence report includes four component scores and grades:
|
||||
|
||||
- **`layout_score`**: Overall quality of document element recognition
|
||||
- **`ocr_score`**: Quality of OCR-extracted content
|
||||
- **`parse_score`**: 10th percentile score of digital text cells (emphasizes problem areas)
|
||||
- **`table_score`**: Table extraction quality *(not yet implemented)*
|
||||
|
||||
### Summary grades
|
||||
|
||||
Two aggregate grades provide overall document quality assessment:
|
||||
|
||||
- **`mean_grade`**: Average of the four component scores
|
||||
- **`low_grade`**: 5th percentile score (highlights worst-performing areas)
|
||||
|
||||
### Page-level vs document-level
|
||||
|
||||
Confidence grades are calculated at two levels:
|
||||
|
||||
- **Page-level**: Individual scores and grades for each page, stored in the `pages` field
|
||||
- **Document-level**: Overall scores and grades for the entire document, calculated as averages of the page-level grades and stored in fields equally named in the root [`ConfidenceReport`](../../reference/document_converter/#docling.document_converter.ConversionResult.confidence)
|
||||
|
||||
### Example
|
||||
|
||||

|
||||
|
132
docs/examples/enrich_doclingdocument.py
vendored
Normal file
132
docs/examples/enrich_doclingdocument.py
vendored
Normal file
@ -0,0 +1,132 @@
|
||||
## Enrich DoclingDocument
|
||||
# This example shows how to run Docling enrichment models on documents which have already been converted
|
||||
# and stored as serialized DoclingDocument JSON files.
|
||||
|
||||
### Load modules
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional
|
||||
|
||||
from docling_core.types.doc import BoundingBox, DocItem, DoclingDocument, NodeItem
|
||||
from rich.pretty import pprint
|
||||
|
||||
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
||||
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||
from docling.datamodel.base_models import InputFormat, ItemAndImageEnrichmentElement
|
||||
from docling.datamodel.document import InputDocument
|
||||
from docling.models.base_model import BaseItemAndImageEnrichmentModel
|
||||
from docling.models.document_picture_classifier import (
|
||||
DocumentPictureClassifier,
|
||||
DocumentPictureClassifierOptions,
|
||||
)
|
||||
from docling.utils.utils import chunkify
|
||||
|
||||
### Define batch size used for processing
|
||||
|
||||
BATCH_SIZE = 4
|
||||
|
||||
### From DocItem to the model inputs
|
||||
# The following function is responsible for taking an item and applying the required pre-processing for the model.
|
||||
# In this case we generate a cropped image from the document backend.
|
||||
|
||||
|
||||
def prepare_element(
    doc: DoclingDocument,
    backend: PyPdfiumDocumentBackend,
    model: BaseItemAndImageEnrichmentModel,
    element: NodeItem,
) -> Optional[ItemAndImageEnrichmentElement]:
    """Build the model input for a single document item.

    Args:
        doc: Document the item belongs to.
        backend: PDF backend used to render the page crop.
        model: Enrichment model; supplies ``is_processable``,
            ``expansion_factor`` and ``images_scale``.
        element: Candidate item taken from the document.

    Returns:
        The item paired with its cropped page image, or ``None`` when
        ``model.is_processable`` rejects the item.
    """
    if not model.is_processable(doc=doc, element=element):
        return None

    assert isinstance(element, DocItem)
    # Only the first provenance entry is used to locate the item on a page.
    prov = element.prov[0]
    box = prov.bbox

    # Margins are proportional to the size of the original box.
    margin_x = (box.r - box.l) * model.expansion_factor
    margin_y = (box.t - box.b) * model.expansion_factor
    crop_region = BoundingBox(
        l=box.l - margin_x,
        t=box.t + margin_y,
        r=box.r + margin_x,
        b=box.b - margin_y,
        coord_origin=box.coord_origin,
    )

    # The provenance page number is shifted down by one before it is
    # handed to the backend.
    page = backend.load_page(page_no=prov.page_no - 1)
    crop = page.get_page_image(scale=model.images_scale, cropbox=crop_region)
    return ItemAndImageEnrichmentElement(item=element, image=crop)
|
||||
|
||||
|
||||
### Iterate through the document
|
||||
# This block defines `enrich_document()`, which is responsible for iterating through the document
|
||||
# and batching the selected document items for running through the model.
|
||||
|
||||
|
||||
def enrich_document(
    doc: DoclingDocument,
    backend: PyPdfiumDocumentBackend,
    model: BaseItemAndImageEnrichmentModel,
) -> DoclingDocument:
    """Run ``model`` over the items of ``doc`` in batches of ``BATCH_SIZE``.

    Args:
        doc: Document whose items are offered to the model.
        backend: PDF backend used to render the image crop of each item.
        model: Enrichment model, called once per batch of prepared elements.

    Returns:
        The same ``doc`` object that was passed in.
        NOTE(review): presumably the model attaches its results to the
        document items in place -- confirm against the model implementation.
    """

    def _prepare_elements(
        doc: DoclingDocument,
        backend: PyPdfiumDocumentBackend,
        model: BaseItemAndImageEnrichmentModel,
    ) -> Iterable[ItemAndImageEnrichmentElement]:
        # Lazily yield the prepared model inputs, dropping the items for
        # which `prepare_element` returns None.
        for doc_element, _level in doc.iterate_items():
            prepared_element = prepare_element(
                doc=doc, backend=backend, model=model, element=doc_element
            )
            if prepared_element is not None:
                yield prepared_element

    for element_batch in chunkify(
        _prepare_elements(doc, backend, model),
        BATCH_SIZE,
    ):
        # The call result is iterated to the end and its values discarded.
        for _ in model(doc=doc, element_batch=element_batch):  # Must exhaust!
            pass

    return doc
|
||||
|
||||
|
||||
### Open and process
|
||||
# The `main()` function initializes the document and model objects and calls `enrich_document()`.
|
||||
|
||||
|
||||
def main():
    """Enrich a stored DoclingDocument with picture classes and print a sample.

    Loads the serialized document and its source PDF from the test data
    folder, runs `DocumentPictureClassifier` through `enrich_document()`,
    and prints the reference and annotations of the first five pictures.
    """
    tests_data = Path(__file__).parent / "../../tests/data"
    pdf_path = tests_data / "pdf/2206.01062.pdf"
    json_path = tests_data / "groundtruth/docling_v2/2206.01062.json"

    doc = DoclingDocument.load_from_json(json_path)

    # The input document is only created to obtain a backend for page crops.
    pdf_input = InputDocument(
        pdf_path,
        format=InputFormat.PDF,
        backend=PyPdfiumDocumentBackend,
        filename=pdf_path.name,
    )
    pdf_backend = pdf_input._backend

    classifier = DocumentPictureClassifier(
        enabled=True,
        artifacts_path=None,
        options=DocumentPictureClassifierOptions(),
        accelerator_options=AcceleratorOptions(),
    )

    doc = enrich_document(doc=doc, backend=pdf_backend, model=classifier)

    for picture in doc.pictures[:5]:
        print(picture.self_ref)
        pprint(picture.annotations)


if __name__ == "__main__":
    main()
|
6
docs/usage/vision_models.md
vendored
6
docs/usage/vision_models.md
vendored
@ -1,5 +1,5 @@
|
||||
|
||||
The `VlmPipeline` in Docling allows to convert documents end-to-end using a vision-language model.
|
||||
The `VlmPipeline` in Docling allows you to convert documents end-to-end using a vision-language model.
|
||||
|
||||
Docling supports vision-language models which output:
|
||||
|
||||
@ -39,7 +39,7 @@ For running Docling using local models with the `VlmPipeline`:
|
||||
## Available local models
|
||||
|
||||
By default, the vision-language models are running locally.
|
||||
Docling allows to choose between the Hugging Face [Transformers](https://github.com/huggingface/transformers) framweork and the [MLX](https://github.com/Blaizzy/mlx-vlm) (for Apple devices with MPS acceleration) one.
|
||||
Docling allows you to choose between the Hugging Face [Transformers](https://github.com/huggingface/transformers) framework and the [MLX](https://github.com/Blaizzy/mlx-vlm) (for Apple devices with MPS acceleration) one.
|
||||
|
||||
The following table reports the models currently available out-of-the-box.
|
||||
|
||||
@ -54,7 +54,7 @@ The following table reports the models currently available out-of-the-box.
|
||||
| `vlm_model_specs.PHI4_TRANSFORMERS` | [microsoft/Phi-4-multimodal-instruct](https://huggingface.co/microsoft/Phi-4-multimodal-instruct) | `Transformers/AutoModelForCausalLM` | CPU | 1 | 1175.67 |
|
||||
| `vlm_model_specs.PIXTRAL_12B_TRANSFORMERS` | [mistral-community/pixtral-12b](https://huggingface.co/mistral-community/pixtral-12b) | `Transformers/AutoModelForVision2Seq` | CPU | 1 | 1828.21 |
|
||||
|
||||
_Inference time is computed on a Macbook M3 Max using the example page `tests/data/pdf/2305.03393v1-pg9.pdf`. The comparision is done with the example [compare_vlm_models.py](./../examples/compare_vlm_models.py)._
|
||||
_Inference time is computed on a Macbook M3 Max using the example page `tests/data/pdf/2305.03393v1-pg9.pdf`. The comparison is done with the example [compare_vlm_models.py](./../examples/compare_vlm_models.py)._
|
||||
|
||||
For choosing the model, the code snippet above can be extended as follows
|
||||
|
||||
|
@ -68,6 +68,7 @@ nav:
|
||||
- Architecture: concepts/architecture.md
|
||||
- Docling Document: concepts/docling_document.md
|
||||
- Serialization: concepts/serialization.md
|
||||
- Confidence Scores: concepts/confidence_scores.md
|
||||
- Chunking: concepts/chunking.md
|
||||
- Plugins: concepts/plugins.md
|
||||
- Examples:
|
||||
@ -106,6 +107,7 @@ nav:
|
||||
- ✨ Enrichment development:
|
||||
- "Figure enrichment": examples/develop_picture_enrichment.py
|
||||
- "Formula enrichment": examples/develop_formula_understanding.py
|
||||
- "Enrich a DoclingDocument": examples/enrich_doclingdocument.py
|
||||
- 🗂️ More examples:
|
||||
- examples/rag_milvus.ipynb
|
||||
- examples/rag_weaviate.ipynb
|
||||
|
@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "docling"
|
||||
version = "2.41.0" # DO NOT EDIT, updated automatically
|
||||
version = "2.42.1" # DO NOT EDIT, updated automatically
|
||||
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
||||
license = "MIT"
|
||||
keywords = [
|
||||
@ -70,6 +70,7 @@ dependencies = [
|
||||
'scipy (>=1.6.0,<2.0.0)',
|
||||
# 'scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"',
|
||||
# 'scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"',
|
||||
"accelerate>=1.0.0,<2",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
@ -1,70 +0,0 @@
|
||||
item-0 at level 0: unspecified: group _root_
|
||||
item-1 at level 1: title: Evolving general practice consul ... Britain: issues of length and context
|
||||
item-2 at level 2: paragraph: George K Freeman, John P Horder, ... on P Hill, Nayan C Shah, Andrew Wilson
|
||||
item-3 at level 2: paragraph: Centre for Primary Care and Soci ... ersity of Leicester, Leicester LE5 4PW
|
||||
item-4 at level 2: text: In 1999 Shah1 and others said th ... per consultation in general practice?
|
||||
item-5 at level 2: text: We report on the outcome of exte ... review identified 14 relevant papers.
|
||||
item-6 at level 2: section_header: Summary points
|
||||
item-7 at level 3: list: group list
|
||||
item-8 at level 4: list_item: Longer consultations are associa ... ith a range of better patient outcomes
|
||||
item-9 at level 4: list_item: Modern consultations in general ... th more serious and chronic conditions
|
||||
item-10 at level 4: list_item: Increasing patient participation ... interaction, which demands extra time
|
||||
item-11 at level 4: list_item: Difficulties with access and wit ... e and lead to further pressure on time
|
||||
item-12 at level 4: list_item: Longer consultations should be a ... t to maximise interpersonal continuity
|
||||
item-13 at level 4: list_item: Research on implementation is needed
|
||||
item-14 at level 2: section_header: Longer consultations: benefits for patients
|
||||
item-15 at level 3: text: The systematic review consistent ... ther some doctors insist on more time.
|
||||
item-16 at level 3: text: A national survey in 1998 report ... s the effects of their own experience.
|
||||
item-17 at level 2: section_header: Context of modern consultations
|
||||
item-18 at level 3: text: Shorter consultations were more ... potential length of the consultation.
|
||||
item-19 at level 2: section_header: Participatory consultation style
|
||||
item-20 at level 3: text: The most effective consultations ... style usually lengthens consultations.
|
||||
item-21 at level 2: section_header: Extended professional agenda
|
||||
item-22 at level 3: text: The traditional consultation in ... agerial expectations of good practice.
|
||||
item-23 at level 3: text: Adequate time is essential. It m ... inevitably leads to pressure on time.
|
||||
item-24 at level 2: section_header: Access problems
|
||||
item-25 at level 3: text: In a service free at the point o ... ort notice squeeze consultation times.
|
||||
item-26 at level 3: text: While appointment systems can an ... for the inadequate access to doctors.
|
||||
item-27 at level 3: text: In response to perception of del ... ntation is currently being negotiated.
|
||||
item-28 at level 3: text: Virtually all patients think tha ... e that is free at the point of access.
|
||||
item-29 at level 3: text: A further government initiative ... ealth advice and first line treatment.
|
||||
item-30 at level 2: section_header: Loss of interpersonal continuity
|
||||
item-31 at level 3: text: If a patient has to consult seve ... unning and professional frustration.18
|
||||
item-32 at level 3: text: Mechanic described how loss of l ... patient and professional satisfaction.
|
||||
item-33 at level 2: section_header: Health service reforms
|
||||
item-34 at level 3: text: Finally, for the past 15 years t ... ents and staff) and what is delivered.
|
||||
item-35 at level 2: section_header: The future
|
||||
item-36 at level 3: text: We think that the way ahead must ... p further the care of chronic disease.
|
||||
item-37 at level 3: text: The challenge posed to general p ... ermedicalisation need to be exploited.
|
||||
item-38 at level 3: text: We must ensure better communicat ... between planned and ad hoc consulting.
|
||||
item-39 at level 2: section_header: Next steps
|
||||
item-40 at level 3: text: General practitioners do not beh ... ailable time in complex consultations.
|
||||
item-41 at level 3: text: Devising appropriate incentives ... and interpersonal knowledge and trust.
|
||||
item-42 at level 2: section_header: Acknowledgments
|
||||
item-43 at level 3: text: We thank the other members of th ... Practitioners for administrative help.
|
||||
item-44 at level 2: section_header: References
|
||||
item-45 at level 3: list: group list
|
||||
item-46 at level 4: list_item: Shah NC. Viewpoint: Consultation ... y men!”. Br J Gen Pract 49:497 (1999).
|
||||
item-47 at level 4: list_item: Mechanic D. How should hamsters ... BMJ 323:266–268 (2001). PMID: 11485957
|
||||
item-48 at level 4: list_item: Howie JGR, Porter AMD, Heaney DJ ... n Pract 41:48–54 (1991). PMID: 2031735
|
||||
item-49 at level 4: list_item: Howie JGR, Heaney DJ, Maxwell M, ... BMJ 319:738–743 (1999). PMID: 10487999
|
||||
item-50 at level 4: list_item: Kaplan SH, Greenfield S, Ware JE ... c disease. Med Care 27:110–125 (1989).
|
||||
item-51 at level 4: list_item: Airey C, Erens B. National surve ... e, 1998. London: NHS Executive (1999).
|
||||
item-52 at level 4: list_item: Hart JT. Expectations of health ... h Expect 1:3–13 (1998). PMID: 11281857
|
||||
item-53 at level 4: list_item: Tuckett D, Boulton M, Olson C, W ... London: Tavistock Publications (1985).
|
||||
item-54 at level 4: list_item: General Medical Council. Draft r ... ctors/index.htm (accessed 2 Jan 2002).
|
||||
item-55 at level 4: list_item: Balint M. The doctor, his patien ... the illness. London: Tavistock (1957).
|
||||
item-56 at level 4: list_item: Stott NCH, Davies RH. The except ... J R Coll Gen Pract 29:210–205 (1979).
|
||||
item-57 at level 4: list_item: Hill AP, Hill AP. Challenges for ... nium. London: King's Fund75–86 (2000).
|
||||
item-58 at level 4: list_item: National service framework for c ... . London: Department of Health (2000).
|
||||
item-59 at level 4: list_item: Hart JT. A new kind of doctor: t ... ommunity. London: Merlin Press (1988).
|
||||
item-60 at level 4: list_item: Morrison I, Smith R. Hamster hea ... J 321:1541–1542 (2000). PMID: 11124164
|
||||
item-61 at level 4: list_item: Arber S, Sawyer L. Do appointmen ... BMJ 284:478–480 (1982). PMID: 6800503
|
||||
item-62 at level 4: list_item: Hjortdahl P, Borchgrevink CF. Co ... MJ 303:1181–1184 (1991). PMID: 1747619
|
||||
item-63 at level 4: list_item: Howie JGR, Hopton JL, Heaney DJ, ... Pract 42:181–185 (1992). PMID: 1389427
|
||||
item-64 at level 4: list_item: Freeman G, Shepperd S, Robinson ... ), Summer 2000. London: NCCSDO (2001).
|
||||
item-65 at level 4: list_item: Wilson A, McDonald P, Hayes L, C ... Pract 41:184–187 (1991). PMID: 1878267
|
||||
item-66 at level 4: list_item: De Maeseneer J, Hjortdahl P, Sta ... J 320:1616–1617 (2000). PMID: 10856043
|
||||
item-67 at level 4: list_item: Freeman G, Hjortdahl P. What fut ... MJ 314:1870–1873 (1997). PMID: 9224130
|
||||
item-68 at level 4: list_item: Kibbe DC, Bentz E, McLaughlin CP ... Pract 36:304–308 (1993). PMID: 8454977
|
||||
item-69 at level 4: list_item: Williams M, Neal RD. Time for a ... ct 48:1783–1786 (1998). PMID: 10198490
|
1080
tests/data/groundtruth/docling_v2/bmj_sample.xml.json
vendored
1080
tests/data/groundtruth/docling_v2/bmj_sample.xml.json
vendored
File diff suppressed because it is too large
Load Diff
105
tests/data/groundtruth/docling_v2/bmj_sample.xml.md
vendored
105
tests/data/groundtruth/docling_v2/bmj_sample.xml.md
vendored
@ -1,105 +0,0 @@
|
||||
# Evolving general practice consultation in Britain: issues of length and context
|
||||
|
||||
George K Freeman, John P Horder, John G R Howie, A Pali Hungin, Alison P Hill, Nayan C Shah, Andrew Wilson
|
||||
|
||||
Centre for Primary Care and Social Medicine, Imperial College of Science, Technology and Medicine, London W6 8RP; Royal College of General Practitioners, London SW7 1PU; Department of General Practice, University of Edinburgh, Edinburgh EH8 9DX; Centre for Health Studies, University of Durham, Durham DH1 3HN; Kilburn Park Medical Centre, London NW6; Department of General Practice and Primary Health Care, University of Leicester, Leicester LE5 4PW
|
||||
|
||||
In 1999 Shah1 and others said that the Royal College of General Practitioners should advocate longer consultations in general practice as a matter of policy. The college set up a working group chaired by A P Hungin, and a systematic review of literature on consultation length in general practice was commissioned. The working group agreed that the available evidence would be hard to interpret without discussion of the changing context within which consultations now take place. For many years general practitioners and those who have surveyed patients' opinions in the United Kingdom have complained about short consultation time, despite a steady increase in actual mean length. Recently Mechanic pointed out that this is also true in the United States.2 Is there any justification for a further increase in mean time allocated per consultation in general practice?
|
||||
|
||||
We report on the outcome of extensive debate among a group of general practitioners with an interest in the process of care, with reference to the interim findings of the commissioned systematic review and our personal databases. The review identified 14 relevant papers.
|
||||
|
||||
## Summary points
|
||||
|
||||
- Longer consultations are associated with a range of better patient outcomes
|
||||
- Modern consultations in general practice deal with patients with more serious and chronic conditions
|
||||
- Increasing patient participation means more complex interaction, which demands extra time
|
||||
- Difficulties with access and with loss of continuity add to perceived stress and poor performance and lead to further pressure on time
|
||||
- Longer consultations should be a professional priority, combined with increased use of technology and more flexible practice management to maximise interpersonal continuity
|
||||
- Research on implementation is needed
|
||||
|
||||
## Longer consultations: benefits for patients
|
||||
|
||||
The systematic review consistently showed that doctors with longer consultation times prescribe less and offer more advice on lifestyle and other health promoting activities. Longer consultations have been significantly associated with better recognition and handling of psychosocial problems3 and with better patient enablement.4 Also clinical care for some chronic illnesses is better in practices with longer booked intervals between one appointment and the next.5 It is not clear whether time is itself the main influence or whether some doctors insist on more time.
|
||||
|
||||
A national survey in 1998 reported that most (87%) patients were satisfied with the length of their most recent consultation.6 Satisfaction with any service will be high if expectations are met or exceeded. But expectations are modified by previous experience.7 The result is that primary care patients are likely to be satisfied with what they are used to unless the context modifies the effects of their own experience.
|
||||
|
||||
## Context of modern consultations
|
||||
|
||||
Shorter consultations were more appropriate when the population was younger, when even a brief absence from employment due to sickness required a doctor's note, and when many simple remedies were available only on prescription. Recently at least five important influences have increased the content and hence the potential length of the consultation.
|
||||
|
||||
## Participatory consultation style
|
||||
|
||||
The most effective consultations are those in which doctors most directly acknowledge and perhaps respond to patients' problems and concerns. In addition, for patients to be committed to taking advantage of medical advice they must agree with both the goals and methods proposed. A landmark publication in the United Kingdom was Meetings Between Experts, which argued that while doctors are the experts about medical problems in general patients are the experts on how they themselves experience these problems.8 New emphasis on teaching consulting skills in general practice advocated specific attention to the patient's agenda, beliefs, understanding, and agreement. Currently the General Medical Council, aware that communication difficulties underlie many complaints about doctors, has further emphasised the importance of involving patients in consultations in its revised guidance to medical schools.9 More patient involvement should give a better outcome, but this participatory style usually lengthens consultations.
|
||||
|
||||
## Extended professional agenda
|
||||
|
||||
The traditional consultation in general practice was brief.2 The patient presented symptoms and the doctor prescribed treatment. In 1957 Balint gave new insights into the meaning of symptoms.10 By 1979 an enhanced model of consultation was presented, in which the doctors dealt with ongoing as well as presenting problems and added health promotion and education about future appropriate use of services.11 Now, with an ageing population and more community care of chronic illness, there are more issues to be considered at each consultation. Ideas of what constitutes good general practice are more complex.12 Good practice now includes both extended care of chronic medical problems—for example, coronary heart disease13—and a public health role. At first this model was restricted to those who lead change (“early adopters”) and enthusiasts14 but now it is embedded in professional and managerial expectations of good practice.
|
||||
|
||||
Adequate time is essential. It may be difficult for an elderly patient with several active problems to undress, be examined, and get adequate professional consideration in under 15 minutes. Here the doctor is faced with the choice of curtailing the consultation or of reducing the time available for the next patient. Having to cope with these situations often contributes to professional dissatisfaction.15 This combination of more care, more options, and more genuine discussion of those options with informed patient choice inevitably leads to pressure on time.
|
||||
|
||||
## Access problems
|
||||
|
||||
In a service free at the point of access, rising demand will tend to increase rationing by delay. But attempts to improve access by offering more consultations at short notice squeeze consultation times.
|
||||
|
||||
While appointment systems can and should reduce queuing time for consultations, they have long tended to be used as a brake on total demand.16 This may seriously erode patients' confidence in being able to see their doctor or nurse when they need to. Patients are offered appointments further ahead but may keep these even if their symptoms have remitted “just in case.” Availability of consultations is thus blocked. Receptionists are then inappropriately blamed for the inadequate access to doctors.
|
||||
|
||||
In response to perception of delay, the government has set targets in the NHS plan of “guaranteed access to a primary care professional within 24 hours and to a primary care doctor within 48 hours.” Implementation is currently being negotiated.
|
||||
|
||||
Virtually all patients think that they would not consult unless it was absolutely necessary. They do not think they are wasting NHS time and do not like being made to feel so. But underlying general practitioners' willingness to make patients wait several days is their perception that few of the problems are urgent. Patients and general practitioners evidently do not agree about the urgency of so called minor problems. To some extent general practice in the United Kingdom may have scored an “own goal” by setting up perceived access barriers (appointment systems and out of hours cooperatives) in the attempt to increase professional standards and control demand in a service that is free at the point of access.
|
||||
|
||||
A further government initiative has been to bypass general practice with new services—notably, walk-in centres (primary care clinics in which no appointment is needed) and NHS Direct (a professional telephone helpline giving advice on simple remedies and access to services). Introduced widely and rapidly, these services each potentially provide significant features of primary care—namely, quick access to skilled health advice and first line treatment.
|
||||
|
||||
## Loss of interpersonal continuity
|
||||
|
||||
If a patient has to consult several different professionals, particularly over a short period of time, there is inevitable duplication of stories, risk of naive diagnoses, potential for conflicting advice, and perhaps loss of trust. Trust is essential if patients are to accept the “wait and see” management policy which is, or should be, an important part of the management of self limiting conditions, which are often on the boundary between illness and non-illness.17 Such duplication again increases pressure for more extra (unscheduled) consultations resulting in late running and professional frustration.18
|
||||
|
||||
Mechanic described how loss of longitudinal (and perhaps personal and relational19) continuity influences the perception and use of time through an inability to build on previous consultations.2 Knowing the doctor well, particularly in smaller practices, is associated with enhanced patient enablement in shorter time.4 Though Mechanic pointed out that three quarters of UK patients have been registered with their general practitioner five years or more, this may be misleading. Practices are growing, with larger teams and more registered patients. Being registered with a doctor in a larger practice is usually no guarantee that the patient will be able to see the same doctor or the doctor of his or her choice, who may be different. Thus the system does not encourage adequate personal continuity. This adds to pressure on time and reduces both patient and professional satisfaction.
|
||||
|
||||
## Health service reforms
|
||||
|
||||
Finally, for the past 15 years the NHS has experienced unprecedented change with a succession of major administrative reforms. Recent reforms have focused on an NHS led by primary care, including the aim of shifting care from the secondary specialist sector to primary care. One consequence is increased demand for primary care of patients with more serious and less stable problems. With the limited piloting of reforms we do not know whether such major redirection can be achieved without greatly altering the delicate balance between expectations (of both patients and staff) and what is delivered.
|
||||
|
||||
## The future
|
||||
|
||||
We think that the way ahead must embrace both longer mean consultation times and more flexibility. More time is needed for high quality consultations with patients with major and complex problems of all kinds. But patients also need access to simpler services and advice. This should be more appropriate (and cost less) when it is given by professionals who know the patient and his or her medical history and social circumstances. For doctors, the higher quality associated with longer consultations may lead to greater professional satisfaction and, if these longer consultations are combined with more realistic scheduling, to reduced levels of stress.20 They will also find it easier to develop further the care of chronic disease.
|
||||
|
||||
The challenge posed to general practice by walk-in centres and NHS Direct is considerable, and the diversion of funding from primary care is large. The risk of waste and duplication increases as more layers of complexity are added to a primary care service that started out as something familiar, simple, and local and which is still envied in other developed countries.21 Access needs to be simple, and the advantages of personal knowledge and trust in minimising duplication and overmedicalisation need to be exploited.
|
||||
|
||||
We must ensure better communication and access so that patients can more easily deal with minor issues and queries with someone they know and trust and avoid the formality and inconvenience of a full face to face consultation. Too often this has to be with a different professional, unfamiliar with the nuances of the case. There should be far more managerial emphasis on helping patients to interact with their chosen practitioner22; such a programme has been described.23 Modern information systems make it much easier to record which doctor(s) a patient prefers to see and to monitor how often this is achieved. The telephone is hardly modern but is underused. Email avoids the problems inherent in arranging simultaneous availability necessary for telephone consultations but at the cost of reducing the communication of emotions. There is a place for both.2 Access without prior appointment is a valued feature of primary care, and we need to know more about the right balance between planned and ad hoc consulting.
|
||||
|
||||
## Next steps
|
||||
|
||||
General practitioners do not behave in a uniform way. They can be categorised as slow, medium, and fast and react in different ways to changes in consulting speed.18 They are likely to have differing views about a widespread move to lengthen consultation time. We do not need further confirmation that longer consultations are desirable and necessary, but research could show us the best way to learn how to introduce them with minimal disruption to the way in which patients and practices like primary care to be provided.24 We also need to learn how to make the most of available time in complex consultations.
|
||||
|
||||
Devising appropriate incentives and helping practices move beyond just reacting to demand in the traditional way by working harder and faster is perhaps our greatest challenge in the United Kingdom. The new primary are trusts need to work together with the growing primary care research networks to carry out the necessary development work. In particular, research is needed on how a primary care team can best provide the right balance of quick access and interpersonal knowledge and trust.
|
||||
|
||||
## Acknowledgments
|
||||
|
||||
We thank the other members of the working group: Susan Childs, Paul Freeling, Iona Heath, Marshall Marinker, and Bonnie Sibbald. We also thank Fenny Green of the Royal College of General Practitioners for administrative help.
|
||||
|
||||
## References
|
||||
|
||||
- Shah NC. Viewpoint: Consultation time—time for a change? Still the “perfunctory work of perfunctory men!”. Br J Gen Pract 49:497 (1999).
|
||||
- Mechanic D. How should hamsters run? Some observations about sufficient patient time in primary care. BMJ 323:266–268 (2001). PMID: 11485957
|
||||
- Howie JGR, Porter AMD, Heaney DJ, Hopton JL. Long to short consultation ratio: a proxy measure of quality of care for general practice. Br J Gen Pract 41:48–54 (1991). PMID: 2031735
|
||||
- Howie JGR, Heaney DJ, Maxwell M, Walker JJ, Freeman GK, Rai H. Quality at general practice consultations: cross-sectional survey. BMJ 319:738–743 (1999). PMID: 10487999
|
||||
- Kaplan SH, Greenfield S, Ware JE. Assessing the effects of physician-patient interactions on the outcome of chronic disease. Med Care 27:110–125 (1989).
|
||||
- Airey C, Erens B. National surveys of NHS patients: general practice, 1998. London: NHS Executive (1999).
|
||||
- Hart JT. Expectations of health care: promoted, managed or shared?. Health Expect 1:3–13 (1998). PMID: 11281857
|
||||
- Tuckett D, Boulton M, Olson C, Williams A. Meetings between experts: an approach to sharing ideas in medical consultations. London: Tavistock Publications (1985).
|
||||
- General Medical Council. Draft recommendations on undergraduate medical education. July 2001. www.gmc-uk.org/med\_ed/tomorrowsdoctors/index.htm (accessed 2 Jan 2002).
|
||||
- Balint M. The doctor, his patient and the illness. London: Tavistock (1957).
|
||||
- Stott NCH, Davies RH. The exceptional potential in each primary care consultation. J R Coll Gen Pract 29:210–205 (1979).
|
||||
- Hill AP, Hill AP. Challenges for primary care. What's gone wrong with health care? Challenges for the new millennium. London: King's Fund75–86 (2000).
|
||||
- National service framework for coronary heart disease. London: Department of Health (2000).
|
||||
- Hart JT. A new kind of doctor: the general practitioner's part in the health of the community. London: Merlin Press (1988).
|
||||
- Morrison I, Smith R. Hamster health care. BMJ 321:1541–1542 (2000). PMID: 11124164
|
||||
- Arber S, Sawyer L. Do appointment systems work?. BMJ 284:478–480 (1982). PMID: 6800503
|
||||
- Hjortdahl P, Borchgrevink CF. Continuity of care: influence of general practitioners' knowledge about their patients on use of resources in consultations. BMJ 303:1181–1184 (1991). PMID: 1747619
|
||||
- Howie JGR, Hopton JL, Heaney DJ, Porter AMD. Attitudes to medical care, the organization of work, and stress among general practitioners. Br J Gen Pract 42:181–185 (1992). PMID: 1389427
|
||||
- Freeman G, Shepperd S, Robinson I, Ehrich K, Richards SC, Pitman P. Continuity of care: report of a scoping exercise for the national co-ordinating centre for NHS Service Delivery and Organisation R&D (NCCSDO), Summer 2000. London: NCCSDO (2001).
|
||||
- Wilson A, McDonald P, Hayes L, Cooney J. Longer booking intervals in general practice: effects on doctors' stress and arousal. Br J Gen Pract 41:184–187 (1991). PMID: 1878267
|
||||
- De Maeseneer J, Hjortdahl P, Starfield B. Fix what's wrong, not what's right, with general practice in Britain. BMJ 320:1616–1617 (2000). PMID: 10856043
|
||||
- Freeman G, Hjortdahl P. What future for continuity of care in general practice?. BMJ 314:1870–1873 (1997). PMID: 9224130
|
||||
- Kibbe DC, Bentz E, McLaughlin CP. Continuous quality improvement for continuity of care. J Fam Pract 36:304–308 (1993). PMID: 8454977
|
||||
- Williams M, Neal RD. Time for a change? The process of lengthening booking intervals in general practice. Br J Gen Pract 48:1783–1786 (1998). PMID: 10198490
|
7265
tests/data/groundtruth/docling_v2/elife-56337.nxml.json
vendored
Normal file
7265
tests/data/groundtruth/docling_v2/elife-56337.nxml.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -99,16 +99,16 @@ Key resources table
|
||||
| Cell line (Mus musculus) | B6;129‐ Gt(ROSA)26Sortm1(cre/ERT)Nat/J | The Jackson Laboratory | 004847 | ES cells used to generate KO cell lines and mice |
|
||||
| Cell line (Mus musculus) | R1 ES cells | Andras Nagy lab | R1 | 129 ES cells used to generate KO cell lines and mice |
|
||||
| Cell line (Mus musculus) | F9 Embryonic carcinoma cells | ATCC | ATCC CRL-1720 | |
|
||||
| Antibody | Mouse monoclonal ANTI-FLAG M2 antibody | Sigma-Aldrich | Cat# F1804, RRID:AB\_262044 | ChIP (1 µg/107 cells) |
|
||||
| Antibody | Rabbit polyclonal anti-HA | Abcam | Cat# ab9110, RRID:AB\_307019 | ChIP (1 µg/107 cells) |
|
||||
| Antibody | Mouse monoclonal anti-HA | Covance | Cat# MMS-101P-200, RRID:AB\_10064068 | |
|
||||
| Antibody | Rabbit polyclonal anti-H3K9me3 | Active Motif | Cat# 39161, RRID:AB\_2532132 | ChIP (3 µl/107 cells) |
|
||||
| Antibody | Rabbit polyclonal anti-GFP | Thermo Fisher Scientific | Cat# A-11122, RRID:AB\_221569 | ChIP (1 µg/107 cells) |
|
||||
| Antibody | Rabbit polyclonal anti- H3K4me3 | Abcam | Cat# ab8580, RRID:AB\_306649 | ChIP (1 µg/107 cells) |
|
||||
| Antibody | Rabbit polyclonal anti- H3K4me1 | Abcam | Cat# ab8895, RRID:AB\_306847 | ChIP (1 µg/107 cells) |
|
||||
| Antibody | Rabbit polyclonal anti- H3K27ac | Abcam | Cat# ab4729, RRID:AB\_2118291 | ChIP (1 µg/107 cells) |
|
||||
| Recombinant DNA reagent | pCW57.1 | Addgene | RRID:Addgene\_41393 | Inducible lentiviral expression vector |
|
||||
| Recombinant DNA reagent | pX330-U6-Chimeric\_BB-CBh-hSpCas9 | Addgene | RRID:Addgene\_42230 | CRISPR/Cas9 expression construct |
|
||||
| Antibody | Mouse monoclonal ANTI-FLAG M2 antibody | Sigma-Aldrich | Cat# F1804, RRID:AB_262044 | ChIP (1 µg/107 cells) |
|
||||
| Antibody | Rabbit polyclonal anti-HA | Abcam | Cat# ab9110, RRID:AB_307019 | ChIP (1 µg/107 cells) |
|
||||
| Antibody | Mouse monoclonal anti-HA | Covance | Cat# MMS-101P-200, RRID:AB_10064068 | |
|
||||
| Antibody | Rabbit polyclonal anti-H3K9me3 | Active Motif | Cat# 39161, RRID:AB_2532132 | ChIP (3 µl/107 cells) |
|
||||
| Antibody | Rabbit polyclonal anti-GFP | Thermo Fisher Scientific | Cat# A-11122, RRID:AB_221569 | ChIP (1 µg/107 cells) |
|
||||
| Antibody | Rabbit polyclonal anti- H3K4me3 | Abcam | Cat# ab8580, RRID:AB_306649 | ChIP (1 µg/107 cells) |
|
||||
| Antibody | Rabbit polyclonal anti- H3K4me1 | Abcam | Cat# ab8895, RRID:AB_306847 | ChIP (1 µg/107 cells) |
|
||||
| Antibody | Rabbit polyclonal anti- H3K27ac | Abcam | Cat# ab4729, RRID:AB_2118291 | ChIP (1 µg/107 cells) |
|
||||
| Recombinant DNA reagent | pCW57.1 | Addgene | RRID:Addgene_41393 | Inducible lentiviral expression vector |
|
||||
| Recombinant DNA reagent | pX330-U6-Chimeric_BB-CBh-hSpCas9 | Addgene | RRID:Addgene_42230 | CRISPR/Cas9 expression construct |
|
||||
| Sequence-based reagent | Chr2-cl KO gRNA.1 | This paper | Cas9 gRNA | GCCGTTGCTCAGTCCAAATG |
|
||||
| Sequenced-based reagent | Chr2-cl KO gRNA.2 | This paper | Cas9 gRNA | GATACCAGAGGTGGCCGCAAG |
|
||||
| Sequenced-based reagent | Chr4-cl KO gRNA.1 | This paper | Cas9 gRNA | GCAAAGGGGCTCCTCGATGGA |
|
||||
@ -120,9 +120,9 @@ Key resources table
|
||||
| Sequenced-based reagent | Chr13.2-cl KO gRNA.1 | This paper | Cas9 gRNA | GGGTTTCTGAGAAACGTGTA |
|
||||
| Sequenced-based reagent | Chr13.2-cl KO gRNA.2 | This paper | Cas9 gRNA | GTGTAATGAGTTCTTATATC |
|
||||
| Commercial assay or kit | SureSelectQXT Target Enrichment kit | Agilent | G9681-90000 | |
|
||||
| Software, algorithm | Bowtie | http://bowtie-bio.sourceforge.net | RRID:SCR\_005476 | |
|
||||
| Software, algorithm | MACS14 | https://bio.tools/macs | RRID:SCR\_013291 | |
|
||||
| Software, algorithm | Tophat | https://ccb.jhu.edu | RRID:SCR\_013035 | |
|
||||
| Software, algorithm | Bowtie | http://bowtie-bio.sourceforge.net | RRID:SCR_005476 | |
|
||||
| Software, algorithm | MACS14 | https://bio.tools/macs | RRID:SCR_013291 | |
|
||||
| Software, algorithm | Tophat | https://ccb.jhu.edu | RRID:SCR_013035 | |
|
||||
|
||||
### Cell lines and transgenic mice
|
||||
|
@ -4,6 +4,7 @@ item-0 at level 0: unspecified: group _root_
|
||||
item-3 at level 2: section_header: Background
|
||||
item-4 at level 3: text: Some background information here.
|
||||
item-5 at level 3: picture
|
||||
item-5 at level 4: caption: Example image
|
||||
item-6 at level 3: list: group list
|
||||
item-7 at level 4: list_item: First item in unordered list
|
||||
item-8 at level 4: list_item: Second item in unordered list
|
||||
@ -12,4 +13,5 @@ item-0 at level 0: unspecified: group _root_
|
||||
item-11 at level 4: list_item: Second item in ordered list
|
||||
item-12 at level 3: list: group ordered list start 42
|
||||
item-13 at level 4: list_item: First item in ordered list with start
|
||||
item-14 at level 4: list_item: Second item in ordered list with start
|
||||
item-14 at level 4: list_item: Second item in ordered list with start
|
||||
item-15 at level 1: caption: Example image
|
@ -19,6 +19,9 @@
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/0"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/4"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
@ -33,10 +36,10 @@
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/4"
|
||||
"$ref": "#/texts/5"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/5"
|
||||
"$ref": "#/texts/6"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
@ -50,10 +53,10 @@
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/6"
|
||||
"$ref": "#/texts/7"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/7"
|
||||
"$ref": "#/texts/8"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
@ -67,10 +70,10 @@
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/8"
|
||||
"$ref": "#/texts/9"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/9"
|
||||
"$ref": "#/texts/10"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
@ -153,6 +156,18 @@
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/4",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "caption",
|
||||
"prov": [],
|
||||
"orig": "Example image",
|
||||
"text": "Example image"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/5",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
@ -166,7 +181,7 @@
|
||||
"marker": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/5",
|
||||
"self_ref": "#/texts/6",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
@ -180,7 +195,7 @@
|
||||
"marker": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/6",
|
||||
"self_ref": "#/texts/7",
|
||||
"parent": {
|
||||
"$ref": "#/groups/1"
|
||||
},
|
||||
@ -194,7 +209,7 @@
|
||||
"marker": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/7",
|
||||
"self_ref": "#/texts/8",
|
||||
"parent": {
|
||||
"$ref": "#/groups/1"
|
||||
},
|
||||
@ -208,7 +223,7 @@
|
||||
"marker": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/8",
|
||||
"self_ref": "#/texts/9",
|
||||
"parent": {
|
||||
"$ref": "#/groups/2"
|
||||
},
|
||||
@ -222,7 +237,7 @@
|
||||
"marker": "42."
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/9",
|
||||
"self_ref": "#/texts/10",
|
||||
"parent": {
|
||||
"$ref": "#/groups/2"
|
||||
},
|
||||
@ -246,7 +261,11 @@
|
||||
"content_layer": "body",
|
||||
"label": "picture",
|
||||
"prov": [],
|
||||
"captions": [],
|
||||
"captions": [
|
||||
{
|
||||
"$ref": "#/texts/4"
|
||||
}
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"annotations": []
|
||||
|
@ -6,6 +6,8 @@ This is the first paragraph of the introduction.
|
||||
|
||||
Some background information here.
|
||||
|
||||
Example image
|
||||
|
||||
<!-- image -->
|
||||
|
||||
- First item in unordered list
|
||||
|
@ -3,8 +3,8 @@ item-0 at level 0: unspecified: group _root_
|
||||
item-2 at level 1: text: This is another div with text.
|
||||
item-3 at level 1: text: This is a regular paragraph.
|
||||
item-4 at level 1: text: This is a third div
|
||||
with a new line.
|
||||
item-5 at level 1: section: group details
|
||||
item-6 at level 2: text: Heading for the details element
|
||||
item-7 at level 2: text: Description of the details element.
|
||||
item-8 at level 1: text: This is a fourth div with a bold paragraph.
|
||||
item-5 at level 1: text: with a new line.
|
||||
item-6 at level 1: section: group details
|
||||
item-7 at level 2: text: Heading for the details element
|
||||
item-8 at level 2: text: Description of the details element.
|
||||
item-9 at level 1: text: This is a fourth div with a bold paragraph.
|
@ -29,11 +29,17 @@
|
||||
{
|
||||
"$ref": "#/texts/3"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/4"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/5"
|
||||
},
|
||||
{
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/6"
|
||||
"$ref": "#/texts/8"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
@ -48,10 +54,10 @@
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/4"
|
||||
"$ref": "#/texts/6"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/5"
|
||||
"$ref": "#/texts/7"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
@ -66,6 +72,18 @@
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "furniture",
|
||||
"label": "title",
|
||||
"prov": [],
|
||||
"orig": "Sample HTML File",
|
||||
"text": "Sample HTML File"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/1",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [],
|
||||
@ -73,7 +91,7 @@
|
||||
"text": "This is a div with text."
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/1",
|
||||
"self_ref": "#/texts/2",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
@ -85,7 +103,7 @@
|
||||
"text": "This is another div with text."
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/2",
|
||||
"self_ref": "#/texts/3",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
@ -97,7 +115,7 @@
|
||||
"text": "This is a regular paragraph."
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/3",
|
||||
"self_ref": "#/texts/4",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
@ -105,11 +123,23 @@
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [],
|
||||
"orig": "This is a third div\nwith a new line.",
|
||||
"text": "This is a third div\nwith a new line."
|
||||
"orig": "This is a third div",
|
||||
"text": "This is a third div"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/4",
|
||||
"self_ref": "#/texts/5",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [],
|
||||
"orig": "with a new line.",
|
||||
"text": "with a new line."
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/6",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
@ -121,7 +151,7 @@
|
||||
"text": "Heading for the details element"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/5",
|
||||
"self_ref": "#/texts/7",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
@ -133,7 +163,7 @@
|
||||
"text": "Description of the details element."
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/6",
|
||||
"self_ref": "#/texts/8",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
|
@ -5,6 +5,7 @@ This is another div with text.
|
||||
This is a regular paragraph.
|
||||
|
||||
This is a third div
|
||||
|
||||
with a new line.
|
||||
|
||||
Heading for the details element
|
||||
|
27
tests/data/groundtruth/docling_v2/example_09.html.itxt
vendored
Normal file
27
tests/data/groundtruth/docling_v2/example_09.html.itxt
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
item-0 at level 0: unspecified: group _root_
|
||||
item-1 at level 1: title: Introduction to parsing HTML files with Docling
|
||||
item-2 at level 2: picture
|
||||
item-2 at level 3: caption: Docling
|
||||
item-3 at level 2: text: Docling simplifies document proc ... ntegrations with the gen AI ecosystem.
|
||||
item-4 at level 2: section_header: Supported file formats
|
||||
item-5 at level 3: text: Docling supports multiple file formats..
|
||||
item-6 at level 3: list: group list
|
||||
item-7 at level 4: list_item: Advanced PDF understanding
|
||||
item-8 at level 4: picture
|
||||
item-8 at level 5: caption: PDF
|
||||
item-9 at level 4: list_item: Microsoft Office DOCX
|
||||
item-10 at level 4: picture
|
||||
item-10 at level 5: caption: DOCX
|
||||
item-11 at level 4: list_item: HTML files (with optional support for images)
|
||||
item-12 at level 4: picture
|
||||
item-12 at level 5: caption: HTML
|
||||
item-13 at level 3: section_header: Three backends for handling HTML files
|
||||
item-14 at level 4: text: Docling has three backends for parsing HTML files:
|
||||
item-15 at level 4: list: group ordered list
|
||||
item-16 at level 5: list_item: HTMLDocumentBackend Ignores images
|
||||
item-17 at level 5: list_item: HTMLDocumentBackendImagesInline Extracts images inline
|
||||
item-18 at level 5: list_item: HTMLDocumentBackendImagesReferenced Extracts images as references
|
||||
item-19 at level 1: caption: Docling
|
||||
item-20 at level 1: caption: PDF
|
||||
item-21 at level 1: caption: DOCX
|
||||
item-22 at level 1: caption: HTML
|
404
tests/data/groundtruth/docling_v2/example_09.html.json
vendored
Normal file
404
tests/data/groundtruth/docling_v2/example_09.html.json
vendored
Normal file
@ -0,0 +1,404 @@
|
||||
{
|
||||
"schema_name": "DoclingDocument",
|
||||
"version": "1.5.0",
|
||||
"name": "example_09",
|
||||
"origin": {
|
||||
"mimetype": "text/html",
|
||||
"binary_hash": 6785336133244366107,
|
||||
"filename": "example_09.html"
|
||||
},
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"body": {
|
||||
"self_ref": "#/body",
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/0"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/1"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/6"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/8"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/10"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"groups": [
|
||||
{
|
||||
"self_ref": "#/groups/0",
|
||||
"parent": {
|
||||
"$ref": "#/texts/3"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/5"
|
||||
},
|
||||
{
|
||||
"$ref": "#/pictures/1"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/7"
|
||||
},
|
||||
{
|
||||
"$ref": "#/pictures/2"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/9"
|
||||
},
|
||||
{
|
||||
"$ref": "#/pictures/3"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "list",
|
||||
"label": "list"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/groups/1",
|
||||
"parent": {
|
||||
"$ref": "#/texts/11"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/13"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/14"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/15"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "ordered list",
|
||||
"label": "list"
|
||||
}
|
||||
],
|
||||
"texts": [
|
||||
{
|
||||
"self_ref": "#/texts/0",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/pictures/0"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/2"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/3"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"label": "title",
|
||||
"prov": [],
|
||||
"orig": "Introduction to parsing HTML files with Docling",
|
||||
"text": "Introduction to parsing HTML files with Docling"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/1",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "caption",
|
||||
"prov": [],
|
||||
"orig": "Docling",
|
||||
"text": "Docling"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/2",
|
||||
"parent": {
|
||||
"$ref": "#/texts/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [],
|
||||
"orig": "Docling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem.",
|
||||
"text": "Docling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem."
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/3",
|
||||
"parent": {
|
||||
"$ref": "#/texts/0"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/4"
|
||||
},
|
||||
{
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/11"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"label": "section_header",
|
||||
"prov": [],
|
||||
"orig": "Supported file formats",
|
||||
"text": "Supported file formats",
|
||||
"level": 1
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/4",
|
||||
"parent": {
|
||||
"$ref": "#/texts/3"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [],
|
||||
"orig": "Docling supports multiple file formats..",
|
||||
"text": "Docling supports multiple file formats.."
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/5",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Advanced PDF understanding",
|
||||
"text": "Advanced PDF understanding",
|
||||
"enumerated": false,
|
||||
"marker": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/6",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "caption",
|
||||
"prov": [],
|
||||
"orig": "PDF",
|
||||
"text": "PDF"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/7",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Microsoft Office DOCX",
|
||||
"text": "Microsoft Office DOCX",
|
||||
"enumerated": false,
|
||||
"marker": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/8",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "caption",
|
||||
"prov": [],
|
||||
"orig": "DOCX",
|
||||
"text": "DOCX"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/9",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "HTML files (with optional support for images)",
|
||||
"text": "HTML files (with optional support for images)",
|
||||
"enumerated": false,
|
||||
"marker": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/10",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "caption",
|
||||
"prov": [],
|
||||
"orig": "HTML",
|
||||
"text": "HTML"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/11",
|
||||
"parent": {
|
||||
"$ref": "#/texts/3"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/12"
|
||||
},
|
||||
{
|
||||
"$ref": "#/groups/1"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"label": "section_header",
|
||||
"prov": [],
|
||||
"orig": "Three backends for handling HTML files",
|
||||
"text": "Three backends for handling HTML files",
|
||||
"level": 2
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/12",
|
||||
"parent": {
|
||||
"$ref": "#/texts/11"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [],
|
||||
"orig": "Docling has three backends for parsing HTML files:",
|
||||
"text": "Docling has three backends for parsing HTML files:"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/13",
|
||||
"parent": {
|
||||
"$ref": "#/groups/1"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "HTMLDocumentBackend Ignores images",
|
||||
"text": "HTMLDocumentBackend Ignores images",
|
||||
"enumerated": true,
|
||||
"marker": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/14",
|
||||
"parent": {
|
||||
"$ref": "#/groups/1"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "HTMLDocumentBackendImagesInline Extracts images inline",
|
||||
"text": "HTMLDocumentBackendImagesInline Extracts images inline",
|
||||
"enumerated": true,
|
||||
"marker": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/15",
|
||||
"parent": {
|
||||
"$ref": "#/groups/1"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "HTMLDocumentBackendImagesReferenced Extracts images as references",
|
||||
"text": "HTMLDocumentBackendImagesReferenced Extracts images as references",
|
||||
"enumerated": true,
|
||||
"marker": ""
|
||||
}
|
||||
],
|
||||
"pictures": [
|
||||
{
|
||||
"self_ref": "#/pictures/0",
|
||||
"parent": {
|
||||
"$ref": "#/texts/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "picture",
|
||||
"prov": [],
|
||||
"captions": [
|
||||
{
|
||||
"$ref": "#/texts/1"
|
||||
}
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
"self_ref": "#/pictures/1",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "picture",
|
||||
"prov": [],
|
||||
"captions": [
|
||||
{
|
||||
"$ref": "#/texts/6"
|
||||
}
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
"self_ref": "#/pictures/2",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "picture",
|
||||
"prov": [],
|
||||
"captions": [
|
||||
{
|
||||
"$ref": "#/texts/8"
|
||||
}
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"annotations": []
|
||||
},
|
||||
{
|
||||
"self_ref": "#/pictures/3",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "picture",
|
||||
"prov": [],
|
||||
"captions": [
|
||||
{
|
||||
"$ref": "#/texts/10"
|
||||
}
|
||||
],
|
||||
"references": [],
|
||||
"footnotes": [],
|
||||
"annotations": []
|
||||
}
|
||||
],
|
||||
"tables": [],
|
||||
"key_value_items": [],
|
||||
"form_items": [],
|
||||
"pages": {}
|
||||
}
|
32
tests/data/groundtruth/docling_v2/example_09.html.md
vendored
Normal file
32
tests/data/groundtruth/docling_v2/example_09.html.md
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
# Introduction to parsing HTML files with Docling
|
||||
|
||||
Docling
|
||||
|
||||
<!-- image -->
|
||||
|
||||
Docling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem.
|
||||
|
||||
## Supported file formats
|
||||
|
||||
Docling supports multiple file formats..
|
||||
|
||||
- Advanced PDF understanding
|
||||
PDF
|
||||
|
||||
<!-- image -->
|
||||
- Microsoft Office DOCX
|
||||
DOCX
|
||||
|
||||
<!-- image -->
|
||||
- HTML files (with optional support for images)
|
||||
HTML
|
||||
|
||||
<!-- image -->
|
||||
|
||||
### Three backends for handling HTML files
|
||||
|
||||
Docling has three backends for parsing HTML files:
|
||||
|
||||
1. HTMLDocumentBackend Ignores images
|
||||
2. HTMLDocumentBackendImagesInline Extracts images inline
|
||||
3. HTMLDocumentBackendImagesReferenced Extracts images as references
|
@ -1,8 +0,0 @@
|
||||
item-0 at level 0: unspecified: group _root_
|
||||
item-1 at level 1: section: group header-1
|
||||
item-2 at level 2: section_header: Pivot table with with 1 row header
|
||||
item-3 at level 3: table with [6x4]
|
||||
item-4 at level 2: section_header: Pivot table with 2 row headers
|
||||
item-5 at level 3: table with [6x5]
|
||||
item-6 at level 2: section_header: Equivalent pivot table
|
||||
item-7 at level 3: table with [6x5]
|
2008
tests/data/groundtruth/docling_v2/example_8.html.json
vendored
2008
tests/data/groundtruth/docling_v2/example_8.html.json
vendored
File diff suppressed because it is too large
Load Diff
@ -1,29 +0,0 @@
|
||||
## Pivot table with with 1 row header
|
||||
|
||||
| Year | Month | Revenue | Cost |
|
||||
|--------|----------|-----------|--------|
|
||||
| 2025 | January | $134 | $162 |
|
||||
| 2025 | February | $150 | $155 |
|
||||
| 2025 | March | $160 | $143 |
|
||||
| 2025 | April | $210 | $150 |
|
||||
| 2025 | May | $280 | $120 |
|
||||
|
||||
## Pivot table with 2 row headers
|
||||
|
||||
| Year | Quarter | Month | Revenue | Cost |
|
||||
|--------|-----------|----------|-----------|--------|
|
||||
| 2025 | Q1 | January | $134 | $162 |
|
||||
| 2025 | Q1 | February | $150 | $155 |
|
||||
| 2025 | Q1 | March | $160 | $143 |
|
||||
| 2025 | Q2 | April | $210 | $150 |
|
||||
| 2025 | Q2 | May | $280 | $120 |
|
||||
|
||||
## Equivalent pivot table
|
||||
|
||||
| Year | Quarter | Month | Revenue | Cost |
|
||||
|--------|-----------|----------|-----------|--------|
|
||||
| 2025 | Q1 | January | $134 | $162 |
|
||||
| 2025 | Q1 | February | $150 | $155 |
|
||||
| 2025 | Q1 | March | $160 | $143 |
|
||||
| 2025 | Q2 | April | $210 | $150 |
|
||||
| 2025 | Q2 | May | $280 | $120 |
|
@ -1,148 +0,0 @@
|
||||
item-0 at level 0: unspecified: group _root_
|
||||
item-1 at level 1: title: The coreceptor mutation CCR5Δ32 ... V epidemics and is selected for by HIV
|
||||
item-2 at level 2: paragraph: Amy D. Sullivan, Janis Wigginton, Denise Kirschner
|
||||
item-3 at level 2: paragraph: Department of Microbiology and I ... dical School, Ann Arbor, MI 48109-0620
|
||||
item-4 at level 2: section_header: Abstract
|
||||
item-5 at level 3: text: We explore the impact of a host ... creasing the frequency of this allele.
|
||||
item-6 at level 2: text: Nineteen million people have die ... factors such as host genetics (4, 5).
|
||||
item-7 at level 2: text: To exemplify the contribution of ... follow the CCR5Δ32 allelic frequency.
|
||||
item-8 at level 2: text: We hypothesize that CCR5Δ32 limi ... g the frequency of this mutant allele.
|
||||
item-9 at level 2: text: CCR5 is a host-cell chemokine re ... iral strain (such as X4 or R5X4) (30).
|
||||
item-10 at level 2: section_header: The Model
|
||||
item-11 at level 3: text: Because we are most concerned wi ... t both economic and social conditions.
|
||||
item-12 at level 3: picture
|
||||
item-12 at level 4: caption: Figure 1 A schematic representation of the basic compartmental HIV epidemic model. The criss-cross lines indicate the sexual mixing between different compartments. Each of these interactions has a positive probability of taking place; they also incorporate individual rates of transmission indicated as λ, but in full notation is λ î,,→i,j, where i,j,k is the phenotype of the infected partner and î, is the phenotype of the susceptible partner. Also shown are the different rates of disease progression, γ i,j,k , that vary according to genotype, gender, and stage. Thus, the interactions between different genotypes, genders, and stages are associated with a unique probability of HIV infection. M, male; F, female.
|
||||
item-13 at level 3: table with [6x5]
|
||||
item-13 at level 4: caption: Table 1 Children's genotype
|
||||
item-14 at level 3: section_header: Parameter Estimates for the Model.
|
||||
item-15 at level 4: text: Estimates for rates that govern ... d in Fig. 1 are summarized as follows:
|
||||
item-16 at level 4: formula: \frac{dS_{i,j}(t)}{dt}={\chi}_{ ... ,\hat {k}{\rightarrow}i,j}S_{i,j}(t),
|
||||
item-17 at level 4: formula: \hspace{1em}\hspace{1em}\hspace ... j,A}(t)-{\gamma}_{i,j,A}I_{i,j,A}(t),
|
||||
item-18 at level 4: formula: \frac{dI_{i,j,B}(t)}{dt}={\gamm ... j,B}(t)-{\gamma}_{i,j,B}I_{i,j,B}(t),
|
||||
item-19 at level 4: formula: \frac{dA(t)}{dt}={\gamma}_{i,j, ... \right) -{\mu}_{A}A(t)-{\delta}A(t),
|
||||
item-20 at level 4: text: where, in addition to previously ... on of the infected partner, and j ≠ .
|
||||
item-21 at level 4: table with [14x5]
|
||||
item-21 at level 5: caption: Table 2 Transmission probabilities
|
||||
item-22 at level 4: table with [8x3]
|
||||
item-22 at level 5: caption: Table 3 Progression rates
|
||||
item-23 at level 4: table with [20x3]
|
||||
item-23 at level 5: caption: Table 4 Parameter values
|
||||
item-24 at level 4: text: The effects of the CCR5 W/Δ32 an ... nting this probability of infection is
|
||||
item-25 at level 4: formula: {\lambda}_{\hat {i},\hat {j},\h ... \hat {i},\hat {j},\hat {k}} \right] ,
|
||||
item-26 at level 4: text: where j ≠ is either male or fe ... e those with AIDS in the simulations).
|
||||
item-27 at level 4: text: The average rate of partner acqu ... owing the male rates to vary (36, 37).
|
||||
item-28 at level 4: section_header: Transmission probabilities.
|
||||
item-29 at level 5: text: The effect of a genetic factor i ... reported; ref. 42) (ref. 43, Table 2).
|
||||
item-30 at level 5: text: Given the assumption of no treat ... ases during the end stage of disease).
|
||||
item-31 at level 4: section_header: Disease progression.
|
||||
item-32 at level 5: text: We assume three stages of HIV in ... ssion rates are summarized in Table 3.
|
||||
item-33 at level 3: section_header: Demographic Setting.
|
||||
item-34 at level 4: text: Demographic parameters are based ... [suppressing (t) notation]: χ1,j 1,j =
|
||||
item-35 at level 4: formula: B_{r}\hspace{.167em}{ \,\substa ... }+I_{2,M,k})}{N_{M}} \right] + \right
|
||||
item-36 at level 4: formula: p_{v} \left \left( \frac{(I_{1, ... ght] \right) \right] ,\hspace{.167em}
|
||||
item-37 at level 4: text: where the probability of HIV ver ... heir values are summarized in Table 4.
|
||||
item-38 at level 2: section_header: Prevalence of HIV
|
||||
item-39 at level 3: section_header: Demographics and Model Validation.
|
||||
item-40 at level 4: text: The model was validated by using ... 5% to capture early epidemic behavior.
|
||||
item-41 at level 4: text: In deciding on our initial value ... n within given subpopulations (2, 49).
|
||||
item-42 at level 4: text: In the absence of HIV infection, ... those predicted by our model (Fig. 2).
|
||||
item-43 at level 4: picture
|
||||
item-43 at level 5: caption: Figure 2 Model simulation of HIV infection in a population lacking the protective CCR5Δ32 allele compared with national data from Kenya (healthy adults) and Mozambique (blood donors, ref. 17). The simulated population incorporates parameter estimates from sub-Saharan African demographics. Note the two outlier points from the Mozambique data were likely caused by underreporting in the early stages of the epidemic.
|
||||
item-44 at level 3: section_header: Effects of the Allele on Prevalence.
|
||||
item-45 at level 4: text: After validating the model in th ... among adults for total HIV/AIDS cases.
|
||||
item-46 at level 4: text: Although CCR5Δ32/Δ32 homozygosit ... frequency of the mutation as 0.105573.
|
||||
item-47 at level 4: text: Fig. 3 shows the prevalence of H ... mic, reaching 18% before leveling off.
|
||||
item-48 at level 4: picture
|
||||
item-48 at level 5: caption: Figure 3 Prevalence of HIV/AIDS in the adult population as predicted by the model. The top curve (○) indicates prevalence in a population lacking the protective allele. We compare that to a population with 19% heterozygous and 1% homozygous for the allele (implying an allelic frequency of 0.105573. Confidence interval bands (light gray) are shown around the median simulation () providing a range of uncertainty in evaluating parameters for the effect of the mutation on the infectivity and the duration of asymptomatic HIV for heterozygotes.
|
||||
item-49 at level 4: text: In contrast, when a proportion o ... gins to decline slowly after 70 years.
|
||||
item-50 at level 4: text: In the above simulations we assu ... in the presence of the CCR5 mutation.
|
||||
item-51 at level 4: text: Because some parameters (e.g., r ... s a major influence on disease spread.
|
||||
item-52 at level 2: section_header: HIV Induces Selective Pressure on Genotype Frequency
|
||||
item-53 at level 3: text: To observe changes in the freque ... for ≈1,600 years before leveling off.
|
||||
item-54 at level 3: picture
|
||||
item-54 at level 4: caption: Figure 4 Effects of HIV-1 on selection of the CCR5Δ32 allele. The Hardy-Weinberg equilibrium level is represented in the no-infection simulation (solid lines) for each population. Divergence from the original Hardy-Weinberg equilibrium is shown to occur in the simulations that include HIV infection (dashed lines). Fraction of the total subpopulations are presented: (A) wild types (W/W), (B) heterozygotes (W/Δ32), and (C) homozygotes (Δ32/Δ32). Note that we initiate this simulation with a much lower allelic frequency (0.00105) than used in the rest of the study to better exemplify the actual selective effect over a 1,000-year time scale. (D) The allelic selection effect over a 2,000-year time scale.
|
||||
item-55 at level 2: section_header: Discussion
|
||||
item-56 at level 3: text: This study illustrates how popul ... pulations where the allele is present.
|
||||
item-57 at level 3: text: We also observed that HIV can pr ... is) have been present for much longer.
|
||||
item-58 at level 3: text: Two mathematical models have con ... ce of the pathogen constant over time.
|
||||
item-59 at level 3: text: Even within our focus on host pr ... f a protective allele such as CCR5Δ32.
|
||||
item-60 at level 3: text: Although our models demonstrate ... f the population to epidemic HIV (16).
|
||||
item-61 at level 3: text: In assessing the HIV/AIDS epidem ... for education and prevention programs.
|
||||
item-62 at level 2: section_header: Acknowledgments
|
||||
item-63 at level 3: text: We thank Mark Krosky, Katia Koel ... ers for extremely insightful comments.
|
||||
item-64 at level 2: section_header: References
|
||||
item-65 at level 3: list: group list
|
||||
item-66 at level 4: list_item: Weiss HA, Hawkes S. Leprosy Rev 72:92–98 (2001). PMID: 11355525
|
||||
item-67 at level 4: list_item: Taha TE, Dallabetta GA, Hoover D ... AIDS 12:197–203 (1998). PMID: 9468369
|
||||
item-68 at level 4: list_item: AIDS Epidemic Update. Geneva: World Health Organization1–17 (1998).
|
||||
item-69 at level 4: list_item: D'Souza MP, Harden VA. Nat Med 2:1293–1300 (1996). PMID: 8946819
|
||||
item-70 at level 4: list_item: Martinson JJ, Chapman NH, Rees D ... Genet 16:100–103 (1997). PMID: 9140404
|
||||
item-71 at level 4: list_item: Roos MTL, Lange JMA, deGoede REY ... Dis 165:427–432 (1992). PMID: 1347054
|
||||
item-72 at level 4: list_item: Garred P, Eugen-Olsen J, Iversen ... Lancet 349:1884 (1997). PMID: 9217763
|
||||
item-73 at level 4: list_item: Katzenstein TL, Eugen-Olsen J, H ... rovirol 16:10–14 (1997). PMID: 9377119
|
||||
item-74 at level 4: list_item: deRoda H, Meyer K, Katzenstain W ... ce 273:1856–1862 (1996). PMID: 8791590
|
||||
item-75 at level 4: list_item: Meyer L, Magierowska M, Hubert J ... AIDS 11:F73–F78 (1997). PMID: 9302436
|
||||
item-76 at level 4: list_item: Smith MW, Dean M, Carrington M, ... ence 277:959–965 (1997). PMID: 9252328
|
||||
item-77 at level 4: list_item: Samson M, Libert F, Doranz BJ, R ... don) 382:722–725 (1996). PMID: 8751444
|
||||
item-78 at level 4: list_item: McNicholl JM, Smith DK, Qari SH, ... ct Dis 3:261–271 (1997). PMID: 9284370
|
||||
item-79 at level 4: list_item: Michael NL, Chang G, Louie LG, M ... at Med 3:338–340 (1997). PMID: 9055864
|
||||
item-80 at level 4: list_item: Mayaud P, Mosha F, Todd J, Balir ... IDS 11:1873–1880 (1997). PMID: 9412707
|
||||
item-81 at level 4: list_item: Hoffman IF, Jere CS, Taylor TE, ... li P, Dyer JR. AIDS 13:487–494 (1998).
|
||||
item-82 at level 4: list_item: HIV/AIDS Surveillance Database. ... International Programs Center (1999).
|
||||
item-83 at level 4: list_item: Anderson RM, May RM, McLean AR. ... don) 332:228–234 (1988). PMID: 3279320
|
||||
item-84 at level 4: list_item: Berger EA, Doms RW, Fenyo EM, Ko ... (London) 391:240 (1998). PMID: 9440686
|
||||
item-85 at level 4: list_item: Alkhatib G, Broder CC, Berger EA ... rol 70:5487–5494 (1996). PMID: 8764060
|
||||
item-86 at level 4: list_item: Choe H, Farzan M, Sun Y, Sulliva ... ell 85:1135–1148 (1996). PMID: 8674119
|
||||
item-87 at level 4: list_item: Deng H, Liu R, Ellmeier W, Choe ... don) 381:661–666 (1996). PMID: 8649511
|
||||
item-88 at level 4: list_item: Doranz BJ, Rucker J, Yi Y, Smyth ... ell 85:1149–1158 (1996). PMID: 8674120
|
||||
item-89 at level 4: list_item: Dragic T, Litwin V, Allaway GP, ... don) 381:667–673 (1996). PMID: 8649512
|
||||
item-90 at level 4: list_item: Zhu T, Mo H, Wang N, Nam DS, Cao ... ce 261:1179–1181 (1993). PMID: 8356453
|
||||
item-91 at level 4: list_item: Bjorndal A, Deng H, Jansson M, F ... rol 71:7478–7487 (1997). PMID: 9311827
|
||||
item-92 at level 4: list_item: Conner RI, Sheridan KE, Ceradini ... Med 185:621–628 (1997). PMID: 9034141
|
||||
item-93 at level 4: list_item: Liu R, Paxton WA, Choe S, Ceradi ... Cell 86:367–377 (1996). PMID: 8756719
|
||||
item-94 at level 4: list_item: Mussico M, Lazzarin A, Nicolosi ... w) 154:1971–1976 (1994). PMID: 8074601
|
||||
item-95 at level 4: list_item: Michael NL, Nelson JA, KewalRama ... rol 72:6040–6047 (1998). PMID: 9621067
|
||||
item-96 at level 4: list_item: Hethcote HW, Yorke JA. Gonorrhea ... and Control. Berlin: Springer (1984).
|
||||
item-97 at level 4: list_item: Anderson RM, May RM. Nature (London) 333:514–522 (1988). PMID: 3374601
|
||||
item-98 at level 4: list_item: Asiimwe-Okiror G, Opio AA, Musin ... IDS 11:1757–1763 (1997). PMID: 9386811
|
||||
item-99 at level 4: list_item: Carael M, Cleland J, Deheneffe J ... AIDS 9:1171–1175 (1995). PMID: 8519454
|
||||
item-100 at level 4: list_item: Blower SM, Boe C. J AIDS 6:1347–1352 (1993). PMID: 8254474
|
||||
item-101 at level 4: list_item: Kirschner D. J Appl Math 56:143–166 (1996).
|
||||
item-102 at level 4: list_item: Le Pont F, Blower S. J AIDS 4:987–999 (1991). PMID: 1890608
|
||||
item-103 at level 4: list_item: Kim MY, Lagakos SW. Ann Epidemiol 1:117–128 (1990). PMID: 1669741
|
||||
item-104 at level 4: list_item: Anderson RM, May RM. Infectious ... ol. Oxford: Oxford Univ. Press (1992).
|
||||
item-105 at level 4: list_item: Ragni MV, Faruki H, Kingsley LA. ... ed Immune Defic Syndr 17:42–45 (1998).
|
||||
item-106 at level 4: list_item: Kaplan JE, Khabbaz RF, Murphy EL ... virol 12:193–201 (1996). PMID: 8680892
|
||||
item-107 at level 4: list_item: Padian NS, Shiboski SC, Glass SO ... nghoff E. Am J Edu 146:350–357 (1997).
|
||||
item-108 at level 4: list_item: Leynaert B, Downs AM, de Vincenzi I. Am J Edu 148:88–96 (1998).
|
||||
item-109 at level 4: list_item: Garnett GP, Anderson RM. J Acquired Immune Defic Syndr 9:500–513 (1995).
|
||||
item-110 at level 4: list_item: Stigum H, Magnus P, Harris JR, S ... eteig LS. Am J Edu 145:636–643 (1997).
|
||||
item-111 at level 4: list_item: Ho DD, Neumann AU, Perelson AS, ... don) 373:123–126 (1995). PMID: 7816094
|
||||
item-112 at level 4: list_item: World Resources (1998–1999). Oxford: Oxford Univ. Press (1999).
|
||||
item-113 at level 4: list_item: Kostrikis LG, Neumann AU, Thomso ... 73:10264–10271 (1999). PMID: 10559343
|
||||
item-114 at level 4: list_item: Low-Beer D, Stoneburner RL, Muku ... at Med 3:553–557 (1997). PMID: 9142126
|
||||
item-115 at level 4: list_item: Grosskurth H, Mosha F, Todd J, S ... . AIDS 9:927–934 (1995). PMID: 7576329
|
||||
item-116 at level 4: list_item: Melo J, Beby-Defaux A, Faria C, ... AIDS 23:203–204 (2000). PMID: 10737436
|
||||
item-117 at level 4: list_item: Iman RL, Helton JC, Campbell JE. J Quality Technol 13:174–183 (1981).
|
||||
item-118 at level 4: list_item: Iman RL, Helton JC, Campbell JE. J Quality Technol 13:232–240 (1981).
|
||||
item-119 at level 4: list_item: Blower SM, Dowlatabadi H. Int Stat Rev 62:229–243 (1994).
|
||||
item-120 at level 4: list_item: Porco TC, Blower SM. Theor Popul Biol 54:117–132 (1998). PMID: 9733654
|
||||
item-121 at level 4: list_item: Blower SM, Porco TC, Darby G. Nat Med 4:673–678 (1998). PMID: 9623975
|
||||
item-122 at level 4: list_item: Libert F, Cochaux P, Beckman G, ... Genet 7:399–406 (1998). PMID: 9466996
|
||||
item-123 at level 4: list_item: Lalani AS, Masters J, Zeng W, Ba ... e 286:1968–1971 (1999). PMID: 10583963
|
||||
item-124 at level 4: list_item: Kermack WO, McKendrick AG. Proc R Soc London 261:700–721 (1927).
|
||||
item-125 at level 4: list_item: Gupta S, Hill AVS. Proc R Soc London Ser B 260:271–277 (1995).
|
||||
item-126 at level 4: list_item: Ruwende C, Khoo SC, Snow RW, Yat ... don) 376:246–249 (1995). PMID: 7617034
|
||||
item-127 at level 4: list_item: McDermott DH, Zimmerman PA, Guig ... ncet 352:866–870 (1998). PMID: 9742978
|
||||
item-128 at level 4: list_item: Kostrikis LG, Huang Y, Moore JP, ... at Med 4:350–353 (1998). PMID: 9500612
|
||||
item-129 at level 4: list_item: Winkler C, Modi W, Smith MW, Nel ... ence 279:389–393 (1998). PMID: 9430590
|
||||
item-130 at level 4: list_item: Martinson JJ, Hong L, Karanicola ... AIDS 14:483–489 (2000). PMID: 10780710
|
||||
item-131 at level 4: list_item: Vernazza PL, Eron JJ, Fiscus SA, ... AIDS 13:155–166 (1999). PMID: 10202821
|
||||
item-132 at level 1: caption: Figure 1 A schematic representat ... of HIV infection. M, male; F, female.
|
||||
item-133 at level 1: caption: Table 1 Children's genotype
|
||||
item-134 at level 1: caption: Table 2 Transmission probabilities
|
||||
item-135 at level 1: caption: Table 3 Progression rates
|
||||
item-136 at level 1: caption: Table 4 Parameter values
|
||||
item-137 at level 1: caption: Figure 2 Model simulation of HIV ... g in the early stages of the epidemic.
|
||||
item-138 at level 1: caption: Figure 3 Prevalence of HIV/AIDS ... of asymptomatic HIV for heterozygotes.
|
||||
item-139 at level 1: caption: Figure 4 Effects of HIV-1 on sel ... n effect over a 2,000-year time scale.
|
6353
tests/data/groundtruth/docling_v2/pnas_sample.xml.json
vendored
6353
tests/data/groundtruth/docling_v2/pnas_sample.xml.json
vendored
File diff suppressed because it is too large
Load Diff
258
tests/data/groundtruth/docling_v2/pnas_sample.xml.md
vendored
258
tests/data/groundtruth/docling_v2/pnas_sample.xml.md
vendored
@ -1,258 +0,0 @@
|
||||
# The coreceptor mutation CCR5Δ32 influences the dynamics of HIV epidemics and is selected for by HIV
|
||||
|
||||
Amy D. Sullivan, Janis Wigginton, Denise Kirschner
|
||||
|
||||
Department of Microbiology and Immunology, University of Michigan Medical School, Ann Arbor, MI 48109-0620
|
||||
|
||||
## Abstract
|
||||
|
||||
We explore the impact of a host genetic factor on heterosexual HIV epidemics by using a deterministic mathematical model. A protective allele unequally distributed across populations is exemplified in our models by the 32-bp deletion in the host-cell chemokine receptor CCR5, CCR5Δ32. Individuals homozygous for CCR5Δ32 are protected against HIV infection whereas those heterozygous for CCR5Δ32 have lower pre-AIDS viral loads and delayed progression to AIDS. CCR5Δ32 may limit HIV spread by decreasing the probability of both risk of infection and infectiousness. In this work, we characterize epidemic HIV within three dynamic subpopulations: CCR5/CCR5 (homozygous, wild type), CCR5/CCR5Δ32 (heterozygous), and CCR5Δ32/CCR5Δ32 (homozygous, mutant). Our results indicate that prevalence of HIV/AIDS is greater in populations lacking the CCR5Δ32 alleles (homozygous wild types only) as compared with populations that include people heterozygous or homozygous for CCR5Δ32. Also, we show that HIV can provide selective pressure for CCR5Δ32, increasing the frequency of this allele.
|
||||
|
||||
Nineteen million people have died of AIDS since the discovery of HIV in the 1980s. In 1999 alone, 5.4 million people were newly infected with HIV (ref. 1 and http://www.unaids.org/epidemicupdate/report/Epireport.html). (For brevity, HIV-1 is referred to as HIV in this paper.) Sub-Saharan Africa has been hardest hit, with more than 20% of the general population HIV-positive in some countries (2, 3). In comparison, heterosexual epidemics in developed, market-economy countries have not reached such severe levels. Factors contributing to the severity of the epidemic in economically developing countries abound, including economic, health, and social differences such as high levels of sexually transmitted diseases and a lack of prevention programs. However, the staggering rate at which the epidemic has spread in sub-Saharan Africa has not been adequately explained. The rate and severity of this epidemic also could indicate a greater underlying susceptibility to HIV attributable not only to sexually transmitted disease, economics, etc., but also to other more ubiquitous factors such as host genetics (4, 5).
|
||||
|
||||
To exemplify the contribution of such a host genetic factor to HIV prevalence trends, we consider a well-characterized 32-bp deletion in the host-cell chemokine receptor CCR5, CCR5Δ32. When HIV binds to host cells, it uses the CD4 receptor on the surface of host immune cells together with a coreceptor, mainly the CCR5 and CXCR4 chemokine receptors (6). Homozygous mutations for this 32-bp deletion offer almost complete protection from HIV infection, and heterozygous mutations are associated with lower pre-AIDS viral loads and delayed progression to AIDS (7–14). CCR5Δ32 generally is found in populations of European descent, with allelic frequencies ranging from 0 to 0.29 (13). African and Asian populations studied outside the United States or Europe appear to lack the CCR5Δ32 allele, with an allelic frequency of almost zero (5, 13). Thus, to understand the effects of a protective allele, we use a mathematical model to track prevalence of HIV in populations with or without CCR5Δ32 heterozygous and homozygous people and also to follow the CCR5Δ32 allelic frequency.
|
||||
|
||||
We hypothesize that CCR5Δ32 limits epidemic HIV by decreasing infection rates, and we evaluate the relative contributions to this by the probability of infection and duration of infectivity. To capture HIV infection as a chronic infectious disease together with vertical transmission occurring in untreated mothers, we model a dynamic population (i.e., populations that vary in growth rates because of fluctuations in birth or death rates) based on realistic demographic characteristics (18). This scenario also allows tracking of the allelic frequencies over time. This work considers how a specific host genetic factor affecting HIV infectivity and viremia at the individual level might influence the epidemic in a dynamic population and how HIV exerts selective pressure, altering the frequency of this mutant allele.
|
||||
|
||||
CCR5 is a host-cell chemokine receptor, which is also used as a coreceptor by R5 strains of HIV that are generally acquired during sexual transmission (6, 19–25). As infection progresses to AIDS the virus expands its repertoire of potential coreceptors to include other CC-family and CXC-family receptors in roughly 50% of patients (19, 26, 27). CCR5Δ32 was identified in HIV-resistant people (28). Benefits to individuals from the mutation in this allele are as follows. Persons homozygous for the CCR5Δ32 mutation are almost nonexistent in HIV-infected populations (11, 12) (see ref. 13 for review). Persons heterozygous for the mutant allele (CCR5 W/Δ32) tend to have lower pre-AIDS viral loads. Aside from the beneficial effects that lower viral loads may have for individuals, there is also an altruistic effect, as transmission rates are reduced for individuals with low viral loads (as compared with, for example, AZT and other studies; ref. 29). Finally, individuals heterozygous for the mutant allele (CCR5 W/Δ32) also have a slower progression to AIDS than those homozygous for the wild-type allele (CCR5 W/W) (7–10), remaining in the population 2 years longer, on average. Interestingly, the dearth of information on HIV disease progression in people homozygous for the CCR5Δ32 allele (CCR5 Δ32/Δ32) stems from the rarity of HIV infection in this group (4, 12, 28). However, in case reports of HIV-infected CCR5 Δ32/Δ32 homozygotes, a rapid decline in CD4+ T cells and a high viremia are observed, likely because of initial infection with a more aggressive viral strain (such as X4 or R5X4) (30).
|
||||
|
||||
## The Model
|
||||
|
||||
Because we are most concerned with understanding the severity of the epidemic in developing countries where the majority of infection is heterosexual, we consider a purely heterosexual model. To model the effects of the allele in the population, we examine the rate of HIV spread by using an enhanced susceptible-infected-AIDS model of epidemic HIV (for review see ref. 31). Our model compares two population scenarios: a CCR5 wild-type population and one with CCR5Δ32 heterozygotes and homozygotes in addition to the wild type. To model the scenario where there are only wild-type individuals present in the population (i.e., CCR5 W/W), we track the sexually active susceptibles at time t [Si,j (t)], where i = 1 refers to genotype (CCR5 W/W only in this case) and j is either the male or female subpopulation. We also track those who are HIV-positive at time t not yet having AIDS in Ii,j,k (t) where k refers to stage of HIV infection [primary (A) or asymptomatic (B)]. The total number of individuals with AIDS at time t are tracked in A(t). The source population are children, χ i,j (t), who mature into the sexually active population at time t (Fig. 1, Table 1). We compare the model of a population lacking the CCR5Δ32 allele to a demographically similar population with a high frequency of the allele. When genetic heterogeneity is included, male and female subpopulations are each further divided into three distinct genotypic groups, yielding six susceptible subpopulations, [Si,j (t), where i ranges from 1 to 3, where 1 = CCR5W/W; 2 = CCR5 W/Δ32; 3 = CCR5 Δ32/Δ32]. The infected classes, Ii,j,k (t), also increase in number to account for these new genotype compartments. In both settings we assume there is no treatment available and no knowledge of HIV status by people in the early acute and middle asymptomatic stages (both conditions exist in much of sub-Saharan Africa). 
In addition, we assume that sexual mixing in the population occurs randomly with respect to genotype and HIV disease status, all HIV-infected people eventually progress to AIDS, and no barrier contraceptives are used. These last assumptions reflect both economic and social conditions.
|
||||
|
||||
Figure 1 A schematic representation of the basic compartmental HIV epidemic model. The criss-cross lines indicate the sexual mixing between different compartments. Each of these interactions has a positive probability of taking place; they also incorporate individual rates of transmission indicated as λ, written in full notation as λ î,ĵ,k̂→i,j, where î,ĵ,k̂ is the phenotype of the infected partner and i,j is the phenotype of the susceptible partner. Also shown are the different rates of disease progression, γ i,j,k , that vary according to genotype, gender, and stage. Thus, the interactions between different genotypes, genders, and stages are associated with a unique probability of HIV infection. M, male; F, female.
|
||||
|
||||
<!-- image -->
|
||||
|
||||
Table 1 Children's genotype
|
||||
|
||||
| Parents | Mother | Mother | Mother | Mother |
|
||||
|-----------|----------|--------------------|------------------------------|--------------------|
|
||||
| | | | | |
|
||||
| Father | | W/W | W/Δ32 | Δ32/Δ32 |
|
||||
| | W/W | χ1,j | χ1,j, χ2,j | χ2,j |
|
||||
| | W/Δ32 | χ1,j, χ2,j | χ1,j, χ2,j, χ3,j | χ2,j, χ3,j |
|
||||
| | Δ32/Δ32 | χ2,j | χ2,j, χ3,j | χ3,j |
|
||||
|
||||
### Parameter Estimates for the Model.
|
||||
|
||||
Estimates for rates that govern the interactions depicted in Fig. 1 were derived from the extensive literature on HIV. Our parameters and their estimates are summarized in Tables 2–4. The general form of the equations describing the rates of transition between population classes as depicted in Fig. 1 are summarized as follows:
|
||||
|
||||
$$ \frac{dS_{i,j}(t)}{dt}={\chi}_{i,j}(t)-{\mu}_{j}S_{i,j}(t)-{\lambda}_{\hat {\imath},\hat {\jmath},\hat {k}{\rightarrow}i,j}S_{i,j}(t), $$
|
||||
|
||||
$$ \frac{dI_{i,j,A}(t)}{dt}={\lambda}_{\hat {\imath},\hat {\jmath},\hat {k}{\rightarrow}i,j}S_{i,j}(t)-{\mu}_{j}I_{i,j,A}(t)-{\gamma}_{i,j,A}I_{i,j,A}(t), $$
|
||||
|
||||
$$ \frac{dI_{i,j,B}(t)}{dt}={\gamma}_{i,j,A}I_{i,j,A}(t)-{\mu}_{j}I_{i,j,B}(t)-{\gamma}_{i,j,B}I_{i,j,B}(t), $$
|
||||
|
||||
$$ \frac{dA(t)}{dt}={\gamma}_{i,j,B} \left( \sum_{i=1}^{3} I_{i,F,B}(t)+I_{i,M,B}(t) \right) -{\mu}_{A}A(t)-{\delta}A(t), $$
|
||||
|
||||
where, in addition to previously defined populations and rates (with i equals genotype, j equals gender, and k equals stage of infection, either A or B), μ j represents the non-AIDS (natural) death rate for males and females respectively, and μA is estimated by the average (μF + μM)/2. This approximation allows us to simplify the model (only one AIDS compartment) without compromising the results, as most people with AIDS die of AIDS (δAIDS) and very few of other causes (μA). These estimates include values that affect infectivity (λ î,ĵ,k̂→i,j ), transmission (β î,ĵ,k̂→i,j ), and disease progression (γ i,j,k ) where the î,ĵ,k̂ notation represents the genotype, gender, and stage of infection of the infected partner, and j ≠ ĵ.
|
||||
|
||||
Table 2 Transmission probabilities
|
||||
|
||||
| HIV-infected partner (î, ĵ, k̂) | Susceptible partner (i, j) | Susceptible partner (i, j) | Susceptible partner (i, j) | Susceptible partner (i, j) |
|
||||
|-----------------------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|
|
||||
| HIV-infected partner (î, ĵ, k̂) | | | | |
|
||||
| HIV-infected partner (î, ĵ, k̂) | (ĵ to j) | W/W | W/Δ32 | Δ32/Δ32 |
|
||||
| | | | | |
|
||||
| Acute/primary | | | | |
|
||||
| W/W or Δ32/Δ32 | M to F | 0.040 | 0.040 | 0.00040 |
|
||||
| | F to M | 0.020 | 0.020 | 0.00020 |
|
||||
| W/Δ32 | M to F | 0.030 | 0.030 | 0.00030 |
|
||||
| | F to M | 0.015 | 0.015 | 0.00015 |
|
||||
| Asymptomatic | | | | |
|
||||
| W/W or Δ32/Δ32 | M to F | 0.0010 | 0.0010 | 10 × 10−6 |
|
||||
| | F to M | 0.0005 | 0.0005 | 5 × 10−6 |
|
||||
| W/Δ32 | M to F | 0.0005 | 0.0005 | 5 × 10−6 |
|
||||
| | F to M | 0.00025 | 0.00025 | 2.5 × 10−6 |
|
||||
|
||||
Table 3 Progression rates
|
||||
|
||||
| Genotype | Disease stage | Males/females |
|
||||
|------------|-----------------|------------------|
|
||||
| | | |
|
||||
| W/W | A | 3.5 |
|
||||
| | B | 0.16667 |
|
||||
| W/Δ32 | A | 3.5 |
|
||||
| | B | 0.125 |
|
||||
| Δ32/Δ32 | A | 3.5 |
|
||||
| | B | 0.16667 |
|
||||
|
||||
Table 4 Parameter values
|
||||
|
||||
| Parameter | Definition | Value |
|
||||
|-----------------------------------------|----------------------------------------------------------|-------------------------|
|
||||
| | | |
|
||||
| μ F, μ M | All-cause mortality for adult females (males) | 0.015 (0.016) per year |
|
||||
| μχ | All-cause childhood mortality (<15 years of age) | 0.01 per year |
|
||||
| B r | Birthrate | 0.25 per woman per year |
|
||||
| SA F | Percent females acquiring new partners (sexual activity) | 10% |
|
||||
| SA M | Percent males acquiring new partners (sexual activity) | 25% |
|
||||
| m F (ς²F) | Mean (variance) no. of new partners for females | 1.8 (1.2) per year |
|
||||
| ς²M | Variance in no. of new partners for males | 5.5 per year |
|
||||
| 1 − p v | Probability of vertical transmission | 0.30 per birth |
|
||||
| I i,j,k (0) | Initial total population HIV-positive | 0.50% |
|
||||
| χ i,j (0) | Initial total children in population (<15 years of age) | 45% |
|
||||
| W/W (0) | Initial total wild types (W/W) in population | 80% |
|
||||
| W/Δ32(0) | Initial total heterozygotes (W/Δ32) in population | 19% |
|
||||
| Δ32/Δ32(0) | Initial total homozygotes (Δ32/Δ32) in population | 1% |
|
||||
| r M (r F) | Initial percent males (females) in total population | 49% (51%) |
|
||||
| ϕ F, ϕ M | Number of sexual contacts a female (male) has | 30 (24) per partner |
|
||||
| ɛ i,j,k | % effect of mutation on transmission rates (see Table 2) | 0 < ɛ i,j,k < 1 |
|
||||
| δ | Death rate for AIDS population | 1.0 per year |
|
||||
| q | Allelic frequency of Δ32 allele | 0.105573 |
|
||||
|
||||
The effects of the CCR5 W/Δ32 and CCR5 Δ32/Δ32 genotypes are included in our model through both the per-capita probabilities of infection, λ î,ĵ,k̂→i,j , and the progression rates, γ i,j,k . The infectivity coefficients, λ î,ĵ,k̂→i,j , are calculated for each population subgroup based on the following: likelihood of HIV transmission in a sexual encounter between a susceptible and an infected (β î,ĵ,k̂→i,j ) person; formation of new partnerships (c j ); number of contacts in a given partnership (ϕ j ); and probability of encountering an infected individual (I î,ĵ,k̂ /N ĵ ). The formula representing this probability of infection is
|
||||
|
||||
$$ {\lambda}_{\hat {i},\hat {j},\hat {k}{\rightarrow}i,j}=\frac{c_{j}{\cdot}{\phi}_{j}}{N_{\hat {j}}}\hspace{.167em} \left[ \sum_{\hat {i},\hat {k}} {\beta}_{\hat {i},\hat {j},\hat {k}{\rightarrow}i,j}{\cdot}I_{\hat {i},\hat {j},\hat {k}} \right] , $$
|
||||
|
||||
where j ≠ ĵ is either male or female. N ĵ represents the total population of gender ĵ (this does not include those with AIDS in the simulations).
|
||||
|
||||
The average rate of partner acquisition, cj , includes the mean plus the variance to mean ratio of the relevant distribution of partner-change rates to capture the small number of high-risk people: cj = mj + (ς²j/mj), where the mean (mj) and variance (ς²j) are annual figures for new partnerships only (32). These means are estimated from Ugandan data for the number of heterosexual partners in the past year (33) and the number of nonregular heterosexual partners (i.e., spouses or long-term partners) in the past year (34). In these sexual activity surveys, men invariably have more new partnerships; thus, we assumed that they would have fewer average contacts per partnership than women (a higher rate of new partner acquisition means fewer sexual contacts with a given partner; ref. 35). To incorporate this assumption in our model, the male contacts/partnership, ϕ M , was reduced by 20%. In a given population, the numbers of heterosexual interactions must equate between males and females. The balancing equation applied here is SA F·m F·N F = SA M·m M·N M, where SAj are the percent sexually active and Nj are the total in the populations for gender j. To specify changes in partner acquisition, we apply a male flexibility mechanism, holding the female rate of acquisition constant and allowing the male rates to vary (36, 37).
|
||||
|
||||
#### Transmission probabilities.
|
||||
|
||||
The effect of a genetic factor in a model of HIV transmission can be included by reducing the transmission coefficient. The probabilities of transmission per contact with an infected partner, β î,ĵ,k̂→i,j , have been estimated in the literature (see ref. 38 for estimates in minimally treated groups). We want to capture a decreased risk in transmission based on genotype (ref. 39, Table 2). No studies have directly evaluated differences in infectivity between HIV-infected CCR5 W/Δ32 heterozygotes and HIV-infected CCR5 wild types. Thus, we base estimates for reduced transmission on studies of groups with various HIV serum viral loads (40), HTLV-I/II viral loads (41), and a study of the effect of AZT treatment on transmission (29). We decrease transmission probabilities for infecting CCR5Δ32/Δ32 persons by 100-fold to reflect the rarity of infections in these persons. However, we assume that infected CCR5Δ32/Δ32 homozygotes can infect susceptibles at a rate similar to CCR5W/W homozygotes, as the former generally have high viremias (ref. 30, Table 2). We also assume that male-to-female transmission is twice as efficient as female-to-male transmission (up to a 9-fold difference has been reported; ref. 42) (ref. 43, Table 2).
|
||||
|
||||
Given the assumption of no treatment, the high burden of disease in people with AIDS is assumed to greatly limit their sexual activity. Our initial model excludes people with AIDS from the sexually active groups. Subsequently, we allow persons with AIDS to be sexually active, fixing their transmission rates (βAIDS) to be the same across all CCR5 genotypes, and lower than transmission rates for primary-stage infection (as the viral burden on average is not as high as during the acute phase), and larger than transmission rates for asymptomatic-stage infection (as the viral burden characteristically increases during the end stage of disease).
|
||||
|
||||
#### Disease progression.
|
||||
|
||||
We assume three stages of HIV infection: primary (acute, stage A), asymptomatic HIV (stage B), and AIDS. The rates of transition through the first two stages are denoted by γ i,j,k, where i represents genotype, j is male/female, and k represents either stage A or stage B. Transition rates through each of these stages are assumed to be inversely proportional to the duration of that stage; however, other distributions are possible (31, 44, 45). Although viral loads generally peak in the first 2 months of infection, steady-state viral loads are established several months beyond this (46). For group A, the primary HIV-infecteds, duration is assumed to be 3.5 months. Based on results from European cohort studies (7–10), the beneficial effects of the CCR5 W/Δ32 genotype are observed mainly in the asymptomatic years of HIV infection; ≈7 years after seroconversion survival rates appear to be quite similar between heterozygous and homozygous individuals. We also assume that CCR5Δ32/Δ32-infected individuals and wild-type individuals progress similarly, and that men and women progress through each disease stage at the same rate. Given these observations, and that survival after infection may be shorter in untreated populations, we choose the duration time in stage B to be 6 years for wild-type individuals and 8 years for heterozygous individuals. Transition through AIDS, δAIDS, is inversely proportional to the duration of AIDS. We estimate this value to be 1 year for the time from onset of AIDS to death. The progression rates are summarized in Table 3.
|
||||
|
||||
### Demographic Setting.
|
||||
|
||||
Demographic parameters are based on data from Malawi, Zimbabwe, and Botswana (3, 47). Estimated birth and child mortality rates are used to calculate the annual numbers of children (χ i,j) maturing into the potentially sexually active, susceptible group at the age of 15 years (3). For example, in the case where the mother is CCR5 wild type and the father is CCR5 wild type or heterozygous, the number of CCR5 W/W children is calculated as follows [suppressing (t) notation]: χ1,j =
|
||||
|
||||
$$ B_{r}\hspace{.167em}\sum_{k} \left[ S_{1,F}\frac{(S_{1,M}+I_{1,M,k})}{N_{M}}+ \left[ (0.5)S_{1,F}\frac{(S_{2,M}+I_{2,M,k})}{N_{M}} \right] + \right. $$
|
||||
|
||||
$$ \left. p_{v} \left( \frac{(I_{1,F,k}(S_{1,M}+I_{1,M,k}))}{N_{M}}+ \left[ (0.5)I_{1,F,k}\frac{(S_{2,M}+I_{2,M,k})}{N_{M}} \right] \right) \right] ,\hspace{.167em} $$
|
||||
|
||||
where the probability of HIV vertical transmission, 1 − pv , and the birthrate, Br , are both included in the equations together with the Mendelian inheritance values as presented in Table 1. The generalized version of this equation (i.e., χ i,j) can account for six categories of children (including gender and genotype). We assume that all children of all genotypes are at risk, although we can relax this condition if data become available to support vertical protection (e.g., ref. 48). All infected children are assumed to die before age 15. Before entering the susceptible group at age 15, there is additional loss because of mortality from all non-AIDS causes occurring less than 15 years of age at a rate of μχ × χ i,j (where μχ is the mortality under 15 years of age). Children then enter the population as susceptibles at an annual rate, ς j × χ i,j /15, where ς j distributes the children 51% females and 49% males. All parameters and their values are summarized in Table 4.
|
||||
|
||||
## Prevalence of HIV
|
||||
|
||||
### Demographics and Model Validation.
|
||||
|
||||
The model was validated by using parameters estimated from available demographic data. Simulations were run in the absence of HIV infection to compare the model with known population growth rates. Infection was subsequently introduced with an initial low HIV prevalence of 0.5% to capture early epidemic behavior.
|
||||
|
||||
In deciding on our initial values for parameters during infection, we use Joint United Nations Programme on HIV/AIDS national prevalence data for Malawi, Zimbabwe, and Botswana. Nationwide seroprevalence of HIV in these countries varies from ≈11% to over 20% (3), although there may be considerable variation within given subpopulations (2, 49).
|
||||
|
||||
In the absence of HIV infection, the annual percent population growth rate in the model is ≈2.5%, predicting the present-day values for an average of sub-Saharan African cities (data not shown). To validate the model with HIV infection, we compare our simulation of the HIV epidemic to existing prevalence data for Kenya and Mozambique (http://www.who.int/emc-hiv/fact-sheets/pdfs/kenya.pdf and ref. 51). Prevalence data collected from these countries follow similar trajectories to those predicted by our model (Fig. 2).
|
||||
|
||||
Figure 2 Model simulation of HIV infection in a population lacking the protective CCR5Δ32 allele compared with national data from Kenya (healthy adults) and Mozambique (blood donors, ref. 17). The simulated population incorporates parameter estimates from sub-Saharan African demographics. Note the two outlier points from the Mozambique data were likely caused by underreporting in the early stages of the epidemic.
|
||||
|
||||
<!-- image -->
|
||||
|
||||
### Effects of the Allele on Prevalence.
|
||||
|
||||
After validating the model in the wild type-only population, both CCR5Δ32 heterozygous and homozygous people are included. Parameter values for HIV transmission, duration of illness, and numbers of contacts per partner are assumed to be the same within both settings. We then calculate HIV/AIDS prevalence among adults for total HIV/AIDS cases.
|
||||
|
||||
Although CCR5Δ32/Δ32 homozygosity is rarely seen in HIV-positive populations (prevalence ranges between 0 and 0.004%), 1–20% of people in HIV-negative populations of European descent are homozygous. Thus, to evaluate the potential impact of CCR5Δ32, we estimate there are 19% CCR5 W/Δ32 heterozygous and 1% CCR5 Δ32/Δ32 homozygous people in our population. These values are in Hardy-Weinberg equilibrium with an allelic frequency of the mutation as 0.105573.
|
||||
|
||||
Fig. 3 shows the prevalence of HIV in two populations: one lacking the mutant CCR5 allele and another carrying that allele. In the population lacking the protective mutation, prevalence increases logarithmically for the first 35 years of the epidemic, reaching 18% before leveling off.
|
||||
|
||||
Figure 3 Prevalence of HIV/AIDS in the adult population as predicted by the model. The top curve (○) indicates prevalence in a population lacking the protective allele. We compare that to a population with 19% heterozygous and 1% homozygous for the allele (implying an allelic frequency of 0.105573). Confidence interval bands (light gray) are shown around the median simulation () providing a range of uncertainty in evaluating parameters for the effect of the mutation on the infectivity and the duration of asymptomatic HIV for heterozygotes.
|
||||
|
||||
<!-- image -->
|
||||
|
||||
In contrast, when a proportion of the population carries the CCR5Δ32 allele, the epidemic increases more slowly, but still logarithmically, for the first 50 years, and HIV/AIDS prevalence reaches ≈12% (Fig. 3). Prevalence begins to decline slowly after 70 years.
|
||||
|
||||
In the above simulations we assume that people with AIDS are not sexually active. However, when these individuals are included in the sexually active population the severity of the epidemic increases considerably (data not shown). Consistent with our initial simulations, prevalences are still relatively lower in the presence of the CCR5 mutation.
|
||||
|
||||
Because some parameters (e.g., rate constants) are difficult to estimate based on available data, we implement an uncertainty analysis to assess the variability in the model outcomes caused by any inaccuracies in estimates of the parameter values with regard to the effect of the allelic mutation. For these analyses we use Latin hypercube sampling, as described in refs. 52–56. Our uncertainty and sensitivity analyses focus on infectivity vs. duration of infectiousness. To this end, we assess the effects on the dynamics of the epidemic for a range of values of the parameters governing transmission and progression rates: β î,ĵ,k̂→i,j and γ i,j,k. All other parameters are held constant. These results are presented as an interval band about the average simulation for the population carrying the CCR5Δ32 allele (Fig. 3). Although there is variability in the model outcomes, the analysis indicates that the overall model predictions are consistent for a wide range of transmission and progression rates. Further, most of the variation observed in the outcome is because of the transmission rates for both heterosexual males and females in the primary stage of infection (β2,M,A → i ,F, β2,F,A → i ,M). As mentioned above, we assume lower viral loads correlate with reduced infectivity; thus, the reduction in viral load in heterozygotes has a major influence on disease spread.
|
||||
|
||||
## HIV Induces Selective Pressure on Genotype Frequency
|
||||
|
||||
To observe changes in the frequency of the CCR5Δ32 allele in a setting with HIV infection as compared with the Hardy-Weinberg equilibrium in the absence of HIV, we follow changes in the total number of CCR5Δ32 heterozygotes and homozygotes over 1,000 years (Fig. 4). We initially perform simulations in the absence of HIV infection as a negative control to show there is not significant selection of the allele in the absence of infection. To determine how long it would take for the allelic frequency to reach present-day levels (e.g., q = 0.105573), we initiate this simulation for 1,000 years with a very small allelic frequency (q = 0.00105). In the absence of HIV, the allelic frequency is maintained in equilibrium as shown by the constant proportions of CCR5Δ32 heterozygotes and homozygotes (Fig. 4, solid lines). The selection for CCR5Δ32 in the presence of HIV is seen in comparison (Fig. 4, dashed lines). We expand the time frame of this simulation to 2,000 years to view the point at which the frequency reaches present levels (where q ∼0.105573 at year = 1200). Note that the allelic frequency increases for ≈1,600 years before leveling off.
|
||||
|
||||
Figure 4 Effects of HIV-1 on selection of the CCR5Δ32 allele. The Hardy-Weinberg equilibrium level is represented in the no-infection simulation (solid lines) for each population. Divergence from the original Hardy-Weinberg equilibrium is shown to occur in the simulations that include HIV infection (dashed lines). Fraction of the total subpopulations are presented: (A) wild types (W/W), (B) heterozygotes (W/Δ32), and (C) homozygotes (Δ32/Δ32). Note that we initiate this simulation with a much lower allelic frequency (0.00105) than used in the rest of the study to better exemplify the actual selective effect over a 1,000-year time scale. (D) The allelic selection effect over a 2,000-year time scale.
|
||||
|
||||
<!-- image -->
|
||||
|
||||
## Discussion
|
||||
|
||||
This study illustrates how populations can differ in susceptibility to epidemic HIV/AIDS depending on a ubiquitous attribute such as a prevailing genotype. We have examined heterosexual HIV epidemics by using mathematical models to assess HIV transmission in dynamic populations either with or without CCR5Δ32 heterozygous and homozygous persons. The most susceptible population lacks the protective mutation in CCR5. In less susceptible populations, the majority of persons carrying the CCR5Δ32 allele are heterozygotes. We explore the hypothesis that lower viral loads (CCR5Δ32 heterozygotes) or resistance to infection (CCR5Δ32 homozygotes) observed in persons with this coreceptor mutation ultimately can influence HIV epidemic trends. Two contrasting influences of the protective CCR5 allele are conceivable: it may limit the epidemic by decreasing the probability of infection because of lower viral loads in infected heterozygotes, or it may exacerbate the epidemic by extending the time that infectious individuals remain in the sexually active population. Our results strongly suggest the former. Thus, the absence of this allele in Africa could explain the severity of HIV disease as compared with populations where the allele is present.
|
||||
|
||||
We also observed that HIV can provide selective pressure for the CCR5Δ32 allele within a population, increasing the allelic frequency. Other influences may have additionally selected for this allele. Infectious diseases such as plague and small pox have been postulated to select for CCR5Δ32 (57, 58). For plague, relatively high levels of CCR5Δ32 are believed to have arisen within ≈4,000 years, accounting for the prevalence of the mutation only in populations of European descent. Smallpox virus uses the CC-coreceptor, indicating that direct selection for mutations in CCR5 may have offered resistance to smallpox. Given the differences in the epidemic rates of plague (59), smallpox, and HIV, it is difficult to directly compare our results to these findings. However, our model suggests that the CCR5Δ32 mutation could have reached its present allelic frequency in Northern Europe within this time frame if selected for by a disease with virulence patterns similar to HIV. Our results further support the idea that HIV has been only recently introduced as a pathogen into African populations, as the frequency of the protective allele is almost zero, and our model predicts that selection of the mutant allele in this population by HIV alone takes at least 1,000 years. This prediction is distinct from the frequency of the CCR5Δ32 allele in European populations, where pathogens that may have influenced its frequency (e.g., Yersinia pestis) have been present for much longer.
|
||||
|
||||
Two mathematical models have considered the role of parasite and host genetic heterogeneity with regard to susceptibility to another pathogen, namely malaria (60, 61). In each it was determined that heterogeneity of host resistance facilitates the maintenance of diversity in parasite virulence. Given our underlying interest in the coevolution of pathogen and host, we focus on changes in a host protective mutation, holding the virulence of the pathogen constant over time.
|
||||
|
||||
Even within our focus on host protective mutations, numerous genetic factors, beneficial or detrimental, could potentially influence epidemics. Other genetically determined host factors affecting HIV susceptibility and disease progression include a CCR5 A/A to G/G promoter polymorphism (62), a CCR2 point mutation (11, 63), and a mutation in the CXCR4 ligand (64). The CCR2b mutation, CCR264I, is found in linkage with at least one CCR5 promoter polymorphism (65) and is prevalent in populations where CCR5Δ32 is nonexistent, such as sub-Saharan Africa (63). However, as none of these mutations have been consistently shown to be as protective as the CCR5Δ32 allele, we simplified our model to incorporate only the effect of CCR5Δ32. Subsequent models could be constructed from our model to account for the complexity of multiple protective alleles. It is interesting to note that our model predicts that even if CCR264I is present at high frequencies in Africa, its protective effects may not augment the lack of a protective allele such as CCR5Δ32.
|
||||
|
||||
Although our models demonstrate that genetic factors can contribute to the high prevalence of HIV in sub-Saharan Africa, demographic factors are also clearly important in this region. Our models explicitly incorporated such factors, for example, lack of treatment availability. Additional factors were implicitly controlled for by varying only the presence of the CCR5Δ32 allele. More complex models eventually could include interactions with infectious diseases that serve as cofactors in HIV transmission. The role of high sexually transmitted disease prevalences in HIV infection has long been discussed, especially in relation to core populations (15, 50, 66). Malaria, too, might influence HIV transmission, as it is associated with transient increases in semen HIV viral loads and thus could increase the susceptibility of the population to epidemic HIV (16).
|
||||
|
||||
In assessing the HIV/AIDS epidemic, considerable attention has been paid to the influence of core groups in driving sexually transmitted disease epidemics. Our results also highlight how characteristics more uniformly distributed in a population can affect susceptibility. We observed that the genotypic profile of a population affects its susceptibility to epidemic HIV/AIDS. Additional studies are needed to better characterize the influence of these genetic determinants on HIV transmission, as they may be crucial in estimating the severity of the epidemic in some populations. This information can influence the design of treatment strategies as well as point to the urgency for education and prevention programs.
|
||||
|
||||
## Acknowledgments
|
||||
|
||||
We thank Mark Krosky, Katia Koelle, and Kevin Chung for programming and technical assistance. We also thank Drs. V. J. DiRita, P. Kazanjian, and S. M. Blower for helpful comments and discussions. We thank the reviewers for extremely insightful comments.
|
||||
|
||||
## References
|
||||
|
||||
- Weiss HA, Hawkes S. Leprosy Rev 72:92–98 (2001). PMID: 11355525
|
||||
- Taha TE, Dallabetta GA, Hoover DR, Chiphangwi JD, Mtimavalye LAR. AIDS 12:197–203 (1998). PMID: 9468369
|
||||
- AIDS Epidemic Update. Geneva: World Health Organization, 1–17 (1998).
|
||||
- D'Souza MP, Harden VA. Nat Med 2:1293–1300 (1996). PMID: 8946819
|
||||
- Martinson JJ, Chapman NH, Rees DC, Liu YT, Clegg JB. Nat Genet 16:100–103 (1997). PMID: 9140404
|
||||
- Roos MTL, Lange JMA, deGoede REY, Miedema PT, Tersmette F, Coutinho M, Schellekens RA. J Infect Dis 165:427–432 (1992). PMID: 1347054
|
||||
- Garred P, Eugen-Olsen J, Iversen AKN, Benfield TL, Svejgaard A, Hofmann B. Lancet 349:1884 (1997). PMID: 9217763
|
||||
- Katzenstein TL, Eugen-Olsen J, Hofman B, Benfield T, Pedersen C, Iversen AK, Sorensen AM, Garred P, Koppelhus U, Svejgaard A, Gerstoft J. J Acquired Immune Defic Syndr Hum Retrovirol 16:10–14 (1997). PMID: 9377119
|
||||
- deRoda H, Meyer K, Katzenstain W, Dean M. Science 273:1856–1862 (1996). PMID: 8791590
|
||||
- Meyer L, Magierowska M, Hubert JB, Rouzioux C, Deveau C, Sanson F, Debre P, Delfraissy JF, Theodorou I. AIDS 11:F73–F78 (1997). PMID: 9302436
|
||||
- Smith MW, Dean M, Carrington M, Winkler C, Huttley DA, Lomb GA, Goedert JJ, O'Brien TR, Jacobson LP, Kaslow R, et al. Science 277:959–965 (1997). PMID: 9252328
|
||||
- Samson M, Libert F, Doranz BJ, Rucker J, Liesnard C, Farber CM, Saragosti S, Lapoumeroulie C, Cognaux J, Forceille C, et al. Nature (London) 382:722–725 (1996). PMID: 8751444
|
||||
- McNicholl JM, Smith DK, Qari SH, Hodge T. Emerging Infect Dis 3:261–271 (1997). PMID: 9284370
|
||||
- Michael NL, Chang G, Louie LG, Mascola JR, Dondero D, Birx DL, Sheppard HW. Nat Med 3:338–340 (1997). PMID: 9055864
|
||||
- Mayaud P, Mosha F, Todd J, Balira R, Mgara J, West B, Rusizoka M, Mwijarubi E, Gabone R, Gavyole A, et al. AIDS 11:1873–1880 (1997). PMID: 9412707
|
||||
- Hoffman IF, Jere CS, Taylor TE, Munthali P, Dyer JR. AIDS 13:487–494 (1998).
|
||||
- HIV/AIDS Surveillance Database. Washington, DC: Population Division, International Programs Center (1999).
|
||||
- Anderson RM, May RM, McLean AR. Nature (London) 332:228–234 (1988). PMID: 3279320
|
||||
- Berger EA, Doms RW, Fenyo EM, Korber BT, Littman DR, Moore JP, Sattentau QJ, Schuitemaker H, Sodroski J, Weiss RA. Nature (London) 391:240 (1998). PMID: 9440686
|
||||
- Alkhatib G, Broder CC, Berger EA. J Virol 70:5487–5494 (1996). PMID: 8764060
|
||||
- Choe H, Farzan M, Sun Y, Sullivan N, Rollins B, Ponath PD, Wu L, Mackay CR, LaRosa G, Newman W, et al. Cell 85:1135–1148 (1996). PMID: 8674119
|
||||
- Deng H, Liu R, Ellmeier W, Choe S, Unutmaz D, Burkhart M, Di Marzio P, Marmon S, Sutton RE, Hill CM, et al. Nature (London) 381:661–666 (1996). PMID: 8649511
|
||||
- Doranz BJ, Rucker J, Yi Y, Smyth RJ, Samsom M, Peiper M, Parmentier SC, Collman RG, Doms RW. Cell 85:1149–1158 (1996). PMID: 8674120
|
||||
- Dragic T, Litwin V, Allaway GP, Martin SR, Huang Y, Nagashima KA, Cayanan C, Maddon PJ, Koup RA, Moore JP, Paxton WA. Nature (London) 381:667–673 (1996). PMID: 8649512
|
||||
- Zhu T, Mo H, Wang N, Nam DS, Cao Y, Koup RA, Ho DD. Science 261:1179–1181 (1993). PMID: 8356453
|
||||
- Bjorndal A, Deng H, Jansson M, Fiore JR, Colognesi C, Karlsson A, Albert J, Scarlatti G, Littman DR, Fenyo EM. J Virol 71:7478–7487 (1997). PMID: 9311827
|
||||
- Conner RI, Sheridan KE, Ceradinin D, Choe S, Landau NR. J Exp Med 185:621–628 (1997). PMID: 9034141
|
||||
- Liu R, Paxton WA, Choe S, Ceradini D, Martin SR, Horuk R, MacDonald ME, Stuhlmann H, Koup RA, Landau NR. Cell 86:367–377 (1996). PMID: 8756719
|
||||
- Mussico M, Lazzarin A, Nicolosi A, Gasparini M, Costigliola P, Arici C, Saracco A. Arch Intern Med (Moscow) 154:1971–1976 (1994). PMID: 8074601
|
||||
- Michael NL, Nelson JA, KewalRamani VN, Chang G, O'Brien SJ, Mascola JR, Volsky B, Louder M, White GC, Littman DR, et al. J Virol 72:6040–6047 (1998). PMID: 9621067
|
||||
- Hethcote HW, Yorke JA. Gonorrhea Transmission Dynamics and Control. Berlin: Springer (1984).
|
||||
- Anderson RM, May RM. Nature (London) 333:514–522 (1988). PMID: 3374601
|
||||
- Asiimwe-Okiror G, Opio AA, Musinguzi J, Madraa E, Tembo G, Carael M. AIDS 11:1757–1763 (1997). PMID: 9386811
|
||||
- Carael M, Cleland J, Deheneffe JC, Ferry B, Ingham R. AIDS 9:1171–1175 (1995). PMID: 8519454
|
||||
- Blower SM, Boe C. J AIDS 6:1347–1352 (1993). PMID: 8254474
|
||||
- Kirschner D. J Appl Math 56:143–166 (1996).
|
||||
- Le Pont F, Blower S. J AIDS 4:987–999 (1991). PMID: 1890608
|
||||
- Kim MY, Lagakos SW. Ann Epidemiol 1:117–128 (1990). PMID: 1669741
|
||||
- Anderson RM, May RM. Infectious Disease of Humans: Dynamics and Control. Oxford: Oxford Univ. Press (1992).
|
||||
- Ragni MV, Faruki H, Kingsley LA. J Acquired Immune Defic Syndr 17:42–45 (1998).
|
||||
- Kaplan JE, Khabbaz RF, Murphy EL, Hermansen S, Roberts C, Lal R, Heneine W, Wright D, Matijas L, Thomson R, et al. J Acquired Immune Defic Syndr Hum Retrovirol 12:193–201 (1996). PMID: 8680892
|
||||
- Padian NS, Shiboski SC, Glass SO, Vittinghoff E. Am J Edu 146:350–357 (1997).
|
||||
- Leynaert B, Downs AM, de Vincenzi I. Am J Edu 148:88–96 (1998).
|
||||
- Garnett GP, Anderson RM. J Acquired Immune Defic Syndr 9:500–513 (1995).
|
||||
- Stigum H, Magnus P, Harris JR, Samualson SO, Bakketeig LS. Am J Edu 145:636–643 (1997).
|
||||
- Ho DD, Neumann AU, Perelson AS, Chen W, Leonard JM, Markowitz M. Nature (London) 373:123–126 (1995). PMID: 7816094
|
||||
- World Resources (1998–1999). Oxford: Oxford Univ. Press (1999).
|
||||
- Kostrikis LG, Neumann AU, Thomson B, Korber BT, McHardy P, Karanicolas R, Deutsch L, Huang Y, Lew JF, McIntosh K, et al. J Virol 73:10264–10271 (1999). PMID: 10559343
|
||||
- Low-Beer D, Stoneburner RL, Mukulu A. Nat Med 3:553–557 (1997). PMID: 9142126
|
||||
- Grosskurth H, Mosha F, Todd J, Senkoro K, Newell J, Klokke A, Changalucha J, West B, Mayaud P, Gavyole A. AIDS 9:927–934 (1995). PMID: 7576329
|
||||
- Melo J, Beby-Defaux A, Faria C, Guiraud G, Folgosa E, Barreto A, Agius G. J AIDS 23:203–204 (2000). PMID: 10737436
|
||||
- Iman RL, Helton JC, Campbell JE. J Quality Technol 13:174–183 (1981).
|
||||
- Iman RL, Helton JC, Campbell JE. J Quality Technol 13:232–240 (1981).
|
||||
- Blower SM, Dowlatabadi H. Int Stat Rev 62:229–243 (1994).
|
||||
- Porco TC, Blower SM. Theor Popul Biol 54:117–132 (1998). PMID: 9733654
|
||||
- Blower SM, Porco TC, Darby G. Nat Med 4:673–678 (1998). PMID: 9623975
|
||||
- Libert F, Cochaux P, Beckman G, Samson M, Aksenova M, Cao A, Czeizel A, Claustres M, de la Rua C, Ferrari M, et al. Hum Mol Genet 7:399–406 (1998). PMID: 9466996
|
||||
- Lalani AS, Masters J, Zeng W, Barrett J, Pannu R, Everett H, Arendt CW, McFadden G. Science 286:1968–1971 (1999). PMID: 10583963
|
||||
- Kermack WO, McKendrick AG. Proc R Soc London 261:700–721 (1927).
|
||||
- Gupta S, Hill AVS. Proc R Soc London Ser B 260:271–277 (1995).
|
||||
- Ruwende C, Khoo SC, Snow RW, Yates SNR, Kwiatkowski D, Gupta S, Warn P, Allsopp CE, Gilbert SC, Peschu N. Nature (London) 376:246–249 (1995). PMID: 7617034
|
||||
- McDermott DH, Zimmerman PA, Guignard F, Kleeberger CA, Leitman SF, Murphy PM. Lancet 352:866–870 (1998). PMID: 9742978
|
||||
- Kostrikis LG, Huang Y, Moore JP, Wolinsky SM, Zhang L, Guo Y, Deutsch L, Phair J, Neumann AU, Ho DD. Nat Med 4:350–353 (1998). PMID: 9500612
|
||||
- Winkler C, Modi W, Smith MW, Nelson GW, Wu X, Carrington M, Dean M, Honjo T, Tashiro K, Yabe D, et al. Science 279:389–393 (1998). PMID: 9430590
|
||||
- Martinson JJ, Hong L, Karanicolas R, Moore JP, Kostrikis LG. AIDS 14:483–489 (2000). PMID: 10780710
|
||||
- Vernazza PL, Eron JJ, Fiscus SA, Cohen MS. AIDS 13:155–166 (1999). PMID: 10202821
|
7237
tests/data/groundtruth/docling_v2/pntd.0008301.nxml.json
vendored
Normal file
7237
tests/data/groundtruth/docling_v2/pntd.0008301.nxml.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -34,16 +34,16 @@ Table 1 Categorization of potential factors influencing pre-TAS results.
|
||||
|
||||
| Domain | Factor | Covariate | Description | Reference Group | Summary statistic | Temporal Resolution | Source |
|
||||
|------------------------|-----------------------|-------------------------------|-----------------------------------------------------------------|----------------------|---------------------|-----------------------|--------------------|
|
||||
| Prevalence | Baseline prevalence | 5% cut off | Maximum reported mapping or baseline sentinel site prevalence | <5% | Maximum | Varies | Programmatic data |
|
||||
| Prevalence | Baseline prevalence | 10% cut off | Maximum reported mapping or baseline sentinel site prevalence | <10% | Maximum | Varies | Programmatic data |
|
||||
| Agent                  | Parasite              | Parasite                      | Predominant parasite in district                                | W. bancrofti & mixed | Binary value        | 2018                  | Programmatic data  |
|
||||
| Environment            | Vector                | Vector                        | Predominant vector in district                                  | Anopheles & Mansonia | Binary value        | 2018                  | Country expert     |
|
||||
| Environment | Geography | Elevation | Elevation measured in meters | >350 | Mean | 2000 | CGIAR-CSI SRTM [9] |
|
||||
| Environment | Geography | District area | Area measured in km2 | >2,500 | Maximum sum | Static | Programmatic data |
|
||||
| Environment | Climate | EVI | Enhanced vegetation index | > 0.3 | Mean | 2015 | MODIS [10] |
|
||||
| Prevalence | Baseline prevalence | 5% cut off | Maximum reported mapping or baseline sentinel site prevalence | <5% | Maximum | Varies | Programmatic data |
|
||||
| Prevalence | Baseline prevalence | 10% cut off | Maximum reported mapping or baseline sentinel site prevalence | <10% | Maximum | Varies | Programmatic data |
|
||||
| Agent                  | Parasite              | Parasite                      | Predominant parasite in district                                | W. bancrofti & mixed | Binary value        | 2018                  | Programmatic data  |
|
||||
| Environment            | Vector                | Vector                        | Predominant vector in district                                  | Anopheles & Mansonia | Binary value        | 2018                  | Country expert     |
|
||||
| Environment | Geography | Elevation | Elevation measured in meters | >350 | Mean | 2000 | CGIAR-CSI SRTM [9] |
|
||||
| Environment | Geography | District area | Area measured in km2 | >2,500 | Maximum sum | Static | Programmatic data |
|
||||
| Environment | Climate | EVI | Enhanced vegetation index | > 0.3 | Mean | 2015 | MODIS [10] |
|
||||
| Environment | Climate | Rainfall | Annual rainfall measured in mm | ≤ 700 | Mean | 2015 | CHIRPS [11] |
|
||||
| Environment | Socio-economic | Population density | Number of people per km2 | ≤ 100 | Mean | 2015 | WorldPop [12] |
|
||||
| Environment | Socio-economic | Nighttime lights | Nighttime light index from 0 to 63 | >1.5 | Mean | 2015 | VIIRS [13] |
|
||||
| Environment | Socio-economic | Nighttime lights | Nighttime light index from 0 to 63 | >1.5 | Mean | 2015 | VIIRS [13] |
|
||||
| Environment            | Co-endemicity         | Co-endemic for onchocerciasis | Part or all of district is also endemic for onchocerciasis      | Non-endemic          | Binary value        | 2018                  | Programmatic data  |
|
||||
| MDA | Drug efficacy | Drug package | DEC-ALB or IVM-ALB | DEC-ALB | Binary value | 2018 | Programmatic data |
|
||||
| MDA | Implementation of MDA | Coverage | Median MDA coverage for last 5 rounds | ≥ 65% | Median | Varies | Programmatic data |
|
||||
@ -134,12 +134,12 @@ Table 2 Adjusted risk ratios for pre-TAS failure from log-binomial model sensiti
|
||||
| Number of Failures | 74 | 74 | 44 | 72 | 46 |
|
||||
| Number of total districts | (N = 554) | (N = 420) | (N = 407) | (N = 518) | (N = 414) |
|
||||
| Covariate | RR (95% CI) | RR (95% CI) | RR (95% CI) | RR (95% CI) | RR (95% CI) |
|
||||
| Baseline prevalence > = 10% & used FTS test | 2.38 (0.96–5.90) | 1.23 (0.52–2.92) | 14.52 (1.79–117.82) | 2.61 (1.03–6.61) | 15.80 (1.95–127.67) |
|
||||
| Baseline prevalence > = 10% & used ICT test | 0.80 (0.20–3.24) | 0.42 (0.11–1.68) | 1.00 (0.00–0.00) | 0.88 (0.21–3.60) | 1.00 (0.00–0.00) |
|
||||
| Baseline prevalence > = 10% & used FTS test | 2.38 (0.96–5.90) | 1.23 (0.52–2.92) | 14.52 (1.79–117.82) | 2.61 (1.03–6.61) | 15.80 (1.95–127.67) |
|
||||
| Baseline prevalence > = 10% & used ICT test | 0.80 (0.20–3.24) | 0.42 (0.11–1.68) | 1.00 (0.00–0.00) | 0.88 (0.21–3.60) | 1.00 (0.00–0.00) |
|
||||
| +Used FTS test | 1.16 (0.52–2.59) | 2.40 (1.12–5.11) | 0.15 (0.02–1.11) | 1.03 (0.45–2.36) | 0.13 (0.02–0.96) |
|
||||
| +Used ICT test | 0.92 (0.32–2.67) | 1.47 (0.51–4.21) | 0.33 (0.04–2.54) | 0.82 (0.28–2.43) | 0.27 (0.03–2.04) |
|
||||
| +Baseline prevalence > = 10% | 2.52 (1.37–4.64) | 2.42 (1.31–4.47) | 2.03 (1.06–3.90) | 2.30 (1.21–4.36) | 2.01 (1.07–3.77) |
|
||||
| Elevation < 350m | 3.07 (1.95–4.83) | 2.21 (1.42–3.43) | 4.68 (2.22–9.87) | 3.04 (1.93–4.79) | 3.76 (1.92–7.37) |
|
||||
| +Baseline prevalence > = 10% | 2.52 (1.37–4.64) | 2.42 (1.31–4.47) | 2.03 (1.06–3.90) | 2.30 (1.21–4.36) | 2.01 (1.07–3.77) |
|
||||
| Elevation < 350m | 3.07 (1.95–4.83) | 2.21 (1.42–3.43) | 4.68 (2.22–9.87) | 3.04 (1.93–4.79) | 3.76 (1.92–7.37) |
|
||||
|
||||
Overall 74 districts in the dataset failed pre-TAS. Fig 5 summarizes the likelihood of failure by variable combinations identified in the log-binomial model. For those districts with a baseline prevalence ≥10% that used a FTS diagnostic test and have an average elevation below 350 meters (Combination C01), 87% of the 23 districts failed. Of districts with high baseline that used an ICT diagnostic test and have a low average elevation (C02) 45% failed. Overall, combinations with high baseline and low elevation C01, C02, and C04 accounted for 51% of all the failures (38 of 74).
|
||||
|
14628
tests/data/groundtruth/docling_v2/pone.0234687.nxml.json
vendored
Normal file
14628
tests/data/groundtruth/docling_v2/pone.0234687.nxml.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,458 +1,485 @@
|
||||
item-0 at level 0: unspecified: group _root_
|
||||
item-1 at level 1: section: group header-1
|
||||
item-2 at level 2: section_header: Contents
|
||||
item-3 at level 3: list: group list
|
||||
item-4 at level 4: list_item: (Top)
|
||||
item-5 at level 4: list_item: 1 Etymology
|
||||
item-6 at level 5: list: group list
|
||||
item-7 at level 4: list_item: 2 Taxonomy
|
||||
item-3 at level 3: text: move to sidebar
|
||||
item-4 at level 3: text: hide
|
||||
item-5 at level 3: list: group list
|
||||
item-6 at level 4: list_item: (Top)
|
||||
item-7 at level 4: list_item: 1 Etymology
|
||||
item-8 at level 5: list: group list
|
||||
item-9 at level 4: list_item: 3 Morphology
|
||||
item-9 at level 4: list_item: 2 Taxonomy
|
||||
item-10 at level 5: list: group list
|
||||
item-11 at level 4: list_item: 4 Distribution and habitat
|
||||
item-11 at level 4: list_item: 3 Morphology
|
||||
item-12 at level 5: list: group list
|
||||
item-13 at level 4: list_item: 5 Behaviour Toggle Behaviour subsection
|
||||
item-13 at level 4: list_item: 4 Distribution and habitat
|
||||
item-14 at level 5: list: group list
|
||||
item-15 at level 6: list_item: 5.1 Feeding
|
||||
item-16 at level 7: list: group list
|
||||
item-17 at level 6: list_item: 5.2 Breeding
|
||||
item-15 at level 4: list_item: 5 Behaviour Toggle Behaviour subsection
|
||||
item-16 at level 5: list: group list
|
||||
item-17 at level 6: list_item: 5.1 Feeding
|
||||
item-18 at level 7: list: group list
|
||||
item-19 at level 6: list_item: 5.3 Communication
|
||||
item-19 at level 6: list_item: 5.2 Breeding
|
||||
item-20 at level 7: list: group list
|
||||
item-21 at level 6: list_item: 5.4 Predators
|
||||
item-21 at level 6: list_item: 5.3 Communication
|
||||
item-22 at level 7: list: group list
|
||||
item-23 at level 4: list_item: 6 Relationship with humans Toggle Relationship with humans subsection
|
||||
item-24 at level 5: list: group list
|
||||
item-25 at level 6: list_item: 6.1 Hunting
|
||||
item-26 at level 7: list: group list
|
||||
item-27 at level 6: list_item: 6.2 Domestication
|
||||
item-23 at level 6: list_item: 5.4 Predators
|
||||
item-24 at level 7: list: group list
|
||||
item-25 at level 4: list_item: 6 Relationship with humans Toggle Relationship with humans subsection
|
||||
item-26 at level 5: list: group list
|
||||
item-27 at level 6: list_item: 6.1 Hunting
|
||||
item-28 at level 7: list: group list
|
||||
item-29 at level 6: list_item: 6.3 Heraldry
|
||||
item-29 at level 6: list_item: 6.2 Domestication
|
||||
item-30 at level 7: list: group list
|
||||
item-31 at level 6: list_item: 6.4 Cultural references
|
||||
item-31 at level 6: list_item: 6.3 Heraldry
|
||||
item-32 at level 7: list: group list
|
||||
item-33 at level 4: list_item: 7 See also
|
||||
item-34 at level 5: list: group list
|
||||
item-35 at level 4: list_item: 8 Notes Toggle Notes subsection
|
||||
item-33 at level 6: list_item: 6.4 Cultural references
|
||||
item-34 at level 7: list: group list
|
||||
item-35 at level 4: list_item: 7 See also
|
||||
item-36 at level 5: list: group list
|
||||
item-37 at level 6: list_item: 8.1 Citations
|
||||
item-38 at level 7: list: group list
|
||||
item-39 at level 6: list_item: 8.2 Sources
|
||||
item-37 at level 4: list_item: 8 Notes Toggle Notes subsection
|
||||
item-38 at level 5: list: group list
|
||||
item-39 at level 6: list_item: 8.1 Citations
|
||||
item-40 at level 7: list: group list
|
||||
item-41 at level 4: list_item: 9 External links
|
||||
item-42 at level 5: list: group list
|
||||
item-43 at level 1: title: Duck
|
||||
item-44 at level 2: list: group list
|
||||
item-45 at level 3: list_item: Acèh
|
||||
item-46 at level 3: list_item: Afrikaans
|
||||
item-47 at level 3: list_item: Alemannisch
|
||||
item-48 at level 3: list_item: አማርኛ
|
||||
item-49 at level 3: list_item: Ænglisc
|
||||
item-50 at level 3: list_item: العربية
|
||||
item-51 at level 3: list_item: Aragonés
|
||||
item-52 at level 3: list_item: ܐܪܡܝܐ
|
||||
item-53 at level 3: list_item: Armãneashti
|
||||
item-54 at level 3: list_item: Asturianu
|
||||
item-55 at level 3: list_item: Atikamekw
|
||||
item-56 at level 3: list_item: Авар
|
||||
item-57 at level 3: list_item: Aymar aru
|
||||
item-58 at level 3: list_item: تۆرکجه
|
||||
item-59 at level 3: list_item: Basa Bali
|
||||
item-60 at level 3: list_item: বাংলা
|
||||
item-61 at level 3: list_item: 閩南語 / Bân-lâm-gú
|
||||
item-62 at level 3: list_item: Беларуская
|
||||
item-63 at level 3: list_item: Беларуская (тарашкевіца)
|
||||
item-64 at level 3: list_item: Bikol Central
|
||||
item-65 at level 3: list_item: Български
|
||||
item-66 at level 3: list_item: Brezhoneg
|
||||
item-67 at level 3: list_item: Буряад
|
||||
item-68 at level 3: list_item: Català
|
||||
item-69 at level 3: list_item: Чӑвашла
|
||||
item-70 at level 3: list_item: Čeština
|
||||
item-71 at level 3: list_item: ChiShona
|
||||
item-72 at level 3: list_item: Cymraeg
|
||||
item-73 at level 3: list_item: Dagbanli
|
||||
item-74 at level 3: list_item: Dansk
|
||||
item-75 at level 3: list_item: Deitsch
|
||||
item-76 at level 3: list_item: Deutsch
|
||||
item-77 at level 3: list_item: डोटेली
|
||||
item-78 at level 3: list_item: Ελληνικά
|
||||
item-79 at level 3: list_item: Emiliàn e rumagnòl
|
||||
item-80 at level 3: list_item: Español
|
||||
item-81 at level 3: list_item: Esperanto
|
||||
item-82 at level 3: list_item: Euskara
|
||||
item-83 at level 3: list_item: فارسی
|
||||
item-84 at level 3: list_item: Français
|
||||
item-85 at level 3: list_item: Gaeilge
|
||||
item-86 at level 3: list_item: Galego
|
||||
item-87 at level 3: list_item: ГӀалгӀай
|
||||
item-88 at level 3: list_item: 贛語
|
||||
item-89 at level 3: list_item: گیلکی
|
||||
item-90 at level 3: list_item: 𐌲𐌿𐍄𐌹𐍃𐌺
|
||||
item-91 at level 3: list_item: गोंयची कोंकणी / Gõychi Konknni
|
||||
item-92 at level 3: list_item: 客家語 / Hak-kâ-ngî
|
||||
item-93 at level 3: list_item: 한국어
|
||||
item-94 at level 3: list_item: Hausa
|
||||
item-95 at level 3: list_item: Հայերեն
|
||||
item-96 at level 3: list_item: हिन्दी
|
||||
item-97 at level 3: list_item: Hrvatski
|
||||
item-98 at level 3: list_item: Ido
|
||||
item-99 at level 3: list_item: Bahasa Indonesia
|
||||
item-100 at level 3: list_item: Iñupiatun
|
||||
item-101 at level 3: list_item: Íslenska
|
||||
item-102 at level 3: list_item: Italiano
|
||||
item-103 at level 3: list_item: עברית
|
||||
item-104 at level 3: list_item: Jawa
|
||||
item-105 at level 3: list_item: ಕನ್ನಡ
|
||||
item-106 at level 3: list_item: Kapampangan
|
||||
item-107 at level 3: list_item: ქართული
|
||||
item-108 at level 3: list_item: कॉशुर / کٲشُر
|
||||
item-109 at level 3: list_item: Қазақша
|
||||
item-110 at level 3: list_item: Ikirundi
|
||||
item-111 at level 3: list_item: Kongo
|
||||
item-112 at level 3: list_item: Kreyòl ayisyen
|
||||
item-113 at level 3: list_item: Кырык мары
|
||||
item-114 at level 3: list_item: ລາວ
|
||||
item-115 at level 3: list_item: Latina
|
||||
item-116 at level 3: list_item: Latviešu
|
||||
item-117 at level 3: list_item: Lietuvių
|
||||
item-118 at level 3: list_item: Li Niha
|
||||
item-119 at level 3: list_item: Ligure
|
||||
item-120 at level 3: list_item: Limburgs
|
||||
item-121 at level 3: list_item: Lingála
|
||||
item-122 at level 3: list_item: Malagasy
|
||||
item-123 at level 3: list_item: മലയാളം
|
||||
item-124 at level 3: list_item: मराठी
|
||||
item-125 at level 3: list_item: مازِرونی
|
||||
item-126 at level 3: list_item: Bahasa Melayu
|
||||
item-127 at level 3: list_item: ꯃꯤꯇꯩ ꯂꯣꯟ
|
||||
item-128 at level 3: list_item: 閩東語 / Mìng-dĕ̤ng-ngṳ̄
|
||||
item-129 at level 3: list_item: Мокшень
|
||||
item-130 at level 3: list_item: Монгол
|
||||
item-131 at level 3: list_item: မြန်မာဘာသာ
|
||||
item-132 at level 3: list_item: Nederlands
|
||||
item-133 at level 3: list_item: Nedersaksies
|
||||
item-134 at level 3: list_item: नेपाली
|
||||
item-135 at level 3: list_item: नेपाल भाषा
|
||||
item-136 at level 3: list_item: 日本語
|
||||
item-137 at level 3: list_item: Нохчийн
|
||||
item-138 at level 3: list_item: Norsk nynorsk
|
||||
item-139 at level 3: list_item: Occitan
|
||||
item-140 at level 3: list_item: Oromoo
|
||||
item-141 at level 3: list_item: ਪੰਜਾਬੀ
|
||||
item-142 at level 3: list_item: Picard
|
||||
item-143 at level 3: list_item: Plattdüütsch
|
||||
item-144 at level 3: list_item: Polski
|
||||
item-145 at level 3: list_item: Português
|
||||
item-146 at level 3: list_item: Qırımtatarca
|
||||
item-147 at level 3: list_item: Română
|
||||
item-148 at level 3: list_item: Русский
|
||||
item-149 at level 3: list_item: Саха тыла
|
||||
item-150 at level 3: list_item: ᱥᱟᱱᱛᱟᱲᱤ
|
||||
item-151 at level 3: list_item: Sardu
|
||||
item-152 at level 3: list_item: Scots
|
||||
item-153 at level 3: list_item: Seeltersk
|
||||
item-154 at level 3: list_item: Shqip
|
||||
item-155 at level 3: list_item: Sicilianu
|
||||
item-156 at level 3: list_item: සිංහල
|
||||
item-157 at level 3: list_item: Simple English
|
||||
item-158 at level 3: list_item: سنڌي
|
||||
item-159 at level 3: list_item: کوردی
|
||||
item-160 at level 3: list_item: Српски / srpski
|
||||
item-161 at level 3: list_item: Srpskohrvatski / српскохрватски
|
||||
item-162 at level 3: list_item: Sunda
|
||||
item-163 at level 3: list_item: Svenska
|
||||
item-164 at level 3: list_item: Tagalog
|
||||
item-165 at level 3: list_item: தமிழ்
|
||||
item-166 at level 3: list_item: Taqbaylit
|
||||
item-167 at level 3: list_item: Татарча / tatarça
|
||||
item-168 at level 3: list_item: ไทย
|
||||
item-169 at level 3: list_item: Türkçe
|
||||
item-170 at level 3: list_item: Українська
|
||||
item-171 at level 3: list_item: ئۇيغۇرچە / Uyghurche
|
||||
item-172 at level 3: list_item: Vahcuengh
|
||||
item-173 at level 3: list_item: Tiếng Việt
|
||||
item-174 at level 3: list_item: Walon
|
||||
item-175 at level 3: list_item: 文言
|
||||
item-176 at level 3: list_item: Winaray
|
||||
item-177 at level 3: list_item: 吴语
|
||||
item-178 at level 3: list_item: 粵語
|
||||
item-179 at level 3: list_item: Žemaitėška
|
||||
item-180 at level 3: list_item: 中文
|
||||
item-181 at level 2: list: group list
|
||||
item-182 at level 3: list_item: Article
|
||||
item-183 at level 3: list_item: Talk
|
||||
item-184 at level 2: list: group list
|
||||
item-185 at level 2: list: group list
|
||||
item-186 at level 3: list_item: Read
|
||||
item-187 at level 3: list_item: View source
|
||||
item-188 at level 3: list_item: View history
|
||||
item-189 at level 2: text: Tools
|
||||
item-190 at level 2: text: Actions
|
||||
item-41 at level 6: list_item: 8.2 Sources
|
||||
item-42 at level 7: list: group list
|
||||
item-43 at level 4: list_item: 9 External links
|
||||
item-44 at level 5: list: group list
|
||||
item-45 at level 3: text: Toggle the table of contents
|
||||
item-46 at level 1: title: Duck
|
||||
item-47 at level 2: text: 136 languages
|
||||
item-48 at level 2: list: group list
|
||||
item-49 at level 3: list_item: Acèh
|
||||
item-50 at level 3: list_item: Afrikaans
|
||||
item-51 at level 3: list_item: Alemannisch
|
||||
item-52 at level 3: list_item: አማርኛ
|
||||
item-53 at level 3: list_item: Ænglisc
|
||||
item-54 at level 3: list_item: العربية
|
||||
item-55 at level 3: list_item: Aragonés
|
||||
item-56 at level 3: list_item: ܐܪܡܝܐ
|
||||
item-57 at level 3: list_item: Armãneashti
|
||||
item-58 at level 3: list_item: Asturianu
|
||||
item-59 at level 3: list_item: Atikamekw
|
||||
item-60 at level 3: list_item: Авар
|
||||
item-61 at level 3: list_item: Aymar aru
|
||||
item-62 at level 3: list_item: تۆرکجه
|
||||
item-63 at level 3: list_item: Basa Bali
|
||||
item-64 at level 3: list_item: বাংলা
|
||||
item-65 at level 3: list_item: 閩南語 / Bân-lâm-gú
|
||||
item-66 at level 3: list_item: Беларуская
|
||||
item-67 at level 3: list_item: Беларуская (тарашкевіца)
|
||||
item-68 at level 3: list_item: Bikol Central
|
||||
item-69 at level 3: list_item: Български
|
||||
item-70 at level 3: list_item: Brezhoneg
|
||||
item-71 at level 3: list_item: Буряад
|
||||
item-72 at level 3: list_item: Català
|
||||
item-73 at level 3: list_item: Чӑвашла
|
||||
item-74 at level 3: list_item: Čeština
|
||||
item-75 at level 3: list_item: ChiShona
|
||||
item-76 at level 3: list_item: Cymraeg
|
||||
item-77 at level 3: list_item: Dagbanli
|
||||
item-78 at level 3: list_item: Dansk
|
||||
item-79 at level 3: list_item: Deitsch
|
||||
item-80 at level 3: list_item: Deutsch
|
||||
item-81 at level 3: list_item: डोटेली
|
||||
item-82 at level 3: list_item: Ελληνικά
|
||||
item-83 at level 3: list_item: Emiliàn e rumagnòl
|
||||
item-84 at level 3: list_item: Español
|
||||
item-85 at level 3: list_item: Esperanto
|
||||
item-86 at level 3: list_item: Euskara
|
||||
item-87 at level 3: list_item: فارسی
|
||||
item-88 at level 3: list_item: Français
|
||||
item-89 at level 3: list_item: Gaeilge
|
||||
item-90 at level 3: list_item: Galego
|
||||
item-91 at level 3: list_item: ГӀалгӀай
|
||||
item-92 at level 3: list_item: 贛語
|
||||
item-93 at level 3: list_item: گیلکی
|
||||
item-94 at level 3: list_item: 𐌲𐌿𐍄𐌹𐍃𐌺
|
||||
item-95 at level 3: list_item: गोंयची कोंकणी / Gõychi Konknni
|
||||
item-96 at level 3: list_item: 客家語 / Hak-kâ-ngî
|
||||
item-97 at level 3: list_item: 한국어
|
||||
item-98 at level 3: list_item: Hausa
|
||||
item-99 at level 3: list_item: Հայերեն
|
||||
item-100 at level 3: list_item: हिन्दी
|
||||
item-101 at level 3: list_item: Hrvatski
|
||||
item-102 at level 3: list_item: Ido
|
||||
item-103 at level 3: list_item: Bahasa Indonesia
|
||||
item-104 at level 3: list_item: Iñupiatun
|
||||
item-105 at level 3: list_item: Íslenska
|
||||
item-106 at level 3: list_item: Italiano
|
||||
item-107 at level 3: list_item: עברית
|
||||
item-108 at level 3: list_item: Jawa
|
||||
item-109 at level 3: list_item: ಕನ್ನಡ
|
||||
item-110 at level 3: list_item: Kapampangan
|
||||
item-111 at level 3: list_item: ქართული
|
||||
item-112 at level 3: list_item: कॉशुर / کٲشُر
|
||||
item-113 at level 3: list_item: Қазақша
|
||||
item-114 at level 3: list_item: Ikirundi
|
||||
item-115 at level 3: list_item: Kongo
|
||||
item-116 at level 3: list_item: Kreyòl ayisyen
|
||||
item-117 at level 3: list_item: Кырык мары
|
||||
item-118 at level 3: list_item: ລາວ
|
||||
item-119 at level 3: list_item: Latina
|
||||
item-120 at level 3: list_item: Latviešu
|
||||
item-121 at level 3: list_item: Lietuvių
|
||||
item-122 at level 3: list_item: Li Niha
|
||||
item-123 at level 3: list_item: Ligure
|
||||
item-124 at level 3: list_item: Limburgs
|
||||
item-125 at level 3: list_item: Lingála
|
||||
item-126 at level 3: list_item: Malagasy
|
||||
item-127 at level 3: list_item: മലയാളം
|
||||
item-128 at level 3: list_item: मराठी
|
||||
item-129 at level 3: list_item: مازِرونی
|
||||
item-130 at level 3: list_item: Bahasa Melayu
|
||||
item-131 at level 3: list_item: ꯃꯤꯇꯩ ꯂꯣꯟ
|
||||
item-132 at level 3: list_item: 閩東語 / Mìng-dĕ̤ng-ngṳ̄
|
||||
item-133 at level 3: list_item: Мокшень
|
||||
item-134 at level 3: list_item: Монгол
|
||||
item-135 at level 3: list_item: မြန်မာဘာသာ
|
||||
item-136 at level 3: list_item: Nederlands
|
||||
item-137 at level 3: list_item: Nedersaksies
|
||||
item-138 at level 3: list_item: नेपाली
|
||||
item-139 at level 3: list_item: नेपाल भाषा
|
||||
item-140 at level 3: list_item: 日本語
|
||||
item-141 at level 3: list_item: Нохчийн
|
||||
item-142 at level 3: list_item: Norsk nynorsk
|
||||
item-143 at level 3: list_item: Occitan
|
||||
item-144 at level 3: list_item: Oromoo
|
||||
item-145 at level 3: list_item: ਪੰਜਾਬੀ
|
||||
item-146 at level 3: list_item: Picard
|
||||
item-147 at level 3: list_item: Plattdüütsch
|
||||
item-148 at level 3: list_item: Polski
|
||||
item-149 at level 3: list_item: Português
|
||||
item-150 at level 3: list_item: Qırımtatarca
|
||||
item-151 at level 3: list_item: Română
|
||||
item-152 at level 3: list_item: Русский
|
||||
item-153 at level 3: list_item: Саха тыла
|
||||
item-154 at level 3: list_item: ᱥᱟᱱᱛᱟᱲᱤ
|
||||
item-155 at level 3: list_item: Sardu
|
||||
item-156 at level 3: list_item: Scots
|
||||
item-157 at level 3: list_item: Seeltersk
|
||||
item-158 at level 3: list_item: Shqip
|
||||
item-159 at level 3: list_item: Sicilianu
|
||||
item-160 at level 3: list_item: සිංහල
|
||||
item-161 at level 3: list_item: Simple English
|
||||
item-162 at level 3: list_item: سنڌي
|
||||
item-163 at level 3: list_item: کوردی
|
||||
item-164 at level 3: list_item: Српски / srpski
|
||||
item-165 at level 3: list_item: Srpskohrvatski / српскохрватски
|
||||
item-166 at level 3: list_item: Sunda
|
||||
item-167 at level 3: list_item: Svenska
|
||||
item-168 at level 3: list_item: Tagalog
|
||||
item-169 at level 3: list_item: தமிழ்
|
||||
item-170 at level 3: list_item: Taqbaylit
|
||||
item-171 at level 3: list_item: Татарча / tatarça
|
||||
item-172 at level 3: list_item: ไทย
|
||||
item-173 at level 3: list_item: Türkçe
|
||||
item-174 at level 3: list_item: Українська
|
||||
item-175 at level 3: list_item: ئۇيغۇرچە / Uyghurche
|
||||
item-176 at level 3: list_item: Vahcuengh
|
||||
item-177 at level 3: list_item: Tiếng Việt
|
||||
item-178 at level 3: list_item: Walon
|
||||
item-179 at level 3: list_item: 文言
|
||||
item-180 at level 3: list_item: Winaray
|
||||
item-181 at level 3: list_item: 吴语
|
||||
item-182 at level 3: list_item: 粵語
|
||||
item-183 at level 3: list_item: Žemaitėška
|
||||
item-184 at level 3: list_item: 中文
|
||||
item-185 at level 2: text: Edit links
|
||||
item-186 at level 2: list: group list
|
||||
item-187 at level 3: list_item: Article
|
||||
item-188 at level 3: list_item: Talk
|
||||
item-189 at level 2: text: English
|
||||
item-190 at level 2: list: group list
|
||||
item-191 at level 2: list: group list
|
||||
item-192 at level 3: list_item: Read
|
||||
item-193 at level 3: list_item: View source
|
||||
item-194 at level 3: list_item: View history
|
||||
item-195 at level 2: text: General
|
||||
item-196 at level 2: list: group list
|
||||
item-197 at level 3: list_item: What links here
|
||||
item-198 at level 3: list_item: Related changes
|
||||
item-199 at level 3: list_item: Upload file
|
||||
item-200 at level 3: list_item: Special pages
|
||||
item-201 at level 3: list_item: Permanent link
|
||||
item-202 at level 3: list_item: Page information
|
||||
item-203 at level 3: list_item: Cite this page
|
||||
item-204 at level 3: list_item: Get shortened URL
|
||||
item-205 at level 3: list_item: Download QR code
|
||||
item-206 at level 3: list_item: Wikidata item
|
||||
item-207 at level 2: text: Print/export
|
||||
item-208 at level 2: list: group list
|
||||
item-209 at level 3: list_item: Download as PDF
|
||||
item-210 at level 3: list_item: Printable version
|
||||
item-211 at level 2: text: In other projects
|
||||
item-212 at level 2: list: group list
|
||||
item-213 at level 3: list_item: Wikimedia Commons
|
||||
item-214 at level 3: list_item: Wikiquote
|
||||
item-215 at level 2: text: Appearance
|
||||
item-216 at level 2: picture
|
||||
item-217 at level 2: text: From Wikipedia, the free encyclopedia
|
||||
item-218 at level 2: text: Common name for many species of bird
|
||||
item-219 at level 2: text: This article is about the bird. ... as a food, see . For other uses, see .
|
||||
item-220 at level 2: text: "Duckling" redirects here. For other uses, see .
|
||||
item-221 at level 2: table with [13x2]
|
||||
item-222 at level 2: text: Duck is the common name for nume ... und in both fresh water and sea water.
|
||||
item-223 at level 2: text: Ducks are sometimes confused wit ... divers, grebes, gallinules and coots.
|
||||
item-224 at level 2: section_header: Etymology
|
||||
item-225 at level 3: text: The word duck comes from Old Eng ... h duiken and German tauchen 'to dive'.
|
||||
item-226 at level 3: picture
|
||||
item-226 at level 4: caption: Pacific black duck displaying the characteristic upending "duck"
|
||||
item-227 at level 3: text: This word replaced Old English e ... nskrit ātí 'water bird', among others.
|
||||
item-228 at level 3: text: A duckling is a young duck in do ... , is sometimes labelled as a duckling.
|
||||
item-229 at level 3: text: A male is called a drake and the ... a duck, or in ornithology a hen.[3][4]
|
||||
item-230 at level 3: picture
|
||||
item-230 at level 4: caption: Male mallard.
|
||||
item-231 at level 3: picture
|
||||
item-231 at level 4: caption: Wood ducks.
|
||||
item-232 at level 2: section_header: Taxonomy
|
||||
item-233 at level 3: text: All ducks belong to the biologic ... ationships between various species.[9]
|
||||
item-234 at level 3: picture
|
||||
item-234 at level 4: caption: Mallard landing in approach
|
||||
item-235 at level 3: text: In most modern classifications, ... all size and stiff, upright tails.[14]
|
||||
item-236 at level 3: text: A number of other species called ... shelducks in the tribe Tadornini.[15]
|
||||
item-237 at level 2: section_header: Morphology
|
||||
item-238 at level 3: picture
|
||||
item-238 at level 4: caption: Male Mandarin duck
|
||||
item-239 at level 3: text: The overall body plan of ducks i ... is moult typically precedes migration.
|
||||
item-240 at level 3: text: The drakes of northern species o ... rkscrew shaped vagina to prevent rape.
|
||||
item-241 at level 2: section_header: Distribution and habitat
|
||||
item-242 at level 3: picture
|
||||
item-242 at level 4: caption: Flying steamer ducks in Ushuaia, Argentina
|
||||
item-243 at level 3: text: Ducks have a cosmopolitan distri ... endemic to such far-flung islands.[21]
|
||||
item-195 at level 2: text: Tools
|
||||
item-196 at level 2: text: Tools
|
||||
item-197 at level 2: text: move to sidebar
|
||||
item-198 at level 2: text: hide
|
||||
item-199 at level 2: text: Actions
|
||||
item-200 at level 2: list: group list
|
||||
item-201 at level 3: list_item: Read
|
||||
item-202 at level 3: list_item: View source
|
||||
item-203 at level 3: list_item: View history
|
||||
item-204 at level 2: text: General
|
||||
item-205 at level 2: list: group list
|
||||
item-206 at level 3: list_item: What links here
|
||||
item-207 at level 3: list_item: Related changes
|
||||
item-208 at level 3: list_item: Upload file
|
||||
item-209 at level 3: list_item: Special pages
|
||||
item-210 at level 3: list_item: Permanent link
|
||||
item-211 at level 3: list_item: Page information
|
||||
item-212 at level 3: list_item: Cite this page
|
||||
item-213 at level 3: list_item: Get shortened URL
|
||||
item-214 at level 3: list_item: Download QR code
|
||||
item-215 at level 3: list_item: Wikidata item
|
||||
item-216 at level 2: text: Print/export
|
||||
item-217 at level 2: list: group list
|
||||
item-218 at level 3: list_item: Download as PDF
|
||||
item-219 at level 3: list_item: Printable version
|
||||
item-220 at level 2: text: In other projects
|
||||
item-221 at level 2: list: group list
|
||||
item-222 at level 3: list_item: Wikimedia Commons
|
||||
item-223 at level 3: list_item: Wikiquote
|
||||
item-224 at level 2: text: Appearance
|
||||
item-225 at level 2: text: move to sidebar
|
||||
item-226 at level 2: text: hide
|
||||
item-227 at level 2: text: From Wikipedia, the free encyclopedia
|
||||
item-228 at level 2: text: (Redirected from Duckling)
|
||||
item-229 at level 2: text: Common name for many species of bird
|
||||
item-230 at level 2: text: This article is about the bird. ... other uses, see Duck (disambiguation).
|
||||
item-231 at level 2: text: "Duckling" redirects here. For other uses, see Duckling (disambiguation).
|
||||
item-232 at level 2: picture
|
||||
item-233 at level 2: picture
|
||||
item-234 at level 2: table with [13x2]
|
||||
item-235 at level 2: text: Duck is the common name for nume ... und in both fresh water and sea water.
|
||||
item-236 at level 2: text: Ducks are sometimes confused wit ... divers, grebes, gallinules and coots.
|
||||
item-237 at level 2: section_header: Etymology
|
||||
item-238 at level 3: text: The word duck comes from Old Eng ... h duiken and German tauchen 'to dive'.
|
||||
item-239 at level 3: picture
|
||||
item-239 at level 4: caption: Pacific black duck displaying the characteristic upending "duck"
|
||||
item-240 at level 3: text: This word replaced Old English e ... nskrit ātí 'water bird', among others.
|
||||
item-241 at level 3: text: A duckling is a young duck in do ... , is sometimes labelled as a duckling.
|
||||
item-242 at level 3: text: A male is called a drake and the ... a duck, or in ornithology a hen.[3][4]
|
||||
item-243 at level 3: picture
|
||||
item-243 at level 4: caption: Male mallard.
|
||||
item-244 at level 3: picture
|
||||
item-244 at level 4: caption: Female mallard in Cornwall, England
|
||||
item-245 at level 3: text: Some duck species, mainly those ... t form after localised heavy rain.[23]
|
||||
item-246 at level 2: section_header: Behaviour
|
||||
item-247 at level 3: section_header: Feeding
|
||||
item-248 at level 4: picture
|
||||
item-248 at level 5: caption: Pecten along the bill
|
||||
item-249 at level 4: picture
|
||||
item-249 at level 5: caption: Mallard duckling preening
|
||||
item-250 at level 4: text: Ducks eat food sources such as g ... amphibians, worms, and small molluscs.
|
||||
item-251 at level 4: text: Dabbling ducks feed on the surfa ... thers and to hold slippery food items.
|
||||
item-252 at level 4: text: Diving ducks and sea ducks forag ... ave more difficulty taking off to fly.
|
||||
item-253 at level 4: text: A few specialized species such a ... apted to catch and swallow large fish.
|
||||
item-254 at level 4: text: The others have the characterist ... e nostrils come out through hard horn.
|
||||
item-255 at level 4: text: The Guardian published an articl ... the ducks and pollutes waterways.[25]
|
||||
item-256 at level 3: section_header: Breeding
|
||||
item-257 at level 4: picture
|
||||
item-257 at level 5: caption: A Muscovy duckling
|
||||
item-258 at level 4: text: Ducks generally only have one pa ... st and led her ducklings to water.[28]
|
||||
item-259 at level 3: section_header: Communication
|
||||
item-260 at level 4: text: Female mallard ducks (as well as ... laying calls or quieter contact calls.
|
||||
item-261 at level 4: text: A common urban legend claims tha ... annel television show MythBusters.[32]
|
||||
item-262 at level 3: section_header: Predators
|
||||
item-263 at level 4: picture
|
||||
item-263 at level 5: caption: Ringed teal
|
||||
item-264 at level 4: text: Ducks have many predators. Duckl ... or large birds, such as hawks or owls.
|
||||
item-265 at level 4: text: Adult ducks are fast fliers, but ... its speed and strength to catch ducks.
|
||||
item-266 at level 2: section_header: Relationship with humans
|
||||
item-267 at level 3: section_header: Hunting
|
||||
item-268 at level 4: text: Humans have hunted ducks since p ... evidence of this is uncommon.[35][42]
|
||||
item-269 at level 4: text: In many areas, wild ducks (inclu ... inated by pollutants such as PCBs.[44]
|
||||
item-270 at level 3: section_header: Domestication
|
||||
item-271 at level 4: picture
|
||||
item-271 at level 5: caption: Indian Runner ducks, a common breed of domestic ducks
|
||||
item-272 at level 4: text: Ducks have many economic uses, b ... it weighs less than 1 kg (2.2 lb).[48]
|
||||
item-273 at level 3: section_header: Heraldry
|
||||
item-274 at level 4: picture
|
||||
item-274 at level 5: caption: Three black-colored ducks in the coat of arms of Maaninka[49]
|
||||
item-275 at level 4: text: Ducks appear on several coats of ... the coat of arms of Föglö (Åland).[51]
|
||||
item-276 at level 3: section_header: Cultural references
|
||||
item-277 at level 4: text: In 2002, psychologist Richard Wi ... 54] and was made into a movie in 1986.
|
||||
item-278 at level 4: text: The 1992 Disney film The Mighty ... Ducks minor league baseball team.[55]
|
||||
item-279 at level 2: section_header: See also
|
||||
item-280 at level 3: list: group list
|
||||
item-281 at level 4: list_item: Birds portal
|
||||
item-282 at level 3: list: group list
|
||||
item-283 at level 4: list_item: Domestic duck
|
||||
item-284 at level 4: list_item: Duck as food
|
||||
item-285 at level 4: list_item: Duck test
|
||||
item-286 at level 4: list_item: Duck breeds
|
||||
item-287 at level 4: list_item: Fictional ducks
|
||||
item-288 at level 4: list_item: Rubber duck
|
||||
item-289 at level 2: section_header: Notes
|
||||
item-290 at level 3: section_header: Citations
|
||||
item-291 at level 4: list: group ordered list
|
||||
item-292 at level 5: list_item: ^ "Duckling". The American Herit ... n Company. 2006. Retrieved 2015-05-22.
|
||||
item-293 at level 5: list_item: ^ "Duckling". Kernerman English ... Ltd. 2000–2006. Retrieved 2015-05-22.
|
||||
item-294 at level 5: list_item: ^ Dohner, Janet Vorwald (2001). ... University Press. ISBN 978-0300138139.
|
||||
item-295 at level 5: list_item: ^ Visca, Curt; Visca, Kelley (20 ... Publishing Group. ISBN 9780823961566.
|
||||
item-296 at level 5: list_item: ^ a b c d Carboneras 1992, p. 536.
|
||||
item-297 at level 5: list_item: ^ Livezey 1986, pp. 737–738.
|
||||
item-298 at level 5: list_item: ^ Madsen, McHugh & de Kloet 1988, p. 452.
|
||||
item-299 at level 5: list_item: ^ Donne-Goussé, Laudet & Hänni 2002, pp. 353–354.
|
||||
item-300 at level 5: list_item: ^ a b c d e f Carboneras 1992, p. 540.
|
||||
item-301 at level 5: list_item: ^ Elphick, Dunning & Sibley 2001, p. 191.
|
||||
item-302 at level 5: list_item: ^ Kear 2005, p. 448.
|
||||
item-303 at level 5: list_item: ^ Kear 2005, p. 622–623.
|
||||
item-304 at level 5: list_item: ^ Kear 2005, p. 686.
|
||||
item-305 at level 5: list_item: ^ Elphick, Dunning & Sibley 2001, p. 193.
|
||||
item-306 at level 5: list_item: ^ a b c d e f g Carboneras 1992, p. 537.
|
||||
item-307 at level 5: list_item: ^ American Ornithologists' Union 1998, p. xix.
|
||||
item-308 at level 5: list_item: ^ American Ornithologists' Union 1998.
|
||||
item-309 at level 5: list_item: ^ Carboneras 1992, p. 538.
|
||||
item-310 at level 5: list_item: ^ Christidis & Boles 2008, p. 62.
|
||||
item-311 at level 5: list_item: ^ Shirihai 2008, pp. 239, 245.
|
||||
item-312 at level 5: list_item: ^ a b Pratt, Bruner & Berrett 1987, pp. 98–107.
|
||||
item-313 at level 5: list_item: ^ Fitter, Fitter & Hosking 2000, pp. 52–3.
|
||||
item-314 at level 5: list_item: ^ "Pacific Black Duck". www.wiresnr.org. Retrieved 2018-04-27.
|
||||
item-315 at level 5: list_item: ^ Ogden, Evans. "Dabbling Ducks". CWE. Retrieved 2006-11-02.
|
||||
item-316 at level 5: list_item: ^ Karl Mathiesen (16 March 2015) ... Guardian. Retrieved 13 November 2016.
|
||||
item-317 at level 5: list_item: ^ Rohwer, Frank C.; Anderson, Mi ... 4615-6787-5_4. ISBN 978-1-4615-6789-9.
|
||||
item-318 at level 5: list_item: ^ Smith, Cyndi M.; Cooke, Fred; ... 093/condor/102.1.201. hdl:10315/13797.
|
||||
item-319 at level 5: list_item: ^ "If You Find An Orphaned Duckl ... l on 2018-09-23. Retrieved 2018-12-22.
|
||||
item-320 at level 5: list_item: ^ Carver, Heather (2011). The Du ... 9780557901562.[self-published source]
|
||||
item-321 at level 5: list_item: ^ Titlow, Budd (2013-09-03). Bir ... man & Littlefield. ISBN 9780762797707.
|
||||
item-322 at level 5: list_item: ^ Amos, Jonathan (2003-09-08). " ... kers". BBC News. Retrieved 2006-11-02.
|
||||
item-323 at level 5: list_item: ^ "Mythbusters Episode 8". 12 December 2003.
|
||||
item-324 at level 5: list_item: ^ Erlandson 1994, p. 171.
|
||||
item-325 at level 5: list_item: ^ Jeffries 2008, pp. 168, 243.
|
||||
item-326 at level 5: list_item: ^ a b Sued-Badillo 2003, p. 65.
|
||||
item-327 at level 5: list_item: ^ Thorpe 1996, p. 68.
|
||||
item-328 at level 5: list_item: ^ Maisels 1999, p. 42.
|
||||
item-329 at level 5: list_item: ^ Rau 1876, p. 133.
|
||||
item-330 at level 5: list_item: ^ Higman 2012, p. 23.
|
||||
item-331 at level 5: list_item: ^ Hume 2012, p. 53.
|
||||
item-332 at level 5: list_item: ^ Hume 2012, p. 52.
|
||||
item-333 at level 5: list_item: ^ Fieldhouse 2002, p. 167.
|
||||
item-334 at level 5: list_item: ^ Livingston, A. D. (1998-01-01) ... Editions, Limited. ISBN 9781853263774.
|
||||
item-335 at level 5: list_item: ^ "Study plan for waterfowl inju ... on 2022-10-09. Retrieved 2 July 2019.
|
||||
item-336 at level 5: list_item: ^ "FAOSTAT". www.fao.org. Retrieved 2019-10-25.
|
||||
item-337 at level 5: list_item: ^ "Anas platyrhynchos, Domestic ... . Digimorph.org. Retrieved 2012-12-23.
|
||||
item-338 at level 5: list_item: ^ Sy Montgomery. "Mallard; Encyc ... Britannica.com. Retrieved 2012-12-23.
|
||||
item-339 at level 5: list_item: ^ Glenday, Craig (2014). Guinnes ... ited. pp. 135. ISBN 978-1-908843-15-9.
|
||||
item-340 at level 5: list_item: ^ Suomen kunnallisvaakunat (in F ... tto. 1982. p. 147. ISBN 951-773-085-3.
|
||||
item-341 at level 5: list_item: ^ "Lubānas simbolika" (in Latvian). Retrieved September 9, 2021.
|
||||
item-342 at level 5: list_item: ^ "Föglö" (in Swedish). Retrieved September 9, 2021.
|
||||
item-343 at level 5: list_item: ^ Young, Emma. "World's funniest ... w Scientist. Retrieved 7 January 2019.
|
||||
item-344 at level 5: list_item: ^ "Howard the Duck (character)". Grand Comics Database.
|
||||
item-345 at level 5: list_item: ^ Sanderson, Peter; Gilbert, Lau ... luding this bad-tempered talking duck.
|
||||
item-346 at level 5: list_item: ^ "The Duck". University of Oregon Athletics. Retrieved 2022-01-20.
|
||||
item-347 at level 3: section_header: Sources
|
||||
item-348 at level 4: list: group list
|
||||
item-349 at level 5: list_item: American Ornithologists' Union ( ... (PDF) from the original on 2022-10-09.
|
||||
item-350 at level 5: list_item: Carboneras, Carlos (1992). del H ... Lynx Edicions. ISBN 978-84-87334-10-8.
|
||||
item-351 at level 5: list_item: Christidis, Les; Boles, Walter E ... ro Publishing. ISBN 978-0-643-06511-6.
|
||||
item-352 at level 5: list_item: Donne-Goussé, Carole; Laudet, Vi ... /S1055-7903(02)00019-2. PMID 12099792.
|
||||
item-353 at level 5: list_item: Elphick, Chris; Dunning, John B. ... istopher Helm. ISBN 978-0-7136-6250-4.
|
||||
item-354 at level 5: list_item: Erlandson, Jon M. (1994). Early ... usiness Media. ISBN 978-1-4419-3231-0.
|
||||
item-355 at level 5: list_item: Fieldhouse, Paul (2002). Food, F ... ara: ABC-CLIO. ISBN 978-1-61069-412-4.
|
||||
item-356 at level 5: list_item: Fitter, Julian; Fitter, Daniel; ... versity Press. ISBN 978-0-691-10295-5.
|
||||
item-357 at level 5: list_item: Higman, B. W. (2012). How Food M ... Wiley & Sons. ISBN 978-1-4051-8947-7.
|
||||
item-358 at level 5: list_item: Hume, Julian H. (2012). Extinct ... istopher Helm. ISBN 978-1-4729-3744-5.
|
||||
item-359 at level 5: list_item: Jeffries, Richard (2008). Holoce ... Alabama Press. ISBN 978-0-8173-1658-7.
|
||||
item-360 at level 5: list_item: Kear, Janet, ed. (2005). Ducks, ... versity Press. ISBN 978-0-19-861009-0.
|
||||
item-361 at level 5: list_item: Livezey, Bradley C. (October 198 ... (PDF) from the original on 2022-10-09.
|
||||
item-362 at level 5: list_item: Madsen, Cort S.; McHugh, Kevin P ... (PDF) from the original on 2022-10-09.
|
||||
item-363 at level 5: list_item: Maisels, Charles Keith (1999). E ... on: Routledge. ISBN 978-0-415-10975-8.
|
||||
item-364 at level 5: list_item: Pratt, H. Douglas; Bruner, Phill ... University Press. ISBN 0-691-02399-9.
|
||||
item-365 at level 5: list_item: Rau, Charles (1876). Early Man i ... ork: Harper & Brothers. LCCN 05040168.
|
||||
item-366 at level 5: list_item: Shirihai, Hadoram (2008). A Comp ... versity Press. ISBN 978-0-691-13666-0.
|
||||
item-367 at level 5: list_item: Sued-Badillo, Jalil (2003). Auto ... Paris: UNESCO. ISBN 978-92-3-103832-7.
|
||||
item-368 at level 5: list_item: Thorpe, I. J. (1996). The Origin ... rk: Routledge. ISBN 978-0-415-08009-5.
|
||||
item-369 at level 2: section_header: External links
|
||||
item-370 at level 3: list: group list
|
||||
item-371 at level 4: list_item: Definitions from Wiktionary
|
||||
item-372 at level 4: list_item: Media from Commons
|
||||
item-373 at level 4: list_item: Quotations from Wikiquote
|
||||
item-374 at level 4: list_item: Recipes from Wikibooks
|
||||
item-375 at level 4: list_item: Taxa from Wikispecies
|
||||
item-376 at level 4: list_item: Data from Wikidata
|
||||
item-377 at level 3: list: group list
|
||||
item-378 at level 4: list_item: list of books (useful looking abstracts)
|
||||
item-379 at level 4: list_item: Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine
|
||||
item-380 at level 4: list_item: Ducks at a Distance, by Rob Hine ... uide to identification of US waterfowl
|
||||
item-381 at level 3: table with [3x2]
|
||||
item-382 at level 3: picture
|
||||
item-383 at level 3: text: Retrieved from ""
|
||||
item-384 at level 3: text: :
|
||||
item-385 at level 3: list: group list
|
||||
item-386 at level 4: list_item: Ducks
|
||||
item-387 at level 4: list_item: Game birds
|
||||
item-388 at level 4: list_item: Bird common names
|
||||
item-389 at level 3: text: Hidden categories:
|
||||
item-390 at level 3: list: group list
|
||||
item-391 at level 4: list_item: All accuracy disputes
|
||||
item-392 at level 4: list_item: Accuracy disputes from February 2020
|
||||
item-393 at level 4: list_item: CS1 Finnish-language sources (fi)
|
||||
item-394 at level 4: list_item: CS1 Latvian-language sources (lv)
|
||||
item-395 at level 4: list_item: CS1 Swedish-language sources (sv)
|
||||
item-396 at level 4: list_item: Articles with short description
|
||||
item-397 at level 4: list_item: Short description is different from Wikidata
|
||||
item-398 at level 4: list_item: Wikipedia indefinitely move-protected pages
|
||||
item-399 at level 4: list_item: Wikipedia indefinitely semi-protected pages
|
||||
item-400 at level 4: list_item: Articles with 'species' microformats
|
||||
item-401 at level 4: list_item: Articles containing Old English (ca. 450-1100)-language text
|
||||
item-402 at level 4: list_item: Articles containing Dutch-language text
|
||||
item-403 at level 4: list_item: Articles containing German-language text
|
||||
item-404 at level 4: list_item: Articles containing Norwegian-language text
|
||||
item-405 at level 4: list_item: Articles containing Lithuanian-language text
|
||||
item-406 at level 4: list_item: Articles containing Ancient Greek (to 1453)-language text
|
||||
item-407 at level 4: list_item: All articles with self-published sources
|
||||
item-408 at level 4: list_item: Articles with self-published sources from February 2020
|
||||
item-409 at level 4: list_item: All articles with unsourced statements
|
||||
item-410 at level 4: list_item: Articles with unsourced statements from January 2022
|
||||
item-411 at level 4: list_item: CS1: long volume value
|
||||
item-412 at level 4: list_item: Pages using Sister project links with wikidata mismatch
|
||||
item-413 at level 4: list_item: Pages using Sister project links with hidden wikidata
|
||||
item-414 at level 4: list_item: Webarchive template wayback links
|
||||
item-415 at level 4: list_item: Articles with Project Gutenberg links
|
||||
item-416 at level 4: list_item: Articles containing video clips
|
||||
item-417 at level 3: list: group list
|
||||
item-418 at level 4: list_item: This page was last edited on 21 September 2024, at 12:11 (UTC).
|
||||
item-419 at level 4: list_item: Text is available under the Crea ... tion, Inc., a non-profit organization.
|
||||
item-420 at level 3: list: group list
|
||||
item-421 at level 4: list_item: Privacy policy
|
||||
item-422 at level 4: list_item: About Wikipedia
|
||||
item-423 at level 4: list_item: Disclaimers
|
||||
item-424 at level 4: list_item: Contact Wikipedia
|
||||
item-425 at level 4: list_item: Code of Conduct
|
||||
item-426 at level 4: list_item: Developers
|
||||
item-427 at level 4: list_item: Statistics
|
||||
item-428 at level 4: list_item: Cookie statement
|
||||
item-429 at level 4: list_item: Mobile view
|
||||
item-430 at level 3: list: group list
|
||||
item-431 at level 3: list: group list
|
||||
item-432 at level 1: caption: Pacific black duck displaying the characteristic upending "duck"
|
||||
item-433 at level 1: caption: Male mallard.
|
||||
item-434 at level 1: caption: Wood ducks.
|
||||
item-435 at level 1: caption: Mallard landing in approach
|
||||
item-436 at level 1: caption: Male Mandarin duck
|
||||
item-437 at level 1: caption: Flying steamer ducks in Ushuaia, Argentina
|
||||
item-438 at level 1: caption: Female mallard in Cornwall, England
|
||||
item-439 at level 1: caption: Pecten along the bill
|
||||
item-440 at level 1: caption: Mallard duckling preening
|
||||
item-441 at level 1: caption: A Muscovy duckling
|
||||
item-442 at level 1: caption: Ringed teal
|
||||
item-443 at level 1: caption: Indian Runner ducks, a common breed of domestic ducks
|
||||
item-444 at level 1: caption: Three black-colored ducks in the coat of arms of Maaninka[49]
|
||||
item-244 at level 4: caption: Wood ducks.
|
||||
item-245 at level 2: section_header: Taxonomy
|
||||
item-246 at level 3: text: All ducks belong to the biologic ... ationships between various species.[9]
|
||||
item-247 at level 3: picture
|
||||
item-247 at level 4: caption: Mallard landing in approach
|
||||
item-248 at level 3: text: In most modern classifications, ... all size and stiff, upright tails.[14]
|
||||
item-249 at level 3: text: A number of other species called ... shelducks in the tribe Tadornini.[15]
|
||||
item-250 at level 2: section_header: Morphology
|
||||
item-251 at level 3: picture
|
||||
item-251 at level 4: caption: Male Mandarin duck
|
||||
item-252 at level 3: text: The overall body plan of ducks i ... is moult typically precedes migration.
|
||||
item-253 at level 3: text: The drakes of northern species o ... rkscrew shaped vagina to prevent rape.
|
||||
item-254 at level 2: section_header: Distribution and habitat
|
||||
item-255 at level 3: text: See also: List of Anseriformes by population
|
||||
item-256 at level 3: picture
|
||||
item-256 at level 4: caption: Flying steamer ducks in Ushuaia, Argentina
|
||||
item-257 at level 3: text: Ducks have a cosmopolitan distri ... endemic to such far-flung islands.[21]
|
||||
item-258 at level 3: picture
|
||||
item-258 at level 4: caption: Female mallard in Cornwall, England
|
||||
item-259 at level 3: text: Some duck species, mainly those ... t form after localised heavy rain.[23]
|
||||
item-260 at level 2: section_header: Behaviour
|
||||
item-261 at level 3: section_header: Feeding
|
||||
item-262 at level 4: picture
|
||||
item-262 at level 5: caption: Pecten along the bill
|
||||
item-263 at level 4: text: Ducks eat food sources such as g ... amphibians, worms, and small molluscs.
|
||||
item-264 at level 4: text: Dabbling ducks feed on the surfa ... thers and to hold slippery food items.
|
||||
item-265 at level 4: text: Diving ducks and sea ducks forag ... ave more difficulty taking off to fly.
|
||||
item-266 at level 4: text: A few specialized species such a ... apted to catch and swallow large fish.
|
||||
item-267 at level 4: text: The others have the characterist ... e nostrils come out through hard horn.
|
||||
item-268 at level 4: text: The Guardian published an articl ... the ducks and pollutes waterways.[25]
|
||||
item-269 at level 3: section_header: Breeding
|
||||
item-270 at level 4: picture
|
||||
item-270 at level 5: caption: A Muscovy duckling
|
||||
item-271 at level 4: text: Ducks generally only have one pa ... st and led her ducklings to water.[28]
|
||||
item-272 at level 3: section_header: Communication
|
||||
item-273 at level 4: text: Female mallard ducks (as well as ... laying calls or quieter contact calls.
|
||||
item-274 at level 4: text: A common urban legend claims tha ... annel television show MythBusters.[32]
|
||||
item-275 at level 3: section_header: Predators
|
||||
item-276 at level 4: picture
|
||||
item-276 at level 5: caption: Ringed teal
|
||||
item-277 at level 4: text: Ducks have many predators. Duckl ... or large birds, such as hawks or owls.
|
||||
item-278 at level 4: text: Adult ducks are fast fliers, but ... its speed and strength to catch ducks.
|
||||
item-279 at level 2: section_header: Relationship with humans
|
||||
item-280 at level 3: section_header: Hunting
|
||||
item-281 at level 4: text: Main article: Waterfowl hunting
|
||||
item-282 at level 4: text: Humans have hunted ducks since p ... evidence of this is uncommon.[35][42]
|
||||
item-283 at level 4: text: In many areas, wild ducks (inclu ... inated by pollutants such as PCBs.[44]
|
||||
item-284 at level 3: section_header: Domestication
|
||||
item-285 at level 4: text: Main article: Domestic duck
|
||||
item-286 at level 4: picture
|
||||
item-286 at level 5: caption: Indian Runner ducks, a common breed of domestic ducks
|
||||
item-287 at level 4: text: Ducks have many economic uses, b ... it weighs less than 1 kg (2.2 lb).[48]
|
||||
item-288 at level 3: section_header: Heraldry
|
||||
item-289 at level 4: picture
|
||||
item-289 at level 5: caption: Three black-colored ducks in the coat of arms of Maaninka[49]
|
||||
item-290 at level 4: text: Ducks appear on several coats of ... the coat of arms of Föglö (Åland).[51]
|
||||
item-291 at level 3: section_header: Cultural references
|
||||
item-292 at level 4: text: In 2002, psychologist Richard Wi ... 54] and was made into a movie in 1986.
|
||||
item-293 at level 4: text: The 1992 Disney film The Mighty ... Ducks minor league baseball team.[55]
|
||||
item-294 at level 2: section_header: See also
|
||||
item-295 at level 3: list: group list
|
||||
item-296 at level 4: list_item: Birds portal
|
||||
item-297 at level 4: picture
|
||||
item-298 at level 3: list: group list
|
||||
item-299 at level 4: list_item: Domestic duck
|
||||
item-300 at level 4: list_item: Duck as food
|
||||
item-301 at level 4: list_item: Duck test
|
||||
item-302 at level 4: list_item: Duck breeds
|
||||
item-303 at level 4: list_item: Fictional ducks
|
||||
item-304 at level 4: list_item: Rubber duck
|
||||
item-305 at level 2: section_header: Notes
|
||||
item-306 at level 3: section_header: Citations
|
||||
item-307 at level 4: list: group ordered list
|
||||
item-308 at level 5: list_item: ^ "Duckling". The American Herit ... n Company. 2006. Retrieved 2015-05-22.
|
||||
item-309 at level 5: list_item: ^ "Duckling". Kernerman English ... Ltd. 2000–2006. Retrieved 2015-05-22.
|
||||
item-310 at level 5: list_item: ^ Dohner, Janet Vorwald (2001). ... University Press. ISBN 978-0300138139.
|
||||
item-311 at level 5: list_item: ^ Visca, Curt; Visca, Kelley (20 ... Publishing Group. ISBN 9780823961566.
|
||||
item-312 at level 5: list_item: ^ a b c d Carboneras 1992, p. 536.
|
||||
item-313 at level 5: list_item: ^ Livezey 1986, pp. 737–738.
|
||||
item-314 at level 5: list_item: ^ Madsen, McHugh & de Kloet 1988, p. 452.
|
||||
item-315 at level 5: list_item: ^ Donne-Goussé, Laudet & Hänni 2002, pp. 353–354.
|
||||
item-316 at level 5: list_item: ^ a b c d e f Carboneras 1992, p. 540.
|
||||
item-317 at level 5: list_item: ^ Elphick, Dunning & Sibley 2001, p. 191.
|
||||
item-318 at level 5: list_item: ^ Kear 2005, p. 448.
|
||||
item-319 at level 5: list_item: ^ Kear 2005, p. 622–623.
|
||||
item-320 at level 5: list_item: ^ Kear 2005, p. 686.
|
||||
item-321 at level 5: list_item: ^ Elphick, Dunning & Sibley 2001, p. 193.
|
||||
item-322 at level 5: list_item: ^ a b c d e f g Carboneras 1992, p. 537.
|
||||
item-323 at level 5: list_item: ^ American Ornithologists' Union 1998, p. xix.
|
||||
item-324 at level 5: list_item: ^ American Ornithologists' Union 1998.
|
||||
item-325 at level 5: list_item: ^ Carboneras 1992, p. 538.
|
||||
item-326 at level 5: list_item: ^ Christidis & Boles 2008, p. 62.
|
||||
item-327 at level 5: list_item: ^ Shirihai 2008, pp. 239, 245.
|
||||
item-328 at level 5: list_item: ^ a b Pratt, Bruner & Berrett 1987, pp. 98–107.
|
||||
item-329 at level 5: list_item: ^ Fitter, Fitter & Hosking 2000, pp. 52–3.
|
||||
item-330 at level 5: list_item: ^ "Pacific Black Duck". www.wiresnr.org. Retrieved 2018-04-27.
|
||||
item-331 at level 5: list_item: ^ Ogden, Evans. "Dabbling Ducks". CWE. Retrieved 2006-11-02.
|
||||
item-332 at level 5: list_item: ^ Karl Mathiesen (16 March 2015) ... Guardian. Retrieved 13 November 2016.
|
||||
item-333 at level 5: list_item: ^ Rohwer, Frank C.; Anderson, Mi ... 4615-6787-5_4. ISBN 978-1-4615-6789-9.
|
||||
item-334 at level 5: list_item: ^ Smith, Cyndi M.; Cooke, Fred; ... 093/condor/102.1.201. hdl:10315/13797.
|
||||
item-335 at level 5: list_item: ^ "If You Find An Orphaned Duckl ... l on 2018-09-23. Retrieved 2018-12-22.
|
||||
item-336 at level 5: list_item: ^ Carver, Heather (2011). The Du ... 9780557901562.[self-published source]
|
||||
item-337 at level 5: list_item: ^ Titlow, Budd (2013-09-03). Bir ... man & Littlefield. ISBN 9780762797707.
|
||||
item-338 at level 5: list_item: ^ Amos, Jonathan (2003-09-08). " ... kers". BBC News. Retrieved 2006-11-02.
|
||||
item-339 at level 5: list_item: ^ "Mythbusters Episode 8". 12 December 2003.
|
||||
item-340 at level 5: list_item: ^ Erlandson 1994, p. 171.
|
||||
item-341 at level 5: list_item: ^ Jeffries 2008, pp. 168, 243.
|
||||
item-342 at level 5: list_item: ^ a b Sued-Badillo 2003, p. 65.
|
||||
item-343 at level 5: list_item: ^ Thorpe 1996, p. 68.
|
||||
item-344 at level 5: list_item: ^ Maisels 1999, p. 42.
|
||||
item-345 at level 5: list_item: ^ Rau 1876, p. 133.
|
||||
item-346 at level 5: list_item: ^ Higman 2012, p. 23.
|
||||
item-347 at level 5: list_item: ^ Hume 2012, p. 53.
|
||||
item-348 at level 5: list_item: ^ Hume 2012, p. 52.
|
||||
item-349 at level 5: list_item: ^ Fieldhouse 2002, p. 167.
|
||||
item-350 at level 5: list_item: ^ Livingston, A. D. (1998-01-01) ... Editions, Limited. ISBN 9781853263774.
|
||||
item-351 at level 5: list_item: ^ "Study plan for waterfowl inju ... on 2022-10-09. Retrieved 2 July 2019.
|
||||
item-352 at level 5: list_item: ^ "FAOSTAT". www.fao.org. Retrieved 2019-10-25.
|
||||
item-353 at level 5: list_item: ^ "Anas platyrhynchos, Domestic ... . Digimorph.org. Retrieved 2012-12-23.
|
||||
item-354 at level 5: list_item: ^ Sy Montgomery. "Mallard; Encyc ... Britannica.com. Retrieved 2012-12-23.
|
||||
item-355 at level 5: list_item: ^ Glenday, Craig (2014). Guinnes ... ited. pp. 135. ISBN 978-1-908843-15-9.
|
||||
item-356 at level 5: list_item: ^ Suomen kunnallisvaakunat (in F ... tto. 1982. p. 147. ISBN 951-773-085-3.
|
||||
item-357 at level 5: list_item: ^ "Lubānas simbolika" (in Latvian). Retrieved September 9, 2021.
|
||||
item-358 at level 5: list_item: ^ "Föglö" (in Swedish). Retrieved September 9, 2021.
|
||||
item-359 at level 5: list_item: ^ Young, Emma. "World's funniest ... w Scientist. Retrieved 7 January 2019.
|
||||
item-360 at level 5: list_item: ^ "Howard the Duck (character)". Grand Comics Database.
|
||||
item-361 at level 5: list_item: ^ Sanderson, Peter; Gilbert, Lau ... luding this bad-tempered talking duck.
|
||||
item-362 at level 5: list_item: ^ "The Duck". University of Oregon Athletics. Retrieved 2022-01-20.
|
||||
item-363 at level 3: section_header: Sources
|
||||
item-364 at level 4: list: group list
|
||||
item-365 at level 5: list_item: American Ornithologists' Union ( ... (PDF) from the original on 2022-10-09.
|
||||
item-366 at level 5: list_item: Carboneras, Carlos (1992). del H ... Lynx Edicions. ISBN 978-84-87334-10-8.
|
||||
item-367 at level 5: list_item: Christidis, Les; Boles, Walter E ... ro Publishing. ISBN 978-0-643-06511-6.
|
||||
item-368 at level 5: list_item: Donne-Goussé, Carole; Laudet, Vi ... /S1055-7903(02)00019-2. PMID 12099792.
|
||||
item-369 at level 5: list_item: Elphick, Chris; Dunning, John B. ... istopher Helm. ISBN 978-0-7136-6250-4.
|
||||
item-370 at level 5: list_item: Erlandson, Jon M. (1994). Early ... usiness Media. ISBN 978-1-4419-3231-0.
|
||||
item-371 at level 5: list_item: Fieldhouse, Paul (2002). Food, F ... ara: ABC-CLIO. ISBN 978-1-61069-412-4.
|
||||
item-372 at level 5: list_item: Fitter, Julian; Fitter, Daniel; ... versity Press. ISBN 978-0-691-10295-5.
|
||||
item-373 at level 5: list_item: Higman, B. W. (2012). How Food M ... Wiley & Sons. ISBN 978-1-4051-8947-7.
|
||||
item-374 at level 5: list_item: Hume, Julian H. (2012). Extinct ... istopher Helm. ISBN 978-1-4729-3744-5.
|
||||
item-375 at level 5: list_item: Jeffries, Richard (2008). Holoce ... Alabama Press. ISBN 978-0-8173-1658-7.
|
||||
item-376 at level 5: list_item: Kear, Janet, ed. (2005). Ducks, ... versity Press. ISBN 978-0-19-861009-0.
|
||||
item-377 at level 5: list_item: Livezey, Bradley C. (October 198 ... (PDF) from the original on 2022-10-09.
|
||||
item-378 at level 5: list_item: Madsen, Cort S.; McHugh, Kevin P ... (PDF) from the original on 2022-10-09.
|
||||
item-379 at level 5: list_item: Maisels, Charles Keith (1999). E ... on: Routledge. ISBN 978-0-415-10975-8.
|
||||
item-380 at level 5: list_item: Pratt, H. Douglas; Bruner, Phill ... University Press. ISBN 0-691-02399-9.
|
||||
item-381 at level 5: list_item: Rau, Charles (1876). Early Man i ... ork: Harper & Brothers. LCCN 05040168.
|
||||
item-382 at level 5: list_item: Shirihai, Hadoram (2008). A Comp ... versity Press. ISBN 978-0-691-13666-0.
|
||||
item-383 at level 5: list_item: Sued-Badillo, Jalil (2003). Auto ... Paris: UNESCO. ISBN 978-92-3-103832-7.
|
||||
item-384 at level 5: list_item: Thorpe, I. J. (1996). The Origin ... rk: Routledge. ISBN 978-0-415-08009-5.
|
||||
item-385 at level 2: section_header: External links
|
||||
item-386 at level 3: text: Duck at Wikipedia's sister projects
|
||||
item-387 at level 3: list: group list
|
||||
item-388 at level 4: list_item: Definitions from Wiktionary
|
||||
item-389 at level 4: picture
|
||||
item-390 at level 4: list_item: Media from Commons
|
||||
item-391 at level 4: picture
|
||||
item-392 at level 4: list_item: Quotations from Wikiquote
|
||||
item-393 at level 4: picture
|
||||
item-394 at level 4: list_item: Recipes from Wikibooks
|
||||
item-395 at level 4: picture
|
||||
item-396 at level 4: list_item: Taxa from Wikispecies
|
||||
item-397 at level 4: picture
|
||||
item-398 at level 4: list_item: Data from Wikidata
|
||||
item-399 at level 4: picture
|
||||
item-400 at level 3: list: group list
|
||||
item-401 at level 4: list_item: list of books (useful looking abstracts)
|
||||
item-402 at level 4: list_item: Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine
|
||||
item-403 at level 4: list_item: Ducks at a Distance, by Rob Hine ... uide to identification of US waterfowl
|
||||
item-404 at level 3: picture
|
||||
item-405 at level 3: table with [3x2]
|
||||
item-406 at level 3: text: Retrieved from "https://en.wikip ... index.php?title=Duck&oldid=1246843351"
|
||||
item-407 at level 3: text: Categories:
|
||||
item-408 at level 3: list: group list
|
||||
item-409 at level 4: list_item: Ducks
|
||||
item-410 at level 4: list_item: Game birds
|
||||
item-411 at level 4: list_item: Bird common names
|
||||
item-412 at level 3: text: Hidden categories:
|
||||
item-413 at level 3: list: group list
|
||||
item-414 at level 4: list_item: All accuracy disputes
|
||||
item-415 at level 4: list_item: Accuracy disputes from February 2020
|
||||
item-416 at level 4: list_item: CS1 Finnish-language sources (fi)
|
||||
item-417 at level 4: list_item: CS1 Latvian-language sources (lv)
|
||||
item-418 at level 4: list_item: CS1 Swedish-language sources (sv)
|
||||
item-419 at level 4: list_item: Articles with short description
|
||||
item-420 at level 4: list_item: Short description is different from Wikidata
|
||||
item-421 at level 4: list_item: Wikipedia indefinitely move-protected pages
|
||||
item-422 at level 4: list_item: Wikipedia indefinitely semi-protected pages
|
||||
item-423 at level 4: list_item: Articles with 'species' microformats
|
||||
item-424 at level 4: list_item: Articles containing Old English (ca. 450-1100)-language text
|
||||
item-425 at level 4: list_item: Articles containing Dutch-language text
|
||||
item-426 at level 4: list_item: Articles containing German-language text
|
||||
item-427 at level 4: list_item: Articles containing Norwegian-language text
|
||||
item-428 at level 4: list_item: Articles containing Lithuanian-language text
|
||||
item-429 at level 4: list_item: Articles containing Ancient Greek (to 1453)-language text
|
||||
item-430 at level 4: list_item: All articles with self-published sources
|
||||
item-431 at level 4: list_item: Articles with self-published sources from February 2020
|
||||
item-432 at level 4: list_item: All articles with unsourced statements
|
||||
item-433 at level 4: list_item: Articles with unsourced statements from January 2022
|
||||
item-434 at level 4: list_item: CS1: long volume value
|
||||
item-435 at level 4: list_item: Pages using Sister project links with wikidata mismatch
|
||||
item-436 at level 4: list_item: Pages using Sister project links with hidden wikidata
|
||||
item-437 at level 4: list_item: Webarchive template wayback links
|
||||
item-438 at level 4: list_item: Articles with Project Gutenberg links
|
||||
item-439 at level 4: list_item: Articles containing video clips
|
||||
item-440 at level 3: list: group list
|
||||
item-441 at level 4: list_item: This page was last edited on 21 September 2024, at 12:11 (UTC).
|
||||
item-442 at level 4: list_item: Text is available under the Crea ... tion, Inc., a non-profit organization.
|
||||
item-443 at level 3: list: group list
|
||||
item-444 at level 4: list_item: Privacy policy
|
||||
item-445 at level 4: list_item: About Wikipedia
|
||||
item-446 at level 4: list_item: Disclaimers
|
||||
item-447 at level 4: list_item: Contact Wikipedia
|
||||
item-448 at level 4: list_item: Code of Conduct
|
||||
item-449 at level 4: list_item: Developers
|
||||
item-450 at level 4: list_item: Statistics
|
||||
item-451 at level 4: list_item: Cookie statement
|
||||
item-452 at level 4: list_item: Mobile view
|
||||
item-453 at level 3: list: group list
|
||||
item-454 at level 4: picture
|
||||
item-454 at level 5: caption: Wikimedia Foundation
|
||||
item-455 at level 4: picture
|
||||
item-455 at level 5: caption: Powered by MediaWiki
|
||||
item-456 at level 3: list: group list
|
||||
item-457 at level 1: caption: Pacific black duck displaying the characteristic upending "duck"
|
||||
item-458 at level 1: caption: Male mallard.
|
||||
item-459 at level 1: caption: Wood ducks.
|
||||
item-460 at level 1: caption: Mallard landing in approach
|
||||
item-461 at level 1: caption: Male Mandarin duck
|
||||
item-462 at level 1: caption: Flying steamer ducks in Ushuaia, Argentina
|
||||
item-463 at level 1: caption: Female mallard in Cornwall, England
|
||||
item-464 at level 1: caption: Pecten along the bill
|
||||
item-465 at level 1: caption: A Muscovy duckling
|
||||
item-466 at level 1: caption: Ringed teal
|
||||
item-467 at level 1: caption: Indian Runner ducks, a common breed of domestic ducks
|
||||
item-468 at level 1: caption: Three black-colored ducks in the coat of arms of Maaninka[49]
|
||||
item-469 at level 1: caption: Wikimedia Foundation
|
||||
item-470 at level 1: caption: Powered by MediaWiki
|
3143
tests/data/groundtruth/docling_v2/wiki_duck.html.json
vendored
3143
tests/data/groundtruth/docling_v2/wiki_duck.html.json
vendored
File diff suppressed because it is too large
Load Diff
183
tests/data/groundtruth/docling_v2/wiki_duck.html.md
vendored
183
tests/data/groundtruth/docling_v2/wiki_duck.html.md
vendored
@ -1,5 +1,9 @@
|
||||
## Contents
|
||||
|
||||
move to sidebar
|
||||
|
||||
hide
|
||||
|
||||
- (Top)
|
||||
- 1 Etymology
|
||||
- 2 Taxonomy
|
||||
@ -21,8 +25,12 @@
|
||||
- 8.2 Sources
|
||||
- 9 External links
|
||||
|
||||
Toggle the table of contents
|
||||
|
||||
# Duck
|
||||
|
||||
136 languages
|
||||
|
||||
- Acèh
|
||||
- Afrikaans
|
||||
- Alemannisch
|
||||
@ -160,15 +168,25 @@
|
||||
- Žemaitėška
|
||||
- 中文
|
||||
|
||||
Edit links
|
||||
|
||||
- Article
|
||||
- Talk
|
||||
|
||||
English
|
||||
|
||||
- Read
|
||||
- View source
|
||||
- View history
|
||||
|
||||
Tools
|
||||
|
||||
Tools
|
||||
|
||||
move to sidebar
|
||||
|
||||
hide
|
||||
|
||||
Actions
|
||||
|
||||
- Read
|
||||
@ -200,15 +218,23 @@ In other projects
|
||||
|
||||
Appearance
|
||||
|
||||
<!-- image -->
|
||||
move to sidebar
|
||||
|
||||
hide
|
||||
|
||||
From Wikipedia, the free encyclopedia
|
||||
|
||||
(Redirected from Duckling)
|
||||
|
||||
Common name for many species of bird
|
||||
|
||||
This article is about the bird. For duck as a food, see . For other uses, see .
|
||||
This article is about the bird. For duck as a food, see Duck as food. For other uses, see Duck (disambiguation).
|
||||
|
||||
"Duckling" redirects here. For other uses, see .
|
||||
"Duckling" redirects here. For other uses, see Duckling (disambiguation).
|
||||
|
||||
<!-- image -->
|
||||
|
||||
<!-- image -->
|
||||
|
||||
| Duck | Duck |
|
||||
|--------------------------------|--------------------------------|
|
||||
@ -275,6 +301,8 @@ The drakes of northern species often have extravagant plumage, but that is moult
|
||||
|
||||
## Distribution and habitat
|
||||
|
||||
See also: List of Anseriformes by population
|
||||
|
||||
Flying steamer ducks in Ushuaia, Argentina
|
||||
|
||||
<!-- image -->
|
||||
@ -295,10 +323,6 @@ Pecten along the bill
|
||||
|
||||
<!-- image -->
|
||||
|
||||
Mallard duckling preening
|
||||
|
||||
<!-- image -->
|
||||
|
||||
Ducks eat food sources such as grasses, aquatic plants, fish, insects, small amphibians, worms, and small molluscs.
|
||||
|
||||
Dabbling ducks feed on the surface of water or on land, or as deep as they can reach by up-ending without completely submerging.[24] Along the edge of the bill, there is a comb-like structure called a pecten. This strains the water squirting from the side of the bill and traps any food. The pecten is also used to preen feathers and to hold slippery food items.
|
||||
@ -339,12 +363,16 @@ Adult ducks are fast fliers, but may be caught on the water by large aquatic pre
|
||||
|
||||
### Hunting
|
||||
|
||||
Main article: Waterfowl hunting
|
||||
|
||||
Humans have hunted ducks since prehistoric times. Excavations of middens in California dating to 7800 – 6400 BP have turned up bones of ducks, including at least one now-extinct flightless species.[33] Ducks were captured in "significant numbers" by Holocene inhabitants of the lower Ohio River valley, suggesting they took advantage of the seasonal bounty provided by migrating waterfowl.[34] Neolithic hunters in locations as far apart as the Caribbean,[35] Scandinavia,[36] Egypt,[37] Switzerland,[38] and China relied on ducks as a source of protein for some or all of the year.[39] Archeological evidence shows that Māori people in New Zealand hunted the flightless Finsch's duck, possibly to extinction, though rat predation may also have contributed to its fate.[40] A similar end awaited the Chatham duck, a species with reduced flying capabilities which went extinct shortly after its island was colonised by Polynesian settlers.[41] It is probable that duck eggs were gathered by Neolithic hunter-gathers as well, though hard evidence of this is uncommon.[35][42]
|
||||
|
||||
In many areas, wild ducks (including ducks farmed and released into the wild) are hunted for food or sport,[43] by shooting, or by being trapped using duck decoys. Because an idle floating duck or a duck squatting on land cannot react to fly or move quickly, "a sitting duck" has come to mean "an easy target". These ducks may be contaminated by pollutants such as PCBs.[44]
|
||||
|
||||
### Domestication
|
||||
|
||||
Main article: Domestic duck
|
||||
|
||||
Indian Runner ducks, a common breed of domestic ducks
|
||||
|
||||
<!-- image -->
|
||||
@ -368,6 +396,7 @@ The 1992 Disney film The Mighty Ducks, starring Emilio Estevez, chose the duck a
|
||||
## See also
|
||||
|
||||
- Birds portal
|
||||
<!-- image -->
|
||||
|
||||
- Domestic duck
|
||||
- Duck as food
|
||||
@ -382,106 +411,114 @@ The 1992 Disney film The Mighty Ducks, starring Emilio Estevez, chose the duck a
|
||||
|
||||
1. ^ "Duckling". The American Heritage Dictionary of the English Language, Fourth Edition. Houghton Mifflin Company. 2006. Retrieved 2015-05-22.
|
||||
2. ^ "Duckling". Kernerman English Multilingual Dictionary (Beta Version). K. Dictionaries Ltd. 2000–2006. Retrieved 2015-05-22.
|
||||
3. ^ Dohner, Janet Vorwald (2001). The Encyclopedia of Historic and Endangered Livestock and Poultry Breeds. Yale University Press. ISBN 978-0300138139.
|
||||
4. ^ Visca, Curt; Visca, Kelley (2003). How to Draw Cartoon Birds. The Rosen Publishing Group. ISBN 9780823961566.
|
||||
5. ^ a b c d Carboneras 1992, p. 536.
|
||||
6. ^ Livezey 1986, pp. 737–738.
|
||||
7. ^ Madsen, McHugh & de Kloet 1988, p. 452.
|
||||
8. ^ Donne-Goussé, Laudet & Hänni 2002, pp. 353–354.
|
||||
9. ^ a b c d e f Carboneras 1992, p. 540.
|
||||
10. ^ Elphick, Dunning & Sibley 2001, p. 191.
|
||||
11. ^ Kear 2005, p. 448.
|
||||
12. ^ Kear 2005, p. 622–623.
|
||||
13. ^ Kear 2005, p. 686.
|
||||
14. ^ Elphick, Dunning & Sibley 2001, p. 193.
|
||||
15. ^ a b c d e f g Carboneras 1992, p. 537.
|
||||
16. ^ American Ornithologists' Union 1998, p. xix.
|
||||
3. ^ Dohner, Janet Vorwald (2001). The Encyclopedia of Historic and Endangered Livestock and Poultry Breeds. Yale University Press. ISBN 978-0300138139.
|
||||
4. ^ Visca, Curt; Visca, Kelley (2003). How to Draw Cartoon Birds. The Rosen Publishing Group. ISBN 9780823961566.
|
||||
5. ^ a b c d Carboneras 1992, p. 536.
|
||||
6. ^ Livezey 1986, pp. 737–738.
|
||||
7. ^ Madsen, McHugh & de Kloet 1988, p. 452.
|
||||
8. ^ Donne-Goussé, Laudet & Hänni 2002, pp. 353–354.
|
||||
9. ^ a b c d e f Carboneras 1992, p. 540.
|
||||
10. ^ Elphick, Dunning & Sibley 2001, p. 191.
|
||||
11. ^ Kear 2005, p. 448.
|
||||
12. ^ Kear 2005, p. 622–623.
|
||||
13. ^ Kear 2005, p. 686.
|
||||
14. ^ Elphick, Dunning & Sibley 2001, p. 193.
|
||||
15. ^ a b c d e f g Carboneras 1992, p. 537.
|
||||
16. ^ American Ornithologists' Union 1998, p. xix.
|
||||
17. ^ American Ornithologists' Union 1998.
|
||||
18. ^ Carboneras 1992, p. 538.
|
||||
19. ^ Christidis & Boles 2008, p. 62.
|
||||
20. ^ Shirihai 2008, pp. 239, 245.
|
||||
21. ^ a b Pratt, Bruner & Berrett 1987, pp. 98–107.
|
||||
22. ^ Fitter, Fitter & Hosking 2000, pp. 52–3.
|
||||
18. ^ Carboneras 1992, p. 538.
|
||||
19. ^ Christidis & Boles 2008, p. 62.
|
||||
20. ^ Shirihai 2008, pp. 239, 245.
|
||||
21. ^ a b Pratt, Bruner & Berrett 1987, pp. 98–107.
|
||||
22. ^ Fitter, Fitter & Hosking 2000, pp. 52–3.
|
||||
23. ^ "Pacific Black Duck". www.wiresnr.org. Retrieved 2018-04-27.
|
||||
24. ^ Ogden, Evans. "Dabbling Ducks". CWE. Retrieved 2006-11-02.
|
||||
25. ^ Karl Mathiesen (16 March 2015). "Don't feed the ducks bread, say conservationists". The Guardian. Retrieved 13 November 2016.
|
||||
26. ^ Rohwer, Frank C.; Anderson, Michael G. (1988). "Female-Biased Philopatry, Monogamy, and the Timing of Pair Formation in Migratory Waterfowl". Current Ornithology. pp. 187–221. doi:10.1007/978-1-4615-6787-5\_4. ISBN 978-1-4615-6789-9.
|
||||
26. ^ Rohwer, Frank C.; Anderson, Michael G. (1988). "Female-Biased Philopatry, Monogamy, and the Timing of Pair Formation in Migratory Waterfowl". Current Ornithology. pp. 187–221. doi:10.1007/978-1-4615-6787-5\_4. ISBN 978-1-4615-6789-9.
|
||||
27. ^ Smith, Cyndi M.; Cooke, Fred; Robertson, Gregory J.; Goudie, R. Ian; Boyd, W. Sean (2000). "Long-Term Pair Bonds in Harlequin Ducks". The Condor. 102 (1): 201–205. doi:10.1093/condor/102.1.201. hdl:10315/13797.
|
||||
28. ^ "If You Find An Orphaned Duckling - Wildlife Rehabber". wildliferehabber.com. Archived from the original on 2018-09-23. Retrieved 2018-12-22.
|
||||
29. ^ Carver, Heather (2011). The Duck Bible. Lulu.com. ISBN 9780557901562.[self-published source]
|
||||
30. ^ Titlow, Budd (2013-09-03). Bird Brains: Inside the Strange Minds of Our Fine Feathered Friends. Rowman & Littlefield. ISBN 9780762797707.
|
||||
29. ^ Carver, Heather (2011). The Duck Bible. Lulu.com. ISBN 9780557901562.[self-published source]
|
||||
30. ^ Titlow, Budd (2013-09-03). Bird Brains: Inside the Strange Minds of Our Fine Feathered Friends. Rowman & Littlefield. ISBN 9780762797707.
|
||||
31. ^ Amos, Jonathan (2003-09-08). "Sound science is quackers". BBC News. Retrieved 2006-11-02.
|
||||
32. ^ "Mythbusters Episode 8". 12 December 2003.
|
||||
33. ^ Erlandson 1994, p. 171.
|
||||
34. ^ Jeffries 2008, pp. 168, 243.
|
||||
35. ^ a b Sued-Badillo 2003, p. 65.
|
||||
36. ^ Thorpe 1996, p. 68.
|
||||
37. ^ Maisels 1999, p. 42.
|
||||
38. ^ Rau 1876, p. 133.
|
||||
39. ^ Higman 2012, p. 23.
|
||||
40. ^ Hume 2012, p. 53.
|
||||
41. ^ Hume 2012, p. 52.
|
||||
42. ^ Fieldhouse 2002, p. 167.
|
||||
43. ^ Livingston, A. D. (1998-01-01). Guide to Edible Plants and Animals. Wordsworth Editions, Limited. ISBN 9781853263774.
|
||||
44. ^ "Study plan for waterfowl injury assessment: Determining PCB concentrations in Hudson river resident waterfowl" (PDF). New York State Department of Environmental Conservation. US Department of Commerce. December 2008. p. 3. Archived (PDF) from the original on 2022-10-09. Retrieved 2 July 2019.
|
||||
33. ^ Erlandson 1994, p. 171.
|
||||
34. ^ Jeffries 2008, pp. 168, 243.
|
||||
35. ^ a b Sued-Badillo 2003, p. 65.
|
||||
36. ^ Thorpe 1996, p. 68.
|
||||
37. ^ Maisels 1999, p. 42.
|
||||
38. ^ Rau 1876, p. 133.
|
||||
39. ^ Higman 2012, p. 23.
|
||||
40. ^ Hume 2012, p. 53.
|
||||
41. ^ Hume 2012, p. 52.
|
||||
42. ^ Fieldhouse 2002, p. 167.
|
||||
43. ^ Livingston, A. D. (1998-01-01). Guide to Edible Plants and Animals. Wordsworth Editions, Limited. ISBN 9781853263774.
|
||||
44. ^ "Study plan for waterfowl injury assessment: Determining PCB concentrations in Hudson river resident waterfowl" (PDF). New York State Department of Environmental Conservation. US Department of Commerce. December 2008. p. 3. Archived (PDF) from the original on 2022-10-09. Retrieved 2 July 2019.
|
||||
45. ^ "FAOSTAT". www.fao.org. Retrieved 2019-10-25.
|
||||
46. ^ "Anas platyrhynchos, Domestic Duck; DigiMorph Staff - The University of Texas at Austin". Digimorph.org. Retrieved 2012-12-23.
|
||||
47. ^ Sy Montgomery. "Mallard; Encyclopædia Britannica". Britannica.com. Retrieved 2012-12-23.
|
||||
48. ^ Glenday, Craig (2014). Guinness World Records. Guinness World Records Limited. pp. 135. ISBN 978-1-908843-15-9.
|
||||
49. ^ Suomen kunnallisvaakunat (in Finnish). Suomen Kunnallisliitto. 1982. p. 147. ISBN 951-773-085-3.
|
||||
48. ^ Glenday, Craig (2014). Guinness World Records. Guinness World Records Limited. pp. 135. ISBN 978-1-908843-15-9.
|
||||
49. ^ Suomen kunnallisvaakunat (in Finnish). Suomen Kunnallisliitto. 1982. p. 147. ISBN 951-773-085-3.
|
||||
50. ^ "Lubānas simbolika" (in Latvian). Retrieved September 9, 2021.
|
||||
51. ^ "Föglö" (in Swedish). Retrieved September 9, 2021.
|
||||
52. ^ Young, Emma. "World's funniest joke revealed". New Scientist. Retrieved 7 January 2019.
|
||||
53. ^ "Howard the Duck (character)". Grand Comics Database.
|
||||
54. ^ Sanderson, Peter; Gilbert, Laura (2008). "1970s". Marvel Chronicle A Year by Year History. London, United Kingdom: Dorling Kindersley. p. 161. ISBN 978-0756641238. December saw the debut of the cigar-smoking Howard the Duck. In this story by writer Steve Gerber and artist Val Mayerik, various beings from different realities had begun turning up in the Man-Thing's Florida swamp, including this bad-tempered talking duck.
|
||||
54. ^ Sanderson, Peter; Gilbert, Laura (2008). "1970s". Marvel Chronicle A Year by Year History. London, United Kingdom: Dorling Kindersley. p. 161. ISBN 978-0756641238. December saw the debut of the cigar-smoking Howard the Duck. In this story by writer Steve Gerber and artist Val Mayerik, various beings from different realities had begun turning up in the Man-Thing's Florida swamp, including this bad-tempered talking duck.
|
||||
55. ^ "The Duck". University of Oregon Athletics. Retrieved 2022-01-20.
|
||||
|
||||
### Sources
|
||||
|
||||
- American Ornithologists' Union (1998). Checklist of North American Birds (PDF). Washington, DC: American Ornithologists' Union. ISBN 978-1-891276-00-2. Archived (PDF) from the original on 2022-10-09.
|
||||
- Carboneras, Carlos (1992). del Hoyo, Josep; Elliott, Andrew; Sargatal, Jordi (eds.). Handbook of the Birds of the World. Vol. 1: Ostrich to Ducks. Barcelona: Lynx Edicions. ISBN 978-84-87334-10-8.
|
||||
- Christidis, Les; Boles, Walter E., eds. (2008). Systematics and Taxonomy of Australian Birds. Collingwood, VIC: Csiro Publishing. ISBN 978-0-643-06511-6.
|
||||
- Donne-Goussé, Carole; Laudet, Vincent; Hänni, Catherine (July 2002). "A molecular phylogeny of Anseriformes based on mitochondrial DNA analysis". Molecular Phylogenetics and Evolution. 23 (3): 339–356. Bibcode:2002MolPE..23..339D. doi:10.1016/S1055-7903(02)00019-2. PMID 12099792.
|
||||
- Elphick, Chris; Dunning, John B. Jr.; Sibley, David, eds. (2001). The Sibley Guide to Bird Life and Behaviour. London: Christopher Helm. ISBN 978-0-7136-6250-4.
|
||||
- Erlandson, Jon M. (1994). Early Hunter-Gatherers of the California Coast. New York, NY: Springer Science & Business Media. ISBN 978-1-4419-3231-0.
|
||||
- Fieldhouse, Paul (2002). Food, Feasts, and Faith: An Encyclopedia of Food Culture in World Religions. Vol. I: A–K. Santa Barbara: ABC-CLIO. ISBN 978-1-61069-412-4.
|
||||
- Fitter, Julian; Fitter, Daniel; Hosking, David (2000). Wildlife of the Galápagos. Princeton, NJ: Princeton University Press. ISBN 978-0-691-10295-5.
|
||||
- Higman, B. W. (2012). How Food Made History. Chichester, UK: John Wiley & Sons. ISBN 978-1-4051-8947-7.
|
||||
- Hume, Julian H. (2012). Extinct Birds. London: Christopher Helm. ISBN 978-1-4729-3744-5.
|
||||
- Jeffries, Richard (2008). Holocene Hunter-Gatherers of the Lower Ohio River Valley. Tuscaloosa: University of Alabama Press. ISBN 978-0-8173-1658-7.
|
||||
- Kear, Janet, ed. (2005). Ducks, Geese and Swans: Species Accounts (Cairina to Mergus). Bird Families of the World. Oxford: Oxford University Press. ISBN 978-0-19-861009-0.
|
||||
- American Ornithologists' Union (1998). Checklist of North American Birds (PDF). Washington, DC: American Ornithologists' Union. ISBN 978-1-891276-00-2. Archived (PDF) from the original on 2022-10-09.
|
||||
- Carboneras, Carlos (1992). del Hoyo, Josep; Elliott, Andrew; Sargatal, Jordi (eds.). Handbook of the Birds of the World. Vol. 1: Ostrich to Ducks. Barcelona: Lynx Edicions. ISBN 978-84-87334-10-8.
|
||||
- Christidis, Les; Boles, Walter E., eds. (2008). Systematics and Taxonomy of Australian Birds. Collingwood, VIC: Csiro Publishing. ISBN 978-0-643-06511-6.
|
||||
- Donne-Goussé, Carole; Laudet, Vincent; Hänni, Catherine (July 2002). "A molecular phylogeny of Anseriformes based on mitochondrial DNA analysis". Molecular Phylogenetics and Evolution. 23 (3): 339–356. Bibcode:2002MolPE..23..339D. doi:10.1016/S1055-7903(02)00019-2. PMID 12099792.
|
||||
- Elphick, Chris; Dunning, John B. Jr.; Sibley, David, eds. (2001). The Sibley Guide to Bird Life and Behaviour. London: Christopher Helm. ISBN 978-0-7136-6250-4.
|
||||
- Erlandson, Jon M. (1994). Early Hunter-Gatherers of the California Coast. New York, NY: Springer Science & Business Media. ISBN 978-1-4419-3231-0.
|
||||
- Fieldhouse, Paul (2002). Food, Feasts, and Faith: An Encyclopedia of Food Culture in World Religions. Vol. I: A–K. Santa Barbara: ABC-CLIO. ISBN 978-1-61069-412-4.
|
||||
- Fitter, Julian; Fitter, Daniel; Hosking, David (2000). Wildlife of the Galápagos. Princeton, NJ: Princeton University Press. ISBN 978-0-691-10295-5.
|
||||
- Higman, B. W. (2012). How Food Made History. Chichester, UK: John Wiley & Sons. ISBN 978-1-4051-8947-7.
|
||||
- Hume, Julian H. (2012). Extinct Birds. London: Christopher Helm. ISBN 978-1-4729-3744-5.
|
||||
- Jeffries, Richard (2008). Holocene Hunter-Gatherers of the Lower Ohio River Valley. Tuscaloosa: University of Alabama Press. ISBN 978-0-8173-1658-7.
|
||||
- Kear, Janet, ed. (2005). Ducks, Geese and Swans: Species Accounts (Cairina to Mergus). Bird Families of the World. Oxford: Oxford University Press. ISBN 978-0-19-861009-0.
|
||||
- Livezey, Bradley C. (October 1986). "A phylogenetic analysis of recent Anseriform genera using morphological characters" (PDF). The Auk. 103 (4): 737–754. doi:10.1093/auk/103.4.737. Archived (PDF) from the original on 2022-10-09.
|
||||
- Madsen, Cort S.; McHugh, Kevin P.; de Kloet, Siwo R. (July 1988). "A partial classification of waterfowl (Anatidae) based on single-copy DNA" (PDF). The Auk. 105 (3): 452–459. doi:10.1093/auk/105.3.452. Archived (PDF) from the original on 2022-10-09.
|
||||
- Maisels, Charles Keith (1999). Early Civilizations of the Old World. London: Routledge. ISBN 978-0-415-10975-8.
|
||||
- Pratt, H. Douglas; Bruner, Phillip L.; Berrett, Delwyn G. (1987). A Field Guide to the Birds of Hawaii and the Tropical Pacific. Princeton, NJ: Princeton University Press. ISBN 0-691-02399-9.
|
||||
- Rau, Charles (1876). Early Man in Europe. New York: Harper & Brothers. LCCN 05040168.
|
||||
- Shirihai, Hadoram (2008). A Complete Guide to Antarctic Wildlife. Princeton, NJ, US: Princeton University Press. ISBN 978-0-691-13666-0.
|
||||
- Sued-Badillo, Jalil (2003). Autochthonous Societies. General History of the Caribbean. Paris: UNESCO. ISBN 978-92-3-103832-7.
|
||||
- Thorpe, I. J. (1996). The Origins of Agriculture in Europe. New York: Routledge. ISBN 978-0-415-08009-5.
|
||||
- Maisels, Charles Keith (1999). Early Civilizations of the Old World. London: Routledge. ISBN 978-0-415-10975-8.
|
||||
- Pratt, H. Douglas; Bruner, Phillip L.; Berrett, Delwyn G. (1987). A Field Guide to the Birds of Hawaii and the Tropical Pacific. Princeton, NJ: Princeton University Press. ISBN 0-691-02399-9.
|
||||
- Rau, Charles (1876). Early Man in Europe. New York: Harper & Brothers. LCCN 05040168.
|
||||
- Shirihai, Hadoram (2008). A Complete Guide to Antarctic Wildlife. Princeton, NJ, US: Princeton University Press. ISBN 978-0-691-13666-0.
|
||||
- Sued-Badillo, Jalil (2003). Autochthonous Societies. General History of the Caribbean. Paris: UNESCO. ISBN 978-92-3-103832-7.
|
||||
- Thorpe, I. J. (1996). The Origins of Agriculture in Europe. New York: Routledge. ISBN 978-0-415-08009-5.
|
||||
|
||||
## External links
|
||||
|
||||
Duck at Wikipedia's sister projects
|
||||
|
||||
- Definitions from Wiktionary
|
||||
<!-- image -->
|
||||
- Media from Commons
|
||||
<!-- image -->
|
||||
- Quotations from Wikiquote
|
||||
<!-- image -->
|
||||
- Recipes from Wikibooks
|
||||
<!-- image -->
|
||||
- Taxa from Wikispecies
|
||||
<!-- image -->
|
||||
- Data from Wikidata
|
||||
<!-- image -->
|
||||
|
||||
- list of books (useful looking abstracts)
|
||||
- Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine
|
||||
- Ducks at a Distance, by Rob Hines at Project Gutenberg - A modern illustrated guide to identification of US waterfowl
|
||||
|
||||
<!-- image -->
|
||||
|
||||
| Authority control databases | Authority control databases |
|
||||
|--------------------------------|----------------------------------------------|
|
||||
| National | United StatesFranceBnF dataJapanLatviaIsrael |
|
||||
| Other | IdRef |
|
||||
|
||||
<!-- image -->
|
||||
Retrieved from "https://en.wikipedia.org/w/index.php?title=Duck&oldid=1246843351"
|
||||
|
||||
Retrieved from ""
|
||||
|
||||
:
|
||||
Categories:
|
||||
|
||||
- Ducks
|
||||
- Game birds
|
||||
@ -516,9 +553,8 @@ Hidden categories:
|
||||
- Articles with Project Gutenberg links
|
||||
- Articles containing video clips
|
||||
|
||||
- This page was last edited on 21 September 2024, at 12:11 (UTC).
|
||||
- Text is available under the Creative Commons Attribution-ShareAlike License 4.0;
|
||||
additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
|
||||
- This page was last edited on 21 September 2024, at 12:11 (UTC).
|
||||
- Text is available under the Creative Commons Attribution-ShareAlike License 4.0; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
|
||||
|
||||
- Privacy policy
|
||||
- About Wikipedia
|
||||
@ -528,4 +564,11 @@ additional terms may apply. By using this site, you agree to the Terms of Use an
|
||||
- Developers
|
||||
- Statistics
|
||||
- Cookie statement
|
||||
- Mobile view
|
||||
- Mobile view
|
||||
|
||||
Wikimedia Foundation
|
||||
|
||||
<!-- image -->
|
||||
Powered by MediaWiki
|
||||
|
||||
<!-- image -->
|
21
tests/data/html/example_09.html
vendored
Normal file
21
tests/data/html/example_09.html
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
<html>
|
||||
<body>
|
||||
<h1>Introduction to parsing HTML files with <img src="https://docling-project.github.io/docling/assets/logo.png" alt="Docling" height="64"> Docling</h1>
|
||||
<p>Docling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem.</p>
|
||||
<h2>Supported file formats</h2>
|
||||
<p>Docling supports multiple file formats.</p>
|
||||
<ul>
|
||||
<li><img src="https://github.com/docling-project/docling/tree/main/docs/assets/pdf.png" height="32" alt="PDF">Advanced PDF understanding</li>
|
||||
<li><img src="https://github.com/docling-project/docling/tree/main/docs/assets/docx.png" height="32" alt="DOCX">Microsoft Office DOCX</li>
|
||||
<li><img src="https://github.com/docling-project/docling/tree/main/docs/assets/html.png" height="32" alt="HTML">HTML files (with optional support for images)</li>
|
||||
</ul>
|
||||
<h3>Three backends for handling HTML files</h3>
|
||||
<p>Docling has three backends for parsing HTML files:</p>
|
||||
<ol>
|
||||
<li><b>HTMLDocumentBackend</b> Ignores images</li>
|
||||
<li><b>HTMLDocumentBackendImagesInline</b> Extracts images inline</li>
|
||||
<li><b>HTMLDocumentBackendImagesReferenced</b> Extracts images as references</li>
|
||||
</ol>
|
||||
</body>
|
||||
</html>
|
||||
|
842
tests/data/jats/bmj_sample.xml
vendored
842
tests/data/jats/bmj_sample.xml
vendored
@ -1,842 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "JATS-journalpublishing1.dtd">
|
||||
<article article-type="research-article" dtd-version="1.1" xml:lang="en"
|
||||
xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" >
|
||||
<front>
|
||||
<journal-meta>
|
||||
<journal-id journal-id-type="pmc">bmj</journal-id>
|
||||
<journal-id journal-id-type="pubmed">BMJ</journal-id>
|
||||
<journal-id journal-id-type="publisher">BMJ</journal-id>
|
||||
<issn>0959-8138</issn>
|
||||
<publisher>
|
||||
<publisher-name>BMJ</publisher-name>
|
||||
</publisher>
|
||||
</journal-meta>
|
||||
<article-meta>
|
||||
<article-id pub-id-type="other">jBMJ.v324.i7342.pg880</article-id>
|
||||
<article-id pub-id-type="pmid">11950738</article-id>
|
||||
<article-categories>
|
||||
<subj-group>
|
||||
<subject>Primary care</subject>
|
||||
<subj-group>
|
||||
<subject>190</subject>
|
||||
<subject>10</subject>
|
||||
<subject>218</subject>
|
||||
<subject>219</subject>
|
||||
<subject>355</subject>
|
||||
<subject>357</subject>
|
||||
</subj-group>
|
||||
</subj-group>
|
||||
</article-categories>
|
||||
<title-group>
|
||||
<article-title>Evolving general practice consultation in Britain: issues of length and
|
||||
context</article-title>
|
||||
</title-group>
|
||||
<contrib-group>
|
||||
<contrib contrib-type="author">
|
||||
<name>
|
||||
<surname>Freeman</surname>
|
||||
<given-names>George K</given-names>
|
||||
</name>
|
||||
<role>professor of general practice</role>
|
||||
<xref ref-type="aff" rid="aff-a"/>
|
||||
</contrib>
|
||||
<contrib contrib-type="author">
|
||||
<name>
|
||||
<surname>Horder</surname>
|
||||
<given-names>John P</given-names>
|
||||
</name>
|
||||
<role>past president</role>
|
||||
<xref ref-type="aff" rid="aff-b"/>
|
||||
</contrib>
|
||||
<contrib contrib-type="author">
|
||||
<name>
|
||||
<surname>Howie</surname>
|
||||
<given-names>John G R</given-names>
|
||||
</name>
|
||||
<role>emeritus professor of general practice</role>
|
||||
<xref ref-type="aff" rid="aff-c"/>
|
||||
</contrib>
|
||||
<contrib contrib-type="author">
|
||||
<name>
|
||||
<surname>Hungin</surname>
|
||||
<given-names>A Pali</given-names>
|
||||
</name>
|
||||
<role>professor of general practice</role>
|
||||
<xref ref-type="aff" rid="aff-d"/>
|
||||
</contrib>
|
||||
<contrib contrib-type="author">
|
||||
<name>
|
||||
<surname>Hill</surname>
|
||||
<given-names>Alison P</given-names>
|
||||
</name>
|
||||
<role>general practitioner</role>
|
||||
<xref ref-type="aff" rid="aff-e"/>
|
||||
</contrib>
|
||||
<contrib contrib-type="author">
|
||||
<name>
|
||||
<surname>Shah</surname>
|
||||
<given-names>Nayan C</given-names>
|
||||
</name>
|
||||
<role>general practitioner</role>
|
||||
<xref ref-type="aff" rid="aff-b"/>
|
||||
</contrib>
|
||||
<contrib contrib-type="author">
|
||||
<name>
|
||||
<surname>Wilson</surname>
|
||||
<given-names>Andrew</given-names>
|
||||
</name>
|
||||
<role>senior lecturer</role>
|
||||
<xref ref-type="aff" rid="aff-f"/>
|
||||
</contrib>
|
||||
</contrib-group>
|
||||
<aff id="aff-a">Centre for Primary Care and Social Medicine, Imperial College of Science,
|
||||
Technology and Medicine, London W6 8RP</aff>
|
||||
<aff id="aff-b">Royal College of General Practitioners, London SW7 1PU</aff>
|
||||
<aff id="aff-c">Department of General Practice, University of Edinburgh, Edinburgh EH8 9DX</aff>
|
||||
<aff id="aff-d">Centre for Health Studies, University of Durham, Durham DH1 3HN</aff>
|
||||
<aff id="aff-e">Kilburn Park Medical Centre, London NW6</aff>
|
||||
<aff id="aff-f">Department of General Practice and Primary Health Care, University of Leicester,
|
||||
Leicester LE5 4PW</aff>
|
||||
<author-notes>
|
||||
<fn fn-type="con">
|
||||
<p>Contributors: GKF wrote the paper and revised it after repeated and detailed comments from
|
||||
all of the other authors and feedback from the first referee and from the <italic>BMJ</italic>
|
||||
editorial panel. All other authors gave detailed and repeated comments and cristicisms. GKF is
|
||||
the guarantor of the paper.</p>
|
||||
</fn>
|
||||
<fn>
|
||||
<p>Correspondence to: G Freeman <email>g.freeman@ic.ac.uk</email> </p>
|
||||
</fn>
|
||||
</author-notes>
|
||||
<pub-date date-type="pub" publication-format="print" iso-8601-date="2002-04-13">
|
||||
<day>13</day>
|
||||
<month>4</month>
|
||||
<year>2002</year>
|
||||
</pub-date>
|
||||
<volume>324</volume>
|
||||
<issue>7342</issue>
|
||||
<fpage>880</fpage>
|
||||
<lpage>882</lpage>
|
||||
<history>
|
||||
<date date-type="accepted" iso-8601-date="2002-02-07" publication-format="print">
|
||||
<day>7</day>
|
||||
<month>2</month>
|
||||
<year>2002</year>
|
||||
</date>
|
||||
</history>
|
||||
<permissions>
|
||||
<copyright-statement>Copyright © 2002, BMJ</copyright-statement>
|
||||
<copyright-year>2002</copyright-year>
|
||||
</permissions>
|
||||
</article-meta>
|
||||
</front>
|
||||
<body>
|
||||
<p>In 1999 Shah<xref ref-type="bibr" rid="B1">1</xref> and others said that the Royal College of
|
||||
General Practitioners should advocate longer consultations in general practice as a matter of
|
||||
policy. The college set up a working group chaired by A P Hungin, and a systematic review of
|
||||
literature on consultation length in general practice was commissioned. The working group agreed
|
||||
that the available evidence would be hard to interpret without discussion of the changing context
|
||||
within which consultations now take place. For many years general practitioners and those who
|
||||
have surveyed patients' opinions in the United Kingdom have complained about short consultation
|
||||
time, despite a steady increase in actual mean length. Recently Mechanic pointed out that this is
|
||||
also true in the United States.<xref ref-type="bibr" rid="B2">2</xref> Is there any justification
|
||||
for a further increase in mean time allocated per consultation in general practice?</p>
|
||||
<p>We report on the outcome of extensive debate among a group of general practitioners with an
|
||||
interest in the process of care, with reference to the interim findings of the commissioned
|
||||
systematic review and our personal databases. The review identified 14 relevant papers. <boxed-text>
|
||||
<sec>
|
||||
<title>Summary points</title>
|
||||
<p> <list list-type="bullet">
|
||||
<list-item>
|
||||
<p>Longer consultations are associated with a range of better patient outcomes</p>
|
||||
</list-item>
|
||||
<list-item>
|
||||
<p>Modern consultations in general practice deal with patients with more serious and chronic
|
||||
conditions</p>
|
||||
</list-item>
|
||||
<list-item>
|
||||
<p>Increasing patient participation means more complex interaction, which demands extra
|
||||
time</p>
|
||||
</list-item>
|
||||
<list-item>
|
||||
<p>Difficulties with access and with loss of continuity add to perceived stress and poor
|
||||
performance and lead to further pressure on time</p>
|
||||
</list-item>
|
||||
<list-item>
|
||||
<p>Longer consultations should be a professional priority, combined with increased use of
|
||||
technology and more flexible practice management to maximise interpersonal continuity</p>
|
||||
</list-item>
|
||||
<list-item>
|
||||
<p>Research on implementation is needed</p>
|
||||
</list-item>
|
||||
</list> </p>
|
||||
</sec>
|
||||
</boxed-text> </p>
|
||||
<sec sec-type="subjects">
|
||||
<title>Longer consultations: benefits for patients</title>
|
||||
<p>The systematic review consistently showed that doctors with longer consultation times
|
||||
prescribe less and offer more advice on lifestyle and other health promoting activities. Longer
|
||||
consultations have been significantly associated with better recognition and handling of
|
||||
psychosocial problems<xref ref-type="bibr" rid="B3">3</xref> and with better patient
|
||||
enablement.<xref ref-type="bibr" rid="B4">4</xref> Also clinical care for some chronic illnesses
|
||||
is better in practices with longer booked intervals between one appointment and the next.<xref
|
||||
ref-type="bibr" rid="B5">5</xref> It is not clear whether time is itself the main influence or
|
||||
whether some doctors insist on more time.</p>
|
||||
<p>A national survey in 1998 reported that most (87%) patients were satisfied with the
|
||||
length of their most recent consultation.<xref ref-type="bibr" rid="B6">6</xref> Satisfaction
|
||||
with any service will be high if expectations are met or exceeded. But expectations are modified
|
||||
by previous experience.<xref ref-type="bibr" rid="B7">7</xref> The result is that primary care
|
||||
patients are likely to be satisfied with what they are used to unless the context modifies the
|
||||
effects of their own experience.</p>
|
||||
</sec>
|
||||
<sec>
|
||||
<title>Context of modern consultations</title>
|
||||
<p>Shorter consultations were more appropriate when the population was younger, when even a brief
|
||||
absence from employment due to sickness required a doctor's note, and when many simple remedies
|
||||
were available only on prescription. Recently at least five important influences have increased
|
||||
the content and hence the potential length of the consultation.</p>
|
||||
</sec>
|
||||
<sec>
|
||||
<title>Participatory consultation style</title>
|
||||
<p>The most effective consultations are those in which doctors most directly acknowledge and
|
||||
perhaps respond to patients' problems and concerns. In addition, for patients to be committed to
|
||||
taking advantage of medical advice they must agree with both the goals and methods proposed. A
|
||||
landmark publication in the United Kingdom was <italic>Meetings Between Experts</italic>, which
|
||||
argued that while doctors are the experts about medical problems in general patients are the
|
||||
experts on how they themselves experience these problems.<xref ref-type="bibr" rid="B8">8</xref>
|
||||
New emphasis on teaching consulting skills in general practice advocated specific attention to
|
||||
the patient's agenda, beliefs, understanding, and agreement. Currently the General Medical
|
||||
Council, aware that communication difficulties underlie many complaints about doctors, has
|
||||
further emphasised the importance of involving patients in consultations in its revised guidance
|
||||
to medical schools.<xref ref-type="bibr" rid="B9">9</xref> More patient involvement should give
|
||||
a better outcome, but this participatory style usually lengthens consultations.</p>
|
||||
</sec>
|
||||
<sec>
|
||||
<title>Extended professional agenda</title>
|
||||
<p>The traditional consultation in general practice was brief.<xref ref-type="bibr" rid="B2"
|
||||
>2</xref> The patient presented symptoms and the doctor prescribed treatment. In 1957 Balint
|
||||
gave new insights into the meaning of symptoms.<xref ref-type="bibr" rid="B10">10</xref> By 1979
|
||||
an enhanced model of consultation was presented, in which the doctors dealt with ongoing as well
|
||||
as presenting problems and added health promotion and education about future appropriate use of
|
||||
services.<xref ref-type="bibr" rid="B11">11</xref> Now, with an ageing population and more
|
||||
community care of chronic illness, there are more issues to be considered at each consultation.
|
||||
Ideas of what constitutes good general practice are more complex.<xref ref-type="bibr" rid="B12"
|
||||
>12</xref> Good practice now includes both extended care of chronic medical problems—for
|
||||
example, coronary heart disease<xref ref-type="bibr" rid="B13">13</xref>—and a public
|
||||
health role. At first this model was restricted to those who lead change (“early
|
||||
adopters”) and enthusiasts<xref ref-type="bibr" rid="B14">14</xref> but now it is
|
||||
embedded in professional and managerial expectations of good practice.</p>
|
||||
<p>Adequate time is essential. It may be difficult for an elderly patient with several active
|
||||
problems to undress, be examined, and get adequate professional consideration in under 15
|
||||
minutes. Here the doctor is faced with the choice of curtailing the consultation or of reducing
|
||||
the time available for the next patient. Having to cope with these situations often contributes
|
||||
to professional dissatisfaction.<xref ref-type="bibr" rid="B15">15</xref> This combination of
|
||||
more care, more options, and more genuine discussion of those options with informed patient
|
||||
choice inevitably leads to pressure on time.</p>
|
||||
</sec>
|
||||
<sec>
|
||||
<title>Access problems</title>
|
||||
<p>In a service free at the point of access, rising demand will tend to increase rationing by
|
||||
delay. But attempts to improve access by offering more consultations at short notice squeeze
|
||||
consultation times.</p>
|
||||
<p>While appointment systems can and should reduce queuing time for consultations, they have long
|
||||
tended to be used as a brake on total demand.<xref ref-type="bibr" rid="B16">16</xref> This may
|
||||
seriously erode patients' confidence in being able to see their doctor or nurse when they need
|
||||
to. Patients are offered appointments further ahead but may keep these even if their symptoms
|
||||
have remitted “just in case.” Availability of consultations is thus blocked.
|
||||
Receptionists are then inappropriately blamed for the inadequate access to doctors.</p>
|
||||
<p>In response to perception of delay, the government has set targets in the NHS plan of
|
||||
“guaranteed access to a primary care professional within 24 hours and to a primary care
|
||||
doctor within 48 hours.” Implementation is currently being negotiated.</p>
|
||||
<p>Virtually all patients think that they would not consult unless it was absolutely necessary.
|
||||
They do not think they are wasting NHS time and do not like being made to feel so. But
|
||||
underlying general practitioners' willingness to make patients wait several days is their
|
||||
perception that few of the problems are urgent. Patients and general practitioners evidently do
|
||||
not agree about the urgency of so called minor problems. To some extent general practice in the
|
||||
United Kingdom may have scored an “own goal” by setting up perceived access
|
||||
barriers (appointment systems and out of hours cooperatives) in the attempt to increase
|
||||
professional standards and control demand in a service that is free at the point of access.</p>
|
||||
<p>A further government initiative has been to bypass general practice with new
|
||||
services—notably, walk-in centres (primary care clinics in which no appointment is
|
||||
needed) and NHS Direct (a professional telephone helpline giving advice on simple remedies and
|
||||
access to services). Introduced widely and rapidly, these services each potentially provide
|
||||
significant features of primary care—namely, quick access to skilled health advice and
|
||||
first line treatment.</p>
|
||||
</sec>
|
||||
<sec>
|
||||
<title>Loss of interpersonal continuity</title>
|
||||
<p>If a patient has to consult several different professionals, particularly over a short period
|
||||
of time, there is inevitable duplication of stories, risk of naive diagnoses, potential for
|
||||
conflicting advice, and perhaps loss of trust. Trust is essential if patients are to accept the
|
||||
“wait and see” management policy which is, or should be, an important part of the
|
||||
management of self limiting conditions, which are often on the boundary between illness and
|
||||
non-illness.<xref ref-type="bibr" rid="B17">17</xref> Such duplication again increases pressure
|
||||
for more extra (unscheduled) consultations resulting in late running and professional
|
||||
frustration.<xref ref-type="bibr" rid="B18">18</xref> </p>
|
||||
<p>Mechanic described how loss of longitudinal (and perhaps personal and relational<xref
|
||||
ref-type="bibr" rid="B19">19</xref>) continuity influences the perception and use of time
|
||||
through an inability to build on previous consultations.<xref ref-type="bibr" rid="B2">2</xref>
|
||||
Knowing the doctor well, particularly in smaller practices, is associated with enhanced patient
|
||||
enablement in shorter time.<xref ref-type="bibr" rid="B4">4</xref> Though Mechanic pointed out
|
||||
that three quarters of UK patients have been registered with their general practitioner five
|
||||
years or more, this may be misleading. Practices are growing, with larger teams and more
|
||||
registered patients. Being registered with a doctor in a larger practice is usually no guarantee
|
||||
that the patient will be able to see the same doctor or the doctor of his or her choice, who may
|
||||
be different. Thus the system does not encourage adequate personal continuity. This adds to
|
||||
pressure on time and reduces both patient and professional satisfaction.</p>
|
||||
</sec>
|
||||
<sec>
|
||||
<title>Health service reforms</title>
|
||||
<p>Finally, for the past 15 years the NHS has experienced unprecedented change with a succession
|
||||
of major administrative reforms. Recent reforms have focused on an NHS led by primary care,
|
||||
including the aim of shifting care from the secondary specialist sector to primary care. One
|
||||
consequence is increased demand for primary care of patients with more serious and less stable
|
||||
problems. With the limited piloting of reforms we do not know whether such major redirection can
|
||||
be achieved without greatly altering the delicate balance between expectations (of both patients
|
||||
and staff) and what is delivered.</p>
|
||||
</sec>
|
||||
<sec>
|
||||
<title>The future</title>
|
||||
<p>We think that the way ahead must embrace both longer mean consultation times and more
|
||||
flexibility. More time is needed for high quality consultations with patients with major and
|
||||
complex problems of all kinds. But patients also need access to simpler services and advice.
|
||||
This should be more appropriate (and cost less) when it is given by professionals who know the
|
||||
patient and his or her medical history and social circumstances. For doctors, the higher quality
|
||||
associated with longer consultations may lead to greater professional satisfaction and, if these
|
||||
longer consultations are combined with more realistic scheduling, to reduced levels of
|
||||
stress.<xref ref-type="bibr" rid="B20">20</xref> They will also find it easier to develop
|
||||
further the care of chronic disease.</p>
|
||||
<p>The challenge posed to general practice by walk-in centres and NHS Direct is considerable, and
|
||||
the diversion of funding from primary care is large. The risk of waste and duplication increases
|
||||
as more layers of complexity are added to a primary care service that started out as something
|
||||
familiar, simple, and local and which is still envied in other developed countries.<xref
|
||||
ref-type="bibr" rid="B21">21</xref> Access needs to be simple, and the advantages of personal
|
||||
knowledge and trust in minimising duplication and overmedicalisation need to be exploited.</p>
|
||||
<p>We must ensure better communication and access so that patients can more easily deal with
|
||||
minor issues and queries with someone they know and trust and avoid the formality and
|
||||
inconvenience of a full face to face consultation. Too often this has to be with a different
|
||||
professional, unfamiliar with the nuances of the case. There should be far more managerial
|
||||
emphasis on helping patients to interact with their chosen practitioner<xref ref-type="bibr"
|
||||
rid="B22">22</xref>; such a programme has been described.<xref ref-type="bibr" rid="B23"
|
||||
>23</xref> Modern information systems make it much easier to record which doctor(s) a patient
|
||||
prefers to see and to monitor how often this is achieved. The telephone is hardly modern but is
|
||||
underused. Email avoids the problems inherent in arranging simultaneous availability necessary
|
||||
for telephone consultations but at the cost of reducing the communication of emotions. There is
|
||||
a place for both.<xref ref-type="bibr" rid="B2">2</xref> Access without prior appointment is a
|
||||
valued feature of primary care, and we need to know more about the right balance between planned
|
||||
and ad hoc consulting.</p>
|
||||
</sec>
|
||||
<sec>
|
||||
<title>Next steps</title>
|
||||
<p>General practitioners do not behave in a uniform way. They can be categorised as slow, medium,
|
||||
and fast and react in different ways to changes in consulting speed.<xref ref-type="bibr"
|
||||
rid="B18">18</xref> They are likely to have differing views about a widespread move to lengthen
|
||||
consultation time. We do not need further confirmation that longer consultations are desirable
|
||||
and necessary, but research could show us the best way to learn how to introduce them with
|
||||
minimal disruption to the way in which patients and practices like primary care to be
|
||||
provided.<xref ref-type="bibr" rid="B24">24</xref> We also need to learn how to make the most of
|
||||
available time in complex consultations.</p>
|
||||
<p>Devising appropriate incentives and helping practices move beyond just reacting to demand in
|
||||
the traditional way by working harder and faster is perhaps our greatest challenge in the United
|
||||
Kingdom. The new primary care trusts need to work together with the growing primary care research
|
||||
networks to carry out the necessary development work. In particular, research is needed on how a
|
||||
primary care team can best provide the right balance of quick access and interpersonal knowledge
|
||||
and trust.</p>
|
||||
</sec>
|
||||
</body>
|
||||
<back>
|
||||
<ack>
|
||||
<p>We thank the other members of the working group: Susan Childs, Paul Freeling, Iona Heath,
|
||||
Marshall Marinker, and Bonnie Sibbald. We also thank Fenny Green of the Royal College of General
|
||||
Practitioners for administrative help.</p>
|
||||
</ack>
|
||||
<ref-list>
|
||||
<ref id="B1">
|
||||
<label>1</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Shah</surname>
|
||||
<given-names>NC</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Viewpoint: Consultation time—time for a change? Still the
|
||||
“perfunctory work of perfunctory men!”</article-title>
|
||||
<source>Br J Gen Pract</source>
|
||||
<year iso-8601-date="1999">1999</year>
|
||||
<volume>49</volume>
|
||||
<fpage>497</fpage>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B2">
|
||||
<label>2</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Mechanic</surname>
|
||||
<given-names>D</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>How should hamsters run? Some observations about sufficient patient time in
|
||||
primary care</article-title>
|
||||
<source>BMJ</source>
|
||||
<year iso-8601-date="2001">2001</year>
|
||||
<volume>323</volume>
|
||||
<fpage>266</fpage>
|
||||
<lpage>268</lpage>
|
||||
<pub-id pub-id-type="pmid">11485957</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B3">
|
||||
<label>3</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Howie</surname>
|
||||
<given-names>JGR</given-names>
|
||||
</name> <name>
|
||||
<surname>Porter</surname>
|
||||
<given-names>AMD</given-names>
|
||||
</name> <name>
|
||||
<surname>Heaney</surname>
|
||||
<given-names>DJ</given-names>
|
||||
</name> <name>
|
||||
<surname>Hopton</surname>
|
||||
<given-names>JL</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Long to short consultation ratio: a proxy measure of quality of care for general
|
||||
practice</article-title>
|
||||
<source>Br J Gen Pract</source>
|
||||
<year iso-8601-date="1991">1991</year>
|
||||
<volume>41</volume>
|
||||
<fpage>48</fpage>
|
||||
<lpage>54</lpage>
|
||||
<pub-id pub-id-type="pmid">2031735</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B4">
|
||||
<label>4</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Howie</surname>
|
||||
<given-names>JGR</given-names>
|
||||
</name> <name>
|
||||
<surname>Heaney</surname>
|
||||
<given-names>DJ</given-names>
|
||||
</name> <name>
|
||||
<surname>Maxwell</surname>
|
||||
<given-names>M</given-names>
|
||||
</name> <name>
|
||||
<surname>Walker</surname>
|
||||
<given-names>JJ</given-names>
|
||||
</name> <name>
|
||||
<surname>Freeman</surname>
|
||||
<given-names>GK</given-names>
|
||||
</name> <name>
|
||||
<surname>Rai</surname>
|
||||
<given-names>H</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Quality at general practice consultations: cross-sectional
|
||||
survey</article-title>
|
||||
<source>BMJ</source>
|
||||
<year iso-8601-date="1999">1999</year>
|
||||
<volume>319</volume>
|
||||
<fpage>738</fpage>
|
||||
<lpage>743</lpage>
|
||||
<pub-id pub-id-type="pmid">10487999</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B5">
|
||||
<label>5</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Kaplan</surname>
|
||||
<given-names>SH</given-names>
|
||||
</name> <name>
|
||||
<surname>Greenfield</surname>
|
||||
<given-names>S</given-names>
|
||||
</name> <name>
|
||||
<surname>Ware</surname>
|
||||
<given-names>JE</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Assessing the effects of physician-patient interactions on the outcome of
|
||||
chronic disease</article-title>
|
||||
<source>Med Care</source>
|
||||
<year iso-8601-date="1989">1989</year>
|
||||
<volume>27</volume>
|
||||
<supplement>suppl 3</supplement>
|
||||
<fpage>110</fpage>
|
||||
<lpage>125</lpage>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B6">
|
||||
<label>6</label>
|
||||
<element-citation publication-type="book" publication-format="print">
|
||||
<person-group person-group-type="editor"> <name>
|
||||
<surname>Airey</surname>
|
||||
<given-names>C</given-names>
|
||||
</name> <name>
|
||||
<surname>Erens</surname>
|
||||
<given-names>B</given-names>
|
||||
</name> </person-group>
|
||||
<source>National surveys of NHS patients: general practice, 1998</source>
|
||||
<year iso-8601-date="1999">1999</year>
|
||||
<publisher-loc>London</publisher-loc>
|
||||
<publisher-name>NHS Executive</publisher-name>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B7">
|
||||
<label>7</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Hart</surname>
|
||||
<given-names>JT</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Expectations of health care: promoted, managed or shared?</article-title>
|
||||
<source>Health Expect</source>
|
||||
<year iso-8601-date="1998">1998</year>
|
||||
<volume>1</volume>
|
||||
<fpage>3</fpage>
|
||||
<lpage>13</lpage>
|
||||
<pub-id pub-id-type="pmid">11281857</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B8">
|
||||
<label>8</label>
|
||||
<element-citation publication-type="book" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Tuckett</surname>
|
||||
<given-names>D</given-names>
|
||||
</name> <name>
|
||||
<surname>Boulton</surname>
|
||||
<given-names>M</given-names>
|
||||
</name> <name>
|
||||
<surname>Olson</surname>
|
||||
<given-names>C</given-names>
|
||||
</name> <name>
|
||||
<surname>Williams</surname>
|
||||
<given-names>A</given-names>
|
||||
</name> </person-group>
|
||||
<source>Meetings between experts: an approach to sharing ideas in medical
|
||||
consultations</source>
|
||||
<year iso-8601-date="1985">1985</year>
|
||||
<publisher-loc>London</publisher-loc>
|
||||
<publisher-name>Tavistock Publications</publisher-name>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B9">
|
||||
<label>9</label>
|
||||
<mixed-citation publication-type="webpage" publication-format="web">General Medical Council.
|
||||
<source>Draft recommendations on undergraduate medical education</source>. July 2001.
|
||||
www.gmc-uk.org/med_ed/tomorrowsdoctors/index.htm (accessed 2 Jan 2002).</mixed-citation>
|
||||
</ref>
|
||||
<ref id="B10">
|
||||
<label>10</label>
|
||||
<element-citation publication-type="book" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Balint</surname>
|
||||
<given-names>M</given-names>
|
||||
</name> </person-group>
|
||||
<source>The doctor, his patient and the illness</source>
|
||||
<year iso-8601-date="1957">1957</year>
|
||||
<publisher-loc>London</publisher-loc>
|
||||
<publisher-name>Tavistock</publisher-name>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B11">
|
||||
<label>11</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Stott</surname>
|
||||
<given-names>NCH</given-names>
|
||||
</name> <name>
|
||||
<surname>Davies</surname>
|
||||
<given-names>RH</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>The exceptional potential in each primary care consultation</article-title>
|
||||
<source>J R Coll Gen Pract</source>
|
||||
<year iso-8601-date="1979">1979</year>
|
||||
<volume>29</volume>
|
||||
<fpage>210</fpage>
|
||||
<lpage>205</lpage>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B12">
|
||||
<label>12</label>
|
||||
<element-citation publication-type="book" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Hill</surname>
|
||||
<given-names>AP</given-names>
|
||||
</name> </person-group>
|
||||
<person-group person-group-type="editor"> <name>
|
||||
<surname>Hill</surname>
|
||||
<given-names>AP</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Challenges for primary care</article-title>
|
||||
<source>What's gone wrong with health care? Challenges for the new millennium</source>
|
||||
<year iso-8601-date="2000">2000</year>
|
||||
<publisher-loc>London</publisher-loc>
|
||||
<publisher-name>King's Fund</publisher-name>
|
||||
<fpage>75</fpage>
|
||||
<lpage>86</lpage>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B13">
|
||||
<label>13</label>
|
||||
<element-citation publication-type="book" publication-format="print">
|
||||
<collab>Department of Health</collab>
|
||||
<source>National service framework for coronary heart disease</source>
|
||||
<year iso-8601-date="2000">2000</year>
|
||||
<publisher-loc>London</publisher-loc>
|
||||
<publisher-name>Department of Health</publisher-name>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B14">
|
||||
<label>14</label>
|
||||
<element-citation publication-type="book" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Hart</surname>
|
||||
<given-names>JT</given-names>
|
||||
</name> </person-group>
|
||||
<source>A new kind of doctor: the general practitioner's part in the health of the
|
||||
community</source>
|
||||
<year iso-8601-date="1988">1988</year>
|
||||
<publisher-loc>London</publisher-loc>
|
||||
<publisher-name>Merlin Press</publisher-name>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B15">
|
||||
<label>15</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Morrison</surname>
|
||||
<given-names>I</given-names>
|
||||
</name> <name>
|
||||
<surname>Smith</surname>
|
||||
<given-names>R</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Hamster health care</article-title>
|
||||
<source>BMJ</source>
|
||||
<year iso-8601-date="2000">2000</year>
|
||||
<volume>321</volume>
|
||||
<fpage>1541</fpage>
|
||||
<lpage>1542</lpage>
|
||||
<pub-id pub-id-type="pmid">11124164</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B16">
|
||||
<label>16</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Arber</surname>
|
||||
<given-names>S</given-names>
|
||||
</name> <name>
|
||||
<surname>Sawyer</surname>
|
||||
<given-names>L</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Do appointment systems work?</article-title>
|
||||
<source>BMJ</source>
|
||||
<year iso-8601-date="1982">1982</year>
|
||||
<volume>284</volume>
|
||||
<fpage>478</fpage>
|
||||
<lpage>480</lpage>
|
||||
<pub-id pub-id-type="pmid">6800503</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B17">
|
||||
<label>17</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Hjortdahl</surname>
|
||||
<given-names>P</given-names>
|
||||
</name> <name>
|
||||
<surname>Borchgrevink</surname>
|
||||
<given-names>CF</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Continuity of care: influence of general practitioners' knowledge about their
|
||||
patients on use of resources in consultations</article-title>
|
||||
<source>BMJ</source>
|
||||
<year iso-8601-date="1991">1991</year>
|
||||
<volume>303</volume>
|
||||
<fpage>1181</fpage>
|
||||
<lpage>1184</lpage>
|
||||
<pub-id pub-id-type="pmid">1747619</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B18">
|
||||
<label>18</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Howie</surname>
|
||||
<given-names>JGR</given-names>
|
||||
</name> <name>
|
||||
<surname>Hopton</surname>
|
||||
<given-names>JL</given-names>
|
||||
</name> <name>
|
||||
<surname>Heaney</surname>
|
||||
<given-names>DJ</given-names>
|
||||
</name> <name>
|
||||
<surname>Porter</surname>
|
||||
<given-names>AMD</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Attitudes to medical care, the organization of work, and stress among general
|
||||
practitioners</article-title>
|
||||
<source>Br J Gen Pract</source>
|
||||
<year iso-8601-date="1992">1992</year>
|
||||
<volume>42</volume>
|
||||
<fpage>181</fpage>
|
||||
<lpage>185</lpage>
|
||||
<pub-id pub-id-type="pmid">1389427</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B19">
|
||||
<label>19</label>
|
||||
<element-citation publication-type="book" publication-format="web">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Freeman</surname>
|
||||
<given-names>G</given-names>
|
||||
</name> <name>
|
||||
<surname>Shepperd</surname>
|
||||
<given-names>S</given-names>
|
||||
</name> <name>
|
||||
<surname>Robinson</surname>
|
||||
<given-names>I</given-names>
|
||||
</name> <name>
|
||||
<surname>Ehrich</surname>
|
||||
<given-names>K</given-names>
|
||||
</name> <name>
|
||||
<surname>Richards</surname>
|
||||
<given-names>SC</given-names>
|
||||
</name> <name>
|
||||
<surname>Pitman</surname>
|
||||
<given-names>P</given-names>
|
||||
</name> </person-group>
|
||||
<source>Continuity of care: report of a scoping exercise for the national co-ordinating centre
|
||||
for NHS Service Delivery and Organisation R&D (NCCSDO), Summer 2000</source>
|
||||
<year iso-8601-date="2001">2001</year>
|
||||
<publisher-loc>London</publisher-loc>
|
||||
<publisher-name>NCCSDO</publisher-name>
|
||||
<comment><ext-link ext-link-type="url" xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xlink:href="http://www.sdo.lshtm.ac.uk/continuityofcare.htm"
|
||||
>www.sdo.lshtm.ac.uk/continuityofcare.htm</ext-link> (accessed 2 Jan 2002)</comment>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B20">
|
||||
<label>20</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Wilson</surname>
|
||||
<given-names>A</given-names>
|
||||
</name> <name>
|
||||
<surname>McDonald</surname>
|
||||
<given-names>P</given-names>
|
||||
</name> <name>
|
||||
<surname>Hayes</surname>
|
||||
<given-names>L</given-names>
|
||||
</name> <name>
|
||||
<surname>Cooney</surname>
|
||||
<given-names>J</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Longer booking intervals in general practice: effects on doctors' stress and
|
||||
arousal</article-title>
|
||||
<source>Br J Gen Pract</source>
|
||||
<year iso-8601-date="1991">1991</year>
|
||||
<volume>41</volume>
|
||||
<fpage>184</fpage>
|
||||
<lpage>187</lpage>
|
||||
<pub-id pub-id-type="pmid">1878267</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B21">
|
||||
<label>21</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>De Maeseneer</surname>
|
||||
<given-names>J</given-names>
|
||||
</name> <name>
|
||||
<surname>Hjortdahl</surname>
|
||||
<given-names>P</given-names>
|
||||
</name> <name>
|
||||
<surname>Starfield</surname>
|
||||
<given-names>B</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Fix what's wrong, not what's right, with general practice in
|
||||
Britain</article-title>
|
||||
<source>BMJ</source>
|
||||
<year iso-8601-date="2000">2000</year>
|
||||
<volume>320</volume>
|
||||
<fpage>1616</fpage>
|
||||
<lpage>1617</lpage>
|
||||
<pub-id pub-id-type="pmid">10856043</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B22">
|
||||
<label>22</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Freeman</surname>
|
||||
<given-names>G</given-names>
|
||||
</name> <name>
|
||||
<surname>Hjortdahl</surname>
|
||||
<given-names>P</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>What future for continuity of care in general practice?</article-title>
|
||||
<source>BMJ</source>
|
||||
<year iso-8601-date="1997">1997</year>
|
||||
<volume>314</volume>
|
||||
<fpage>1870</fpage>
|
||||
<lpage>1873</lpage>
|
||||
<pub-id pub-id-type="pmid">9224130</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B23">
|
||||
<label>23</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Kibbe</surname>
|
||||
<given-names>DC</given-names>
|
||||
</name> <name>
|
||||
<surname>Bentz</surname>
|
||||
<given-names>E</given-names>
|
||||
</name> <name>
|
||||
<surname>McLaughlin</surname>
|
||||
<given-names>CP</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Continuous quality improvement for continuity of care</article-title>
|
||||
<source>J Fam Pract</source>
|
||||
<year iso-8601-date="1993">1993</year>
|
||||
<volume>36</volume>
|
||||
<fpage>304</fpage>
|
||||
<lpage>308</lpage>
|
||||
<pub-id pub-id-type="pmid">8454977</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
<ref id="B24">
|
||||
<label>24</label>
|
||||
<element-citation publication-type="journal" publication-format="print">
|
||||
<person-group person-group-type="author"> <name>
|
||||
<surname>Williams</surname>
|
||||
<given-names>M</given-names>
|
||||
</name> <name>
|
||||
<surname>Neal</surname>
|
||||
<given-names>RD</given-names>
|
||||
</name> </person-group>
|
||||
<article-title>Time for a change? The process of lengthening booking intervals in general
|
||||
practice</article-title>
|
||||
<source>Br J Gen Pract</source>
|
||||
<year iso-8601-date="1998">1998</year>
|
||||
<volume>48</volume>
|
||||
<fpage>1783</fpage>
|
||||
<lpage>1786</lpage>
|
||||
<pub-id pub-id-type="pmid">10198490</pub-id>
|
||||
</element-citation>
|
||||
</ref>
|
||||
</ref-list>
|
||||
<fn-group>
|
||||
<fn id="fn1">
|
||||
<p>Funding: Meetings of the working group in 1999-2000 were funded by the
|
||||
<funding-source>Scientific Foundation Board of the RCGP</funding-source>.</p>
|
||||
</fn>
|
||||
<fn id="fn2">
|
||||
<p>Competing interests: None declared.</p>
|
||||
</fn>
|
||||
</fn-group>
|
||||
</back>
|
||||
</article>
|
3089
tests/data/jats/pnas_sample.xml
vendored
3089
tests/data/jats/pnas_sample.xml
vendored
File diff suppressed because it is too large
Load Diff
96
tests/data/jats/pntd.0008301.txt
vendored
96
tests/data/jats/pntd.0008301.txt
vendored
File diff suppressed because one or more lines are too long
96
tests/data/jats/pntd.0008301.xml
vendored
96
tests/data/jats/pntd.0008301.xml
vendored
File diff suppressed because one or more lines are too long
60
tests/data/jats/pone.0234687.txt
vendored
60
tests/data/jats/pone.0234687.txt
vendored
File diff suppressed because one or more lines are too long
60
tests/data/jats/pone.0234687.xml
vendored
60
tests/data/jats/pone.0234687.xml
vendored
File diff suppressed because one or more lines are too long
@ -1,8 +1,6 @@
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from docling.backend.html_backend import HTMLDocumentBackend
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from docling.datamodel.document import (
|
||||
@ -37,17 +35,15 @@ def test_heading_levels():
|
||||
if isinstance(item, SectionHeaderItem):
|
||||
if item.text == "Etymology":
|
||||
found_lvl_1 = True
|
||||
# h2 becomes level 1 because of h1 as title
|
||||
assert item.level == 1
|
||||
elif item.text == "Feeding":
|
||||
found_lvl_2 = True
|
||||
# h3 becomes level 2 because of h1 as title
|
||||
assert item.level == 2
|
||||
assert found_lvl_1 and found_lvl_2
|
||||
|
||||
|
||||
@pytest.mark.skip(
|
||||
"Temporarily disabled since docling-core>=2.21.0 does not support ordered lists "
|
||||
"with custom start value"
|
||||
)
|
||||
def test_ordered_lists():
|
||||
test_set: list[tuple[bytes, str]] = []
|
||||
|
||||
|
@ -14,9 +14,9 @@ from .verify_utils import verify_document, verify_export
|
||||
GENERATE = GEN_TEST_DATA
|
||||
|
||||
|
||||
def get_pubmed_paths():
|
||||
directory = Path(os.path.dirname(__file__) + "/data/pubmed/")
|
||||
xml_files = sorted(directory.rglob("*.xml"))
|
||||
def get_jats_paths():
|
||||
directory = Path(os.path.dirname(__file__) + "/data/jats/")
|
||||
xml_files = sorted(directory.rglob("*.nxml"))
|
||||
return xml_files
|
||||
|
||||
|
||||
@ -25,20 +25,20 @@ def get_converter():
|
||||
return converter
|
||||
|
||||
|
||||
def test_e2e_pubmed_conversions(use_stream=False):
|
||||
pubmed_paths = get_pubmed_paths()
|
||||
def test_e2e_jats_conversions(use_stream=False):
|
||||
jats_paths = get_jats_paths()
|
||||
converter = get_converter()
|
||||
|
||||
for pubmed_path in pubmed_paths:
|
||||
for jats_path in jats_paths:
|
||||
gt_path = (
|
||||
pubmed_path.parent.parent / "groundtruth" / "docling_v2" / pubmed_path.name
|
||||
jats_path.parent.parent / "groundtruth" / "docling_v2" / jats_path.name
|
||||
)
|
||||
if use_stream:
|
||||
buf = BytesIO(pubmed_path.open("rb").read())
|
||||
stream = DocumentStream(name=pubmed_path.name, stream=buf)
|
||||
buf = BytesIO(jats_path.open("rb").read())
|
||||
stream = DocumentStream(name=jats_path.name, stream=buf)
|
||||
conv_result: ConversionResult = converter.convert(stream)
|
||||
else:
|
||||
conv_result: ConversionResult = converter.convert(pubmed_path)
|
||||
conv_result: ConversionResult = converter.convert(jats_path)
|
||||
doc: DoclingDocument = conv_result.document
|
||||
|
||||
pred_md: str = doc.export_to_markdown()
|
||||
@ -54,9 +54,9 @@ def test_e2e_pubmed_conversions(use_stream=False):
|
||||
assert verify_document(doc, str(gt_path) + ".json", GENERATE), "export to json"
|
||||
|
||||
|
||||
def test_e2e_pubmed_conversions_stream():
|
||||
test_e2e_pubmed_conversions(use_stream=True)
|
||||
def test_e2e_jats_conversions_stream():
|
||||
test_e2e_jats_conversions(use_stream=True)
|
||||
|
||||
|
||||
def test_e2e_pubmed_conversions_no_stream():
|
||||
test_e2e_pubmed_conversions(use_stream=False)
|
||||
def test_e2e_jats_conversions_no_stream():
|
||||
test_e2e_jats_conversions(use_stream=False)
|
||||
|
4
uv.lock
generated
4
uv.lock
generated
@ -806,9 +806,10 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "docling"
|
||||
version = "2.41.0"
|
||||
version = "2.42.1"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "accelerate" },
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "certifi" },
|
||||
{ name = "docling-core", extra = ["chunking"] },
|
||||
@ -902,6 +903,7 @@ examples = [
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "accelerate", specifier = ">=1.0.0,<2" },
|
||||
{ name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" },
|
||||
{ name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" },
|
||||
{ name = "certifi", specifier = ">=2024.7.4" },
|
||||
|
Loading…
Reference in New Issue
Block a user