From 5d08b749af67b07595f05819951e704d9e5b8ed4 Mon Sep 17 00:00:00 2001
From: vaaale <2428222+vaaale@users.noreply.github.com>
Date: Sat, 24 May 2025 22:25:51 +0200
Subject: [PATCH] A new HTML backend that handles styled HTML (ignores it) as
well as images.
Note: MyPy fails.
Seems to be a known issue with BeautifulSoup:
https://github.com/python/typeshed/pull/13604
- Fixed issues with handling nested lists.
- Fixed some issues with spaces between text fragments
- Change naming of image configuration from INLINE to EMBEDDED. Also renamed corresponding class.
- Introduced constants for default image width/height.
Signed-off-by: vaaale <2428222+vaaale@users.noreply.github.com>
---
docling/backend/html_backend.py | 144 +-
docs/examples/backend_html.ipynb | 6040 +++++++++++++++++++++++++++++-
2 files changed, 6022 insertions(+), 162 deletions(-)
diff --git a/docling/backend/html_backend.py b/docling/backend/html_backend.py
index 458267e9..4a5c155d 100644
--- a/docling/backend/html_backend.py
+++ b/docling/backend/html_backend.py
@@ -7,6 +7,7 @@ from pathlib import Path
from typing import Optional, Union
import requests
+from PIL import Image, UnidentifiedImageError
from bs4 import BeautifulSoup, NavigableString, Tag
from docling_core.types.doc import (
DocItemLabel,
@@ -17,8 +18,7 @@ from docling_core.types.doc import (
TableData,
)
from docling_core.types.doc.document import ContentLayer, ImageRef
-from PIL import Image, UnidentifiedImageError
-from pydantic import AnyUrl, HttpUrl, ValidationError
+from pydantic import AnyUrl, ValidationError
from typing_extensions import override
from docling.backend.abstract_backend import DeclarativeDocumentBackend
@@ -27,15 +27,17 @@ from docling.datamodel.document import InputDocument
_log = logging.getLogger(__name__)
+DEFAULT_IMAGE_WIDTH = 128
+DEFAULT_IMAGE_HEIGHT = 128
+
# Tags that initiate distinct Docling items
_BLOCK_TAGS = {"h1", "h2", "h3", "h4", "h5", "h6", "p", "ul", "ol", "table"}
class ImageOptions(str, Enum):
"""Image options for HTML backend."""
-
NONE = "none"
- INLINE = "inline"
+ EMBEDDED = "embedded"
REFERENCED = "referenced"
@@ -49,7 +51,6 @@ class BaseHTMLDocumentBackend(DeclarativeDocumentBackend):
):
super().__init__(in_doc, path_or_stream)
self.image_options = image_options
- self.soup: Optional[Tag] = None
try:
raw = (
path_or_stream.getvalue()
@@ -88,35 +89,27 @@ class BaseHTMLDocumentBackend(DeclarativeDocumentBackend):
binary_hash=self.document_hash,
)
doc = DoclingDocument(name=self.file.stem or "file", origin=origin)
- _log.debug("Starting HTML conversion...")
- if not self.is_valid():
- raise RuntimeError("Invalid HTML document.")
- assert self.soup is not None
-
- # Remove all script/style content
+ title = self.soup.find("title")
+ if title:
+ doc.add_title(title.get_text())
+ # remove scripts/styles
for tag in self.soup.find_all(["script", "style"]):
tag.decompose()
body = self.soup.body or self.soup
- # Normalize <br> tags to newline strings
+ # normalize
for br in body.find_all("br"):
br.replace_with(NavigableString("\n"))
- # Decide content layer by presence of headers
headers = body.find(list(_BLOCK_TAGS))
self.content_layer = (
ContentLayer.BODY if headers is None else ContentLayer.FURNITURE
)
- # Walk the body to build the DoclingDocument
self._walk(body, doc, parent=doc.body)
return doc
def _walk(self, element: Tag, doc: DoclingDocument, parent) -> None:
- """
- Recursively walk element.contents, buffering inline text across tags like or ,
- emitting text nodes only at block boundaries, and extracting images immediately.
- """
buffer: list[str] = []
def flush_buffer():
@@ -126,88 +119,93 @@ class BaseHTMLDocumentBackend(DeclarativeDocumentBackend):
buffer.clear()
if not text:
return
- # Split on newlines for
for part in text.split("\n"):
seg = part.strip()
if seg:
doc.add_text(DocItemLabel.TEXT, seg, parent=parent)
for node in element.contents:
- # Skip scripts/styles
if isinstance(node, Tag) and node.name.lower() in ("script", "style"):
continue
- # Immediate image extraction
if isinstance(node, Tag) and node.name.lower() == "img":
flush_buffer()
self._emit_image(node, doc, parent)
continue
- # Block-level element triggers flush + handle
if isinstance(node, Tag) and node.name.lower() in _BLOCK_TAGS:
flush_buffer()
self._handle_block(node, doc, parent)
- # Inline tag with nested blocks: recurse
elif isinstance(node, Tag) and node.find(list(_BLOCK_TAGS)):
flush_buffer()
self._walk(node, doc, parent)
- # Inline text
elif isinstance(node, Tag):
buffer.append(node.get_text())
elif isinstance(node, NavigableString):
buffer.append(str(node))
- # Flush any remaining text
flush_buffer()
def _handle_block(self, tag: Tag, doc: DoclingDocument, parent) -> None:
tag_name = tag.name.lower()
- if tag_name == "h1":
- text = tag.get_text(strip=True)
- if text:
- doc.add_title(text, parent=parent)
- for img_tag in tag.find_all("img", recursive=True):
- self._emit_image(img_tag, doc, parent)
- elif tag_name in {"h2", "h3", "h4", "h5", "h6"}:
+
+ if tag_name in {"h1", "h2", "h3", "h4", "h5", "h6"}:
level = int(tag_name[1])
- text = tag.get_text(strip=True)
+ text = tag.get_text(strip=False)
if text:
- doc.add_heading(text, level=level, parent=parent)
+ doc.add_heading(text.strip(), level=level, parent=parent)
for img_tag in tag.find_all("img", recursive=True):
self._emit_image(img_tag, doc, parent)
+
elif tag_name == "p":
for part in tag.get_text().split("\n"):
seg = part.strip()
if seg:
doc.add_text(DocItemLabel.TEXT, seg, parent=parent)
- for img_tag in tag.find_all("img", recursive=True):
- self._emit_image(img_tag, doc, parent)
+ for img_tag in tag.find_all("img", recursive=True):
+ self._emit_image(img_tag, doc, parent)
+
elif tag_name in {"ul", "ol"}:
- is_ordered = tag_name == "ol"
- group = (
+ is_ordered = (tag_name == "ol")
+ # Create the list container
+ list_group = (
doc.add_ordered_list(parent=parent)
if is_ordered
else doc.add_unordered_list(parent=parent)
)
+
+ # For each top-level <li> in this list
for li in tag.find_all("li", recursive=False):
- li_text = li.get_text(separator=" ", strip=True)
+ # 1) extract only the "direct" text from this <li>
+ parts: list[str] = []
+ for child in li.contents:
+ if isinstance(child, NavigableString):
+ text_part = child.strip()
+ if text_part:
+ parts.append(text_part)
+ elif isinstance(child, Tag) and child.name not in ("ul", "ol"):
+ text_part = child.get_text(separator=" ", strip=True)
+ if text_part:
+ parts.append(text_part)
+ li_text = " ".join(parts)
+
+ # 2) add the list item
li_item = doc.add_list_item(
- text=li_text, enumerated=is_ordered, parent=group
+ text=li_text, enumerated=is_ordered, parent=list_group
)
- # Nested lists inside
- for sub in li.find_all(["ul", "ol"], recursive=False):
- self._handle_block(sub, doc, parent=group)
+
+ # 3) recurse into any nested lists, attaching them to this item
+ for sublist in li.find_all(["ul", "ol"], recursive=False):
+ self._handle_block(sublist, doc, parent=li_item)
+
+ # 4) extract any images under this <li>
for img_tag in li.find_all("img", recursive=True):
self._emit_image(img_tag, doc, li_item)
+
elif tag_name == "table":
- # Add table item and extract nested images
data = self._parse_table(tag, doc, parent)
doc.add_table(data=data, parent=parent)
def _emit_image(self, img_tag: Tag, doc: DoclingDocument, parent) -> None:
- """
- Helper to create a PictureItem (with optional CAPTION) for an <img> tag.
- """
-
- if ImageOptions.NONE == self.image_options:
+ if self.image_options == ImageOptions.NONE:
return
alt = (img_tag.get("alt") or "").strip()
@@ -215,46 +213,40 @@ class BaseHTMLDocumentBackend(DeclarativeDocumentBackend):
if alt:
caption_item = doc.add_text(DocItemLabel.CAPTION, alt, parent=parent)
- src_url = img_tag.get("src")
- width = img_tag.get("width", "128")
- height = img_tag.get("height", "128")
- img_ref = None
- if ImageOptions.INLINE == self.image_options:
+ src_url = img_tag.get("src", "")
+ width = img_tag.get("width", str(DEFAULT_IMAGE_WIDTH))
+ height = img_tag.get("height", str(DEFAULT_IMAGE_HEIGHT))
+ img_ref: Optional[ImageRef] = None
+
+ if self.image_options == ImageOptions.EMBEDDED:
try:
if src_url.startswith("http"):
img = Image.open(requests.get(src_url, stream=True).raw)
- elif src_url.startswith("file:"):
- img = Image.open(src_url)
elif src_url.startswith("data:"):
- image_data = re.sub("^data:image/.+;base64,", "", src_url)
- img = Image.open(BytesIO(base64.b64decode(image_data)))
+ data = re.sub(r"^data:image/.+;base64,", "", src_url)
+ img = Image.open(BytesIO(base64.b64decode(data)))
else:
return
- img_ref = ImageRef.from_pil(img, dpi=int(img.info.get("dpi")[0]))
- except (FileNotFoundError, UnidentifiedImageError) as ve:
- _log.warning(f"Could not load image (src={src_url}): {ve}")
+ img_ref = ImageRef.from_pil(img, dpi=int(img.info.get("dpi", (72,))[0]))
+ except (FileNotFoundError, UnidentifiedImageError) as e:
+ _log.warning(f"Could not load image (src={src_url}): {e}")
return
- elif ImageOptions.REFERENCED == self.image_options:
+
+ elif self.image_options == ImageOptions.REFERENCED:
try:
- img_url = AnyUrl(src_url)
img_ref = ImageRef(
- uri=img_url,
+ uri=AnyUrl(src_url),
dpi=72,
mimetype="image/png",
size=Size(width=float(width), height=float(height)),
)
- except ValidationError as ve:
- _log.warning(f"Could not load image (src={src_url}): {ve}")
+ except ValidationError as e:
+ _log.warning(f"Could not load image (src={src_url}): {e}")
return
doc.add_picture(image=img_ref, caption=caption_item, parent=parent)
def _parse_table(self, table_tag: Tag, doc: DoclingDocument, parent) -> TableData:
- """
- Convert an HTML table into TableData, capturing cell spans and text,
- and emitting any nested images as PictureItems.
- """
- # Build TableData
rows = []
for sec in ("thead", "tbody", "tfoot"):
section = table_tag.find(sec)
@@ -262,9 +254,11 @@ class BaseHTMLDocumentBackend(DeclarativeDocumentBackend):
rows.extend(section.find_all("tr", recursive=False))
if not rows:
rows = table_tag.find_all("tr", recursive=False)
+
occupied: dict[tuple[int, int], bool] = {}
cells: list[TableCell] = []
max_cols = 0
+
for r, tr in enumerate(rows):
c = 0
for cell_tag in tr.find_all(("td", "th"), recursive=False):
@@ -292,9 +286,11 @@ class BaseHTMLDocumentBackend(DeclarativeDocumentBackend):
occupied[(r + dr, c + dc)] = True
c += cs
max_cols = max(max_cols, c)
- # Emit images inside this table
+
+ # emit any images in the table
for img_tag in table_tag.find_all("img", recursive=True):
self._emit_image(img_tag, doc, parent)
+
return TableData(table_cells=cells, num_rows=len(rows), num_cols=max_cols)
@@ -308,14 +304,14 @@ class HTMLDocumentBackend(BaseHTMLDocumentBackend):
super().__init__(in_doc, path_or_stream, image_options=ImageOptions.NONE)
-class HTMLDocumentBackendImagesInline(BaseHTMLDocumentBackend):
+class HTMLDocumentBackendImagesEmbedded(BaseHTMLDocumentBackend):
@override
def __init__(
self,
in_doc: InputDocument,
path_or_stream: Union[BytesIO, Path],
):
- super().__init__(in_doc, path_or_stream, image_options=ImageOptions.INLINE)
+ super().__init__(in_doc, path_or_stream, image_options=ImageOptions.EMBEDDED)
class HTMLDocumentBackendImagesReferenced(BaseHTMLDocumentBackend):
diff --git a/docs/examples/backend_html.ipynb b/docs/examples/backend_html.ipynb
index beb731bc..2e840fea 100644
--- a/docs/examples/backend_html.ipynb
+++ b/docs/examples/backend_html.ipynb
@@ -23,21 +23,19 @@
},
{
"cell_type": "code",
- "execution_count": 1,
"id": "ba735966c052d9ab",
"metadata": {
"ExecuteTime": {
- "end_time": "2025-04-17T11:35:09.420830Z",
- "start_time": "2025-04-17T11:35:06.445943Z"
+ "end_time": "2025-05-24T19:53:12.367346Z",
+ "start_time": "2025-05-24T19:53:09.291989Z"
}
},
- "outputs": [],
"source": [
"from docling_core.types.doc import ImageRefMode\n",
"\n",
"from docling.backend.html_backend import (\n",
" HTMLDocumentBackend,\n",
- " HTMLDocumentBackendImagesInline,\n",
+ " HTMLDocumentBackendImagesEmbedded,\n",
" HTMLDocumentBackendImagesReferenced,\n",
")\n",
"from docling.datamodel.base_models import InputFormat\n",
@@ -48,19 +46,19 @@
" PdfFormatOption,\n",
")\n",
"from docling.pipeline.simple_pipeline import SimplePipeline"
- ]
+ ],
+ "outputs": [],
+ "execution_count": 1
},
{
"cell_type": "code",
- "execution_count": 2,
"id": "86f2468b5e03bd2e",
"metadata": {
"ExecuteTime": {
- "end_time": "2025-04-17T11:35:09.426930Z",
- "start_time": "2025-04-17T11:35:09.424303Z"
+ "end_time": "2025-05-24T19:53:12.477140Z",
+ "start_time": "2025-05-24T19:53:12.474402Z"
}
},
- "outputs": [],
"source": [
"def pdf_pipeline():\n",
" pipeline_options = PdfPipelineOptions()\n",
@@ -70,19 +68,19 @@
" pipeline_options.table_structure_options.do_cell_matching = True\n",
" pipeline_options.generate_page_images = True\n",
" return pipeline_options"
- ]
+ ],
+ "outputs": [],
+ "execution_count": 2
},
{
"cell_type": "code",
- "execution_count": 3,
"id": "a428f20a1724beb3",
"metadata": {
"ExecuteTime": {
- "end_time": "2025-04-17T11:35:09.500904Z",
- "start_time": "2025-04-17T11:35:09.498507Z"
+ "end_time": "2025-05-24T19:53:12.527478Z",
+ "start_time": "2025-05-24T19:53:12.524972Z"
}
},
- "outputs": [],
"source": [
"# Using the HTMLDocumentBackend to convert HTML files. Images will be ignored\n",
"doc_converter = (\n",
@@ -98,30 +96,19 @@
" },\n",
" )\n",
")"
- ]
+ ],
+ "outputs": [],
+ "execution_count": 3
},
{
"cell_type": "code",
- "execution_count": 4,
"id": "8f11e488b6e81339",
"metadata": {
"ExecuteTime": {
- "end_time": "2025-04-17T11:35:09.577033Z",
- "start_time": "2025-04-17T11:35:09.547937Z"
+ "end_time": "2025-05-24T19:53:12.595349Z",
+ "start_time": "2025-05-24T19:53:12.571403Z"
}
},
- "outputs": [
- {
- "data": {
- "text/markdown": "# Introduction to parsing HTML files withDocling\n\nDocling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem.\n\n### Supported file formats\n\nDocling supports multiple file formats..\n\n- Advanced PDF understanding\n- Microsoft Office DOCX\n- HTML files (with optional support for images)\n\n#### Three backends for handling HTML files\n\nDocling has three backends for parsing HTML files:\n\n1. HTMLDocumentBackend Ignores images\n2. HTMLDocumentBackendImagesInline Extracts images inline\n3. HTMLDocumentBackendImagesReferenced Extracts images as references",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
"source": [
"from IPython.display import Latex, Markdown, display\n",
"\n",
@@ -130,7 +117,20 @@
"document = result.document\n",
"markdown = document.export_to_markdown()\n",
"display(Markdown(markdown))"
- ]
+ ],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/markdown": "## Introduction to parsing HTML files with Docling\n\nDocling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem.\n\n### Supported file formats\n\nDocling supports multiple file formats..\n\n- Advanced PDF understanding\n- Microsoft Office DOCX\n- HTML files (with optional support for images)\n\n#### Three backends for handling HTML files\n\nDocling has three backends for parsing HTML files:\n\n1. HTMLDocumentBackend Ignores images\n2. HTMLDocumentBackendImagesInline Extracts images inline\n3. HTMLDocumentBackendImagesReferenced Extracts images as references"
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "execution_count": 4
},
{
"cell_type": "markdown",
@@ -140,15 +140,13 @@
},
{
"cell_type": "code",
- "execution_count": 5,
"id": "e6c4ea8cf2cdf8c1",
"metadata": {
"ExecuteTime": {
- "end_time": "2025-04-17T11:35:09.595771Z",
- "start_time": "2025-04-17T11:35:09.592457Z"
+ "end_time": "2025-05-24T19:53:12.624815Z",
+ "start_time": "2025-05-24T19:53:12.621979Z"
}
},
- "outputs": [],
"source": [
"# Using the HTMLDocumentBackend to convert HTML files. Images will be extracted as references\n",
"doc_converter = (\n",
@@ -164,30 +162,19 @@
" },\n",
" )\n",
")"
- ]
+ ],
+ "outputs": [],
+ "execution_count": 5
},
{
"cell_type": "code",
- "execution_count": 6,
"id": "4f088a6b02ea54bd",
"metadata": {
"ExecuteTime": {
- "end_time": "2025-04-17T11:35:09.650443Z",
- "start_time": "2025-04-17T11:35:09.639985Z"
+ "end_time": "2025-05-24T19:53:12.679786Z",
+ "start_time": "2025-05-24T19:53:12.671326Z"
}
},
- "outputs": [
- {
- "data": {
- "text/markdown": "# Introduction to parsing HTML files withDocling\n\nDocling\n\n\n\nDocling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem.\n\n### Supported file formats\n\nDocling supports multiple file formats..\n\n- Advanced PDF understanding\n- PDF\n\n\n- Microsoft Office DOCX\n- DOCX\n\n\n- HTML files (with optional support for images)\n- HTML\n\n\n\n#### Three backends for handling HTML files\n\nDocling has three backends for parsing HTML files:\n\n1. HTMLDocumentBackend Ignores images\n2. HTMLDocumentBackendImagesInline Extracts images inline\n3. HTMLDocumentBackendImagesReferenced Extracts images as references",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
"source": [
"from IPython.display import Latex, Markdown, display\n",
"\n",
@@ -198,7 +185,20 @@
"# Does not show otherwise. Not sure why\n",
"markdown = markdown.replace(\"file://\", \"\")\n",
"display(Markdown(markdown))"
- ]
+ ],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/markdown": "## Introduction to parsing HTML files with Docling\n\nDocling\n\n\n\nDocling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem.\n\n### Supported file formats\n\nDocling supports multiple file formats..\n\n- Advanced PDF understanding\n- PDF\n\n\n- Microsoft Office DOCX\n- DOCX\n\n\n- HTML files (with optional support for images)\n- HTML\n\n\n\n#### Three backends for handling HTML files\n\nDocling has three backends for parsing HTML files:\n\n1. HTMLDocumentBackend Ignores images\n2. HTMLDocumentBackendImagesInline Extracts images inline\n3. HTMLDocumentBackendImagesReferenced Extracts images as references"
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "execution_count": 6
},
{
"cell_type": "markdown",
@@ -208,15 +208,13 @@
},
{
"cell_type": "code",
- "execution_count": 7,
"id": "b1112e9c386805b2",
"metadata": {
"ExecuteTime": {
- "end_time": "2025-04-17T11:35:09.691056Z",
- "start_time": "2025-04-17T11:35:09.687780Z"
+ "end_time": "2025-05-24T19:53:12.725721Z",
+ "start_time": "2025-05-24T19:53:12.722455Z"
}
},
- "outputs": [],
"source": [
"# Using the HTMLDocumentBackend to convert HTML files. Images will be extracted as references\n",
"doc_converter = (\n",
@@ -226,45 +224,25 @@
" ], # whitelist formats, non-matching files are ignored.\n",
" format_options={\n",
" InputFormat.HTML: HTMLFormatOption(\n",
- " pipeline_cls=SimplePipeline, backend=HTMLDocumentBackendImagesInline\n",
+ " pipeline_cls=SimplePipeline, backend=HTMLDocumentBackendImagesEmbedded\n",
" ),\n",
" InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_pipeline()),\n",
" },\n",
" )\n",
")"
- ]
+ ],
+ "outputs": [],
+ "execution_count": 7
},
{
"cell_type": "code",
- "execution_count": 8,
"id": "c37aa8c8afcacd16",
"metadata": {
"ExecuteTime": {
- "end_time": "2025-04-17T11:35:11.139730Z",
- "start_time": "2025-04-17T11:35:09.734469Z"
+ "end_time": "2025-05-24T19:53:14.244575Z",
+ "start_time": "2025-05-24T19:53:12.770749Z"
}
},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Could not load image (src=https://github.com/docling-project/docling/tree/main/docs/assets/pdf.png): cannot identify image file <_io.BytesIO object at 0x73d26d7658a0>\n",
- "Could not load image (src=https://github.com/docling-project/docling/tree/main/docs/assets/docx.png): cannot identify image file <_io.BytesIO object at 0x73d2675e5170>\n",
- "Could not load image (src=https://github.com/docling-project/docling/tree/main/docs/assets/html.png): cannot identify image file <_io.BytesIO object at 0x73d2675e4c20>\n"
- ]
- },
- {
- "data": {
- "text/markdown": "# Introduction to parsing HTML files withDocling\n\nDocling\n\n\n\nDocling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem.\n\n### Supported file formats\n\nDocling supports multiple file formats..\n\n- Advanced PDF understanding\n- PDF\n- Microsoft Office DOCX\n- DOCX\n- HTML files (with optional support for images)\n- HTML\n\n#### Three backends for handling HTML files\n\nDocling has three backends for parsing HTML files:\n\n1. HTMLDocumentBackend Ignores images\n2. HTMLDocumentBackendImagesInline Extracts images inline\n3. HTMLDocumentBackendImagesReferenced Extracts images as references",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
"source": [
"from IPython.display import Latex, Markdown, display\n",
"\n",
@@ -273,20 +251,5906 @@
"document = result.document\n",
"markdown = document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)\n",
"display(Markdown(markdown))"
- ]
+ ],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Could not load image (src=https://github.com/docling-project/docling/tree/main/docs/assets/pdf.png): cannot identify image file <_io.BytesIO object at 0x7269dfa022f0>\n",
+ "Could not load image (src=https://github.com/docling-project/docling/tree/main/docs/assets/docx.png): cannot identify image file <_io.BytesIO object at 0x7269dfa4b920>\n",
+ "Could not load image (src=https://github.com/docling-project/docling/tree/main/docs/assets/html.png): cannot identify image file <_io.BytesIO object at 0x7269dfa4b9c0>\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/markdown": "## Introduction to parsing HTML files with Docling\n\nDocling\n\n\n\nDocling simplifies document processing, parsing diverse formats — including HTML — and providing seamless integrations with the gen AI ecosystem.\n\n### Supported file formats\n\nDocling supports multiple file formats..\n\n- Advanced PDF understanding\n- PDF\n- Microsoft Office DOCX\n- DOCX\n- HTML files (with optional support for images)\n- HTML\n\n#### Three backends for handling HTML files\n\nDocling has three backends for parsing HTML files:\n\n1. HTMLDocumentBackend Ignores images\n2. HTMLDocumentBackendImagesInline Extracts images inline\n3. HTMLDocumentBackendImagesReferenced Extracts images as references"
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "execution_count": 8
},
{
"cell_type": "code",
- "execution_count": null,
"id": "94f7cc6d7288c909",
"metadata": {
"ExecuteTime": {
- "end_time": "2025-04-17T11:35:11.185013Z",
- "start_time": "2025-04-17T11:35:11.182838Z"
+ "end_time": "2025-05-24T19:53:14.915566Z",
+ "start_time": "2025-05-24T19:53:14.262109Z"
}
},
+ "source": [
+ "from IPython.display import Latex, Markdown, display\n",
+ "\n",
+ "html_file = \"../../tests/data/html/wiki_duck.html\"\n",
+ "result = doc_converter.convert(html_file)\n",
+ "document = result.document\n",
+ "markdown = document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)\n",
+ "print(markdown)"
+ ],
+ "outputs": [
+ {
+ "ename": "AttributeError",
+ "evalue": "'TableCell' object has no attribute 'get_ref'",
+ "output_type": "error",
+ "traceback": [
+ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+ "\u001B[0;31mAttributeError\u001B[0m Traceback (most recent call last)",
+ "Cell \u001B[0;32mIn[9], line 4\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;21;01mIPython\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mdisplay\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;28;01mimport\u001B[39;00m Latex, Markdown, display\n\u001B[1;32m 3\u001B[0m html_file \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m../../tests/data/html/wiki_duck.html\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m----> 4\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[43mdoc_converter\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mconvert\u001B[49m\u001B[43m(\u001B[49m\u001B[43mhtml_file\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 5\u001B[0m document \u001B[38;5;241m=\u001B[39m result\u001B[38;5;241m.\u001B[39mdocument\n\u001B[1;32m 6\u001B[0m markdown \u001B[38;5;241m=\u001B[39m document\u001B[38;5;241m.\u001B[39mexport_to_markdown(image_mode\u001B[38;5;241m=\u001B[39mImageRefMode\u001B[38;5;241m.\u001B[39mEMBEDDED)\n",
+ "File \u001B[0;32m~/.cache/pypoetry/virtualenvs/docling-6_27CWgt-py3.10/lib/python3.10/site-packages/pydantic/_internal/_validate_call.py:39\u001B[0m, in \u001B[0;36mupdate_wrapper_attributes..wrapper_function\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m 37\u001B[0m \u001B[38;5;129m@functools\u001B[39m\u001B[38;5;241m.\u001B[39mwraps(wrapped)\n\u001B[1;32m 38\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;21mwrapper_function\u001B[39m(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs):\n\u001B[0;32m---> 39\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mwrapper\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n",
+ "File \u001B[0;32m~/.cache/pypoetry/virtualenvs/docling-6_27CWgt-py3.10/lib/python3.10/site-packages/pydantic/_internal/_validate_call.py:136\u001B[0m, in \u001B[0;36mValidateCallWrapper.__call__\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 133\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m__pydantic_complete__:\n\u001B[1;32m 134\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_create_validators()\n\u001B[0;32m--> 136\u001B[0m res \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m__pydantic_validator__\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mvalidate_python\u001B[49m\u001B[43m(\u001B[49m\u001B[43mpydantic_core\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mArgsKwargs\u001B[49m\u001B[43m(\u001B[49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 137\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m__return_pydantic_validator__:\n\u001B[1;32m 138\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m__return_pydantic_validator__(res)\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/document_converter.py:222\u001B[0m, in \u001B[0;36mDocumentConverter.convert\u001B[0;34m(self, source, headers, raises_on_error, max_num_pages, max_file_size, page_range)\u001B[0m\n\u001B[1;32m 204\u001B[0m \u001B[38;5;129m@validate_call\u001B[39m(config\u001B[38;5;241m=\u001B[39mConfigDict(strict\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m))\n\u001B[1;32m 205\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;21mconvert\u001B[39m(\n\u001B[1;32m 206\u001B[0m \u001B[38;5;28mself\u001B[39m,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 212\u001B[0m page_range: PageRange \u001B[38;5;241m=\u001B[39m DEFAULT_PAGE_RANGE,\n\u001B[1;32m 213\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m ConversionResult:\n\u001B[1;32m 214\u001B[0m all_res \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mconvert_all(\n\u001B[1;32m 215\u001B[0m source\u001B[38;5;241m=\u001B[39m[source],\n\u001B[1;32m 216\u001B[0m raises_on_error\u001B[38;5;241m=\u001B[39mraises_on_error,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 220\u001B[0m page_range\u001B[38;5;241m=\u001B[39mpage_range,\n\u001B[1;32m 221\u001B[0m )\n\u001B[0;32m--> 222\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mnext\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mall_res\u001B[49m\u001B[43m)\u001B[49m\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/document_converter.py:245\u001B[0m, in \u001B[0;36mDocumentConverter.convert_all\u001B[0;34m(self, source, headers, raises_on_error, max_num_pages, max_file_size, page_range)\u001B[0m\n\u001B[1;32m 242\u001B[0m conv_res_iter \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_convert(conv_input, raises_on_error\u001B[38;5;241m=\u001B[39mraises_on_error)\n\u001B[1;32m 244\u001B[0m had_result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[0;32m--> 245\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m conv_res \u001B[38;5;129;01min\u001B[39;00m conv_res_iter:\n\u001B[1;32m 246\u001B[0m had_result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[1;32m 247\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m raises_on_error \u001B[38;5;129;01mand\u001B[39;00m conv_res\u001B[38;5;241m.\u001B[39mstatus \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m {\n\u001B[1;32m 248\u001B[0m ConversionStatus\u001B[38;5;241m.\u001B[39mSUCCESS,\n\u001B[1;32m 249\u001B[0m ConversionStatus\u001B[38;5;241m.\u001B[39mPARTIAL_SUCCESS,\n\u001B[1;32m 250\u001B[0m }:\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/document_converter.py:280\u001B[0m, in \u001B[0;36mDocumentConverter._convert\u001B[0;34m(self, conv_input, raises_on_error)\u001B[0m\n\u001B[1;32m 271\u001B[0m _log\u001B[38;5;241m.\u001B[39minfo(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mGoing to convert document batch...\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 273\u001B[0m \u001B[38;5;66;03m# parallel processing only within input_batch\u001B[39;00m\n\u001B[1;32m 274\u001B[0m \u001B[38;5;66;03m# with ThreadPoolExecutor(\u001B[39;00m\n\u001B[1;32m 275\u001B[0m \u001B[38;5;66;03m# max_workers=settings.perf.doc_batch_concurrency\u001B[39;00m\n\u001B[1;32m 276\u001B[0m \u001B[38;5;66;03m# ) as pool:\u001B[39;00m\n\u001B[1;32m 277\u001B[0m \u001B[38;5;66;03m# yield from pool.map(self.process_document, input_batch)\u001B[39;00m\n\u001B[1;32m 278\u001B[0m \u001B[38;5;66;03m# Note: PDF backends are not thread-safe, thread pool usage was disabled.\u001B[39;00m\n\u001B[0;32m--> 280\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m item \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mmap\u001B[39m(\n\u001B[1;32m 281\u001B[0m partial(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_process_document, raises_on_error\u001B[38;5;241m=\u001B[39mraises_on_error),\n\u001B[1;32m 282\u001B[0m input_batch,\n\u001B[1;32m 283\u001B[0m ):\n\u001B[1;32m 284\u001B[0m elapsed \u001B[38;5;241m=\u001B[39m time\u001B[38;5;241m.\u001B[39mmonotonic() \u001B[38;5;241m-\u001B[39m start_time\n\u001B[1;32m 285\u001B[0m start_time \u001B[38;5;241m=\u001B[39m time\u001B[38;5;241m.\u001B[39mmonotonic()\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/document_converter.py:326\u001B[0m, in \u001B[0;36mDocumentConverter._process_document\u001B[0;34m(self, in_doc, raises_on_error)\u001B[0m\n\u001B[1;32m 322\u001B[0m valid \u001B[38;5;241m=\u001B[39m (\n\u001B[1;32m 323\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mallowed_formats \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;129;01mand\u001B[39;00m in_doc\u001B[38;5;241m.\u001B[39mformat \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mallowed_formats\n\u001B[1;32m 324\u001B[0m )\n\u001B[1;32m 325\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m valid:\n\u001B[0;32m--> 326\u001B[0m conv_res \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_execute_pipeline\u001B[49m\u001B[43m(\u001B[49m\u001B[43min_doc\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mraises_on_error\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mraises_on_error\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 327\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 328\u001B[0m error_message \u001B[38;5;241m=\u001B[39m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mFile format not allowed: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00min_doc\u001B[38;5;241m.\u001B[39mfile\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/document_converter.py:349\u001B[0m, in \u001B[0;36mDocumentConverter._execute_pipeline\u001B[0;34m(self, in_doc, raises_on_error)\u001B[0m\n\u001B[1;32m 347\u001B[0m pipeline \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_get_pipeline(in_doc\u001B[38;5;241m.\u001B[39mformat)\n\u001B[1;32m 348\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m pipeline \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m--> 349\u001B[0m conv_res \u001B[38;5;241m=\u001B[39m \u001B[43mpipeline\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mexecute\u001B[49m\u001B[43m(\u001B[49m\u001B[43min_doc\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mraises_on_error\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mraises_on_error\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 350\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 351\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m raises_on_error:\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/pipeline/base_pipeline.py:54\u001B[0m, in \u001B[0;36mBasePipeline.execute\u001B[0;34m(self, in_doc, raises_on_error)\u001B[0m\n\u001B[1;32m 52\u001B[0m conv_res\u001B[38;5;241m.\u001B[39mstatus \u001B[38;5;241m=\u001B[39m ConversionStatus\u001B[38;5;241m.\u001B[39mFAILURE\n\u001B[1;32m 53\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m raises_on_error:\n\u001B[0;32m---> 54\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m e\n\u001B[1;32m 55\u001B[0m \u001B[38;5;28;01mfinally\u001B[39;00m:\n\u001B[1;32m 56\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_unload(conv_res)\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/pipeline/base_pipeline.py:46\u001B[0m, in \u001B[0;36mBasePipeline.execute\u001B[0;34m(self, in_doc, raises_on_error)\u001B[0m\n\u001B[1;32m 40\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m 41\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m TimeRecorder(\n\u001B[1;32m 42\u001B[0m conv_res, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mpipeline_total\u001B[39m\u001B[38;5;124m\"\u001B[39m, scope\u001B[38;5;241m=\u001B[39mProfilingScope\u001B[38;5;241m.\u001B[39mDOCUMENT\n\u001B[1;32m 43\u001B[0m ):\n\u001B[1;32m 44\u001B[0m \u001B[38;5;66;03m# These steps are building and assembling the structure of the\u001B[39;00m\n\u001B[1;32m 45\u001B[0m \u001B[38;5;66;03m# output DoclingDocument.\u001B[39;00m\n\u001B[0;32m---> 46\u001B[0m conv_res \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_build_document\u001B[49m\u001B[43m(\u001B[49m\u001B[43mconv_res\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 47\u001B[0m conv_res \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_assemble_document(conv_res)\n\u001B[1;32m 48\u001B[0m \u001B[38;5;66;03m# From this stage, all operations should rely only on conv_res.output\u001B[39;00m\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/pipeline/simple_pipeline.py:40\u001B[0m, in \u001B[0;36mSimplePipeline._build_document\u001B[0;34m(self, conv_res)\u001B[0m\n\u001B[1;32m 33\u001B[0m \u001B[38;5;66;03m# conv_res.status = ConversionStatus.FAILURE\u001B[39;00m\n\u001B[1;32m 34\u001B[0m \u001B[38;5;66;03m# return conv_res\u001B[39;00m\n\u001B[1;32m 35\u001B[0m \n\u001B[1;32m 36\u001B[0m \u001B[38;5;66;03m# Instead of running a page-level pipeline to build up the document structure,\u001B[39;00m\n\u001B[1;32m 37\u001B[0m \u001B[38;5;66;03m# the backend is expected to be of type DeclarativeDocumentBackend, which can output\u001B[39;00m\n\u001B[1;32m 38\u001B[0m \u001B[38;5;66;03m# a DoclingDocument straight.\u001B[39;00m\n\u001B[1;32m 39\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m TimeRecorder(conv_res, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mdoc_build\u001B[39m\u001B[38;5;124m\"\u001B[39m, scope\u001B[38;5;241m=\u001B[39mProfilingScope\u001B[38;5;241m.\u001B[39mDOCUMENT):\n\u001B[0;32m---> 40\u001B[0m conv_res\u001B[38;5;241m.\u001B[39mdocument \u001B[38;5;241m=\u001B[39m \u001B[43mconv_res\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43minput\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_backend\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mconvert\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 41\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m conv_res\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/backend/html_backend.py:106\u001B[0m, in \u001B[0;36mBaseHTMLDocumentBackend.convert\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 101\u001B[0m headers \u001B[38;5;241m=\u001B[39m body\u001B[38;5;241m.\u001B[39mfind(\u001B[38;5;28mlist\u001B[39m(_BLOCK_TAGS))\n\u001B[1;32m 102\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcontent_layer \u001B[38;5;241m=\u001B[39m (\n\u001B[1;32m 103\u001B[0m ContentLayer\u001B[38;5;241m.\u001B[39mBODY \u001B[38;5;28;01mif\u001B[39;00m headers \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;28;01melse\u001B[39;00m ContentLayer\u001B[38;5;241m.\u001B[39mFURNITURE\n\u001B[1;32m 104\u001B[0m )\n\u001B[0;32m--> 106\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_walk\u001B[49m\u001B[43m(\u001B[49m\u001B[43mbody\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdoc\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparent\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mdoc\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbody\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 107\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m doc\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/backend/html_backend.py:136\u001B[0m, in \u001B[0;36mBaseHTMLDocumentBackend._walk\u001B[0;34m(self, element, doc, parent)\u001B[0m\n\u001B[1;32m 134\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(node, Tag) \u001B[38;5;129;01mand\u001B[39;00m node\u001B[38;5;241m.\u001B[39mfind(\u001B[38;5;28mlist\u001B[39m(_BLOCK_TAGS)):\n\u001B[1;32m 135\u001B[0m flush_buffer()\n\u001B[0;32m--> 136\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_walk\u001B[49m\u001B[43m(\u001B[49m\u001B[43mnode\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdoc\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparent\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 137\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(node, Tag):\n\u001B[1;32m 138\u001B[0m buffer\u001B[38;5;241m.\u001B[39mappend(node\u001B[38;5;241m.\u001B[39mget_text())\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/backend/html_backend.py:136\u001B[0m, in \u001B[0;36mBaseHTMLDocumentBackend._walk\u001B[0;34m(self, element, doc, parent)\u001B[0m\n\u001B[1;32m 134\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(node, Tag) \u001B[38;5;129;01mand\u001B[39;00m node\u001B[38;5;241m.\u001B[39mfind(\u001B[38;5;28mlist\u001B[39m(_BLOCK_TAGS)):\n\u001B[1;32m 135\u001B[0m flush_buffer()\n\u001B[0;32m--> 136\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_walk\u001B[49m\u001B[43m(\u001B[49m\u001B[43mnode\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdoc\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparent\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 137\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(node, Tag):\n\u001B[1;32m 138\u001B[0m buffer\u001B[38;5;241m.\u001B[39mappend(node\u001B[38;5;241m.\u001B[39mget_text())\n",
+ " \u001B[0;31m[... skipping similar frames: BaseHTMLDocumentBackend._walk at line 136 (4 times)]\u001B[0m\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/backend/html_backend.py:136\u001B[0m, in \u001B[0;36mBaseHTMLDocumentBackend._walk\u001B[0;34m(self, element, doc, parent)\u001B[0m\n\u001B[1;32m 134\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(node, Tag) \u001B[38;5;129;01mand\u001B[39;00m node\u001B[38;5;241m.\u001B[39mfind(\u001B[38;5;28mlist\u001B[39m(_BLOCK_TAGS)):\n\u001B[1;32m 135\u001B[0m flush_buffer()\n\u001B[0;32m--> 136\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_walk\u001B[49m\u001B[43m(\u001B[49m\u001B[43mnode\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdoc\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparent\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 137\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(node, Tag):\n\u001B[1;32m 138\u001B[0m buffer\u001B[38;5;241m.\u001B[39mappend(node\u001B[38;5;241m.\u001B[39mget_text())\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/backend/html_backend.py:133\u001B[0m, in \u001B[0;36mBaseHTMLDocumentBackend._walk\u001B[0;34m(self, element, doc, parent)\u001B[0m\n\u001B[1;32m 131\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(node, Tag) \u001B[38;5;129;01mand\u001B[39;00m node\u001B[38;5;241m.\u001B[39mname\u001B[38;5;241m.\u001B[39mlower() \u001B[38;5;129;01min\u001B[39;00m _BLOCK_TAGS:\n\u001B[1;32m 132\u001B[0m flush_buffer()\n\u001B[0;32m--> 133\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_handle_block\u001B[49m\u001B[43m(\u001B[49m\u001B[43mnode\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdoc\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparent\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 134\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(node, Tag) \u001B[38;5;129;01mand\u001B[39;00m node\u001B[38;5;241m.\u001B[39mfind(\u001B[38;5;28mlist\u001B[39m(_BLOCK_TAGS)):\n\u001B[1;32m 135\u001B[0m flush_buffer()\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/backend/html_backend.py:201\u001B[0m, in \u001B[0;36mBaseHTMLDocumentBackend._handle_block\u001B[0;34m(self, tag, doc, parent)\u001B[0m\n\u001B[1;32m 198\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_emit_image(img_tag, doc, li_item)\n\u001B[1;32m 200\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m tag_name \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtable\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n\u001B[0;32m--> 201\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_parse_table\u001B[49m\u001B[43m(\u001B[49m\u001B[43mtag\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdoc\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparent\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 202\u001B[0m doc\u001B[38;5;241m.\u001B[39madd_table(data\u001B[38;5;241m=\u001B[39mdata, parent\u001B[38;5;241m=\u001B[39mparent)\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/backend/html_backend.py:282\u001B[0m, in \u001B[0;36mBaseHTMLDocumentBackend._parse_table\u001B[0;34m(self, table_tag, doc, parent)\u001B[0m\n\u001B[1;32m 280\u001B[0m cells\u001B[38;5;241m.\u001B[39mappend(cell)\n\u001B[1;32m 281\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m img_tag \u001B[38;5;129;01min\u001B[39;00m cell_tag\u001B[38;5;241m.\u001B[39mfind_all(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mimg\u001B[39m\u001B[38;5;124m\"\u001B[39m, recursive\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m):\n\u001B[0;32m--> 282\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_emit_image\u001B[49m\u001B[43m(\u001B[49m\u001B[43mimg_tag\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdoc\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcell\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 284\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m dr \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mrange\u001B[39m(rs):\n\u001B[1;32m 285\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m dc \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mrange\u001B[39m(cs):\n",
+ "File \u001B[0;32m~/PycharmProjects/docling/docling/backend/html_backend.py:211\u001B[0m, in \u001B[0;36mBaseHTMLDocumentBackend._emit_image\u001B[0;34m(self, img_tag, doc, parent)\u001B[0m\n\u001B[1;32m 209\u001B[0m caption_item \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[1;32m 210\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m alt:\n\u001B[0;32m--> 211\u001B[0m caption_item \u001B[38;5;241m=\u001B[39m \u001B[43mdoc\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43madd_text\u001B[49m\u001B[43m(\u001B[49m\u001B[43mDocItemLabel\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mCAPTION\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43malt\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparent\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mparent\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 213\u001B[0m src_url \u001B[38;5;241m=\u001B[39m img_tag\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124msrc\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 214\u001B[0m width \u001B[38;5;241m=\u001B[39m img_tag\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mwidth\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m128\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n",
+ "File \u001B[0;32m~/.cache/pypoetry/virtualenvs/docling-6_27CWgt-py3.10/lib/python3.10/site-packages/docling_core/types/doc/document.py:2246\u001B[0m, in \u001B[0;36mDoclingDocument.add_text\u001B[0;34m(self, label, text, orig, prov, parent, content_layer, formatting, hyperlink)\u001B[0m\n\u001B[1;32m 2239\u001B[0m text_index \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mlen\u001B[39m(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtexts)\n\u001B[1;32m 2240\u001B[0m cref \u001B[38;5;241m=\u001B[39m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m#/texts/\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mtext_index\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 2241\u001B[0m text_item \u001B[38;5;241m=\u001B[39m TextItem(\n\u001B[1;32m 2242\u001B[0m label\u001B[38;5;241m=\u001B[39mlabel,\n\u001B[1;32m 2243\u001B[0m text\u001B[38;5;241m=\u001B[39mtext,\n\u001B[1;32m 2244\u001B[0m orig\u001B[38;5;241m=\u001B[39morig,\n\u001B[1;32m 2245\u001B[0m self_ref\u001B[38;5;241m=\u001B[39mcref,\n\u001B[0;32m-> 2246\u001B[0m parent\u001B[38;5;241m=\u001B[39m\u001B[43mparent\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_ref\u001B[49m(),\n\u001B[1;32m 2247\u001B[0m formatting\u001B[38;5;241m=\u001B[39mformatting,\n\u001B[1;32m 2248\u001B[0m hyperlink\u001B[38;5;241m=\u001B[39mhyperlink,\n\u001B[1;32m 2249\u001B[0m )\n\u001B[1;32m 2250\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m prov:\n\u001B[1;32m 2251\u001B[0m text_item\u001B[38;5;241m.\u001B[39mprov\u001B[38;5;241m.\u001B[39mappend(prov)\n",
+ "File \u001B[0;32m~/.cache/pypoetry/virtualenvs/docling-6_27CWgt-py3.10/lib/python3.10/site-packages/pydantic/main.py:989\u001B[0m, in \u001B[0;36mBaseModel.__getattr__\u001B[0;34m(self, item)\u001B[0m\n\u001B[1;32m 986\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28msuper\u001B[39m()\u001B[38;5;241m.\u001B[39m\u001B[38;5;21m__getattribute__\u001B[39m(item) \u001B[38;5;66;03m# Raises AttributeError if appropriate\u001B[39;00m\n\u001B[1;32m 987\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 988\u001B[0m \u001B[38;5;66;03m# this is the current error\u001B[39;00m\n\u001B[0;32m--> 989\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mAttributeError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mtype\u001B[39m(\u001B[38;5;28mself\u001B[39m)\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m\u001B[38;5;132;01m!r}\u001B[39;00m\u001B[38;5;124m object has no attribute \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mitem\u001B[38;5;132;01m!r}\u001B[39;00m\u001B[38;5;124m'\u001B[39m)\n",
+ "\u001B[0;31mAttributeError\u001B[0m: 'TableCell' object has no attribute 'get_ref'"
+ ]
+ }
+ ],
+ "execution_count": 9
+ },
+ {
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-05-24T19:53:14.919066675Z",
+ "start_time": "2025-05-24T19:49:22.082841Z"
+ }
+ },
+ "cell_type": "code",
+ "source": "",
+ "id": "5d325df8ba7fe9d2",
"outputs": [],
- "source": []
+ "execution_count": null
+ },
+ {
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-05-24T19:53:14.919492904Z",
+ "start_time": "2025-05-24T19:49:22.138930Z"
+ }
+ },
+ "cell_type": "code",
+ "source": "document.export_to_dict()",
+ "id": "32e863ce5e23040b",
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'schema_name': 'DoclingDocument',\n",
+ " 'version': '1.3.0',\n",
+ " 'name': 'wiki_duck',\n",
+ " 'origin': {'mimetype': 'text/html',\n",
+ " 'binary_hash': 8376021084832888905,\n",
+ " 'filename': 'wiki_duck.html'},\n",
+ " 'furniture': {'self_ref': '#/furniture',\n",
+ " 'children': [],\n",
+ " 'content_layer': 'furniture',\n",
+ " 'name': '_root_',\n",
+ " 'label': 'unspecified'},\n",
+ " 'body': {'self_ref': '#/body',\n",
+ " 'children': [{'$ref': '#/texts/0'},\n",
+ " {'$ref': '#/texts/1'},\n",
+ " {'$ref': '#/texts/2'},\n",
+ " {'$ref': '#/texts/3'},\n",
+ " {'$ref': '#/texts/4'},\n",
+ " {'$ref': '#/texts/5'},\n",
+ " {'$ref': '#/texts/6'},\n",
+ " {'$ref': '#/groups/0'},\n",
+ " {'$ref': '#/texts/13'},\n",
+ " {'$ref': '#/groups/1'},\n",
+ " {'$ref': '#/texts/19'},\n",
+ " {'$ref': '#/texts/20'},\n",
+ " {'$ref': '#/groups/2'},\n",
+ " {'$ref': '#/groups/3'},\n",
+ " {'$ref': '#/texts/22'},\n",
+ " {'$ref': '#/groups/4'},\n",
+ " {'$ref': '#/groups/5'},\n",
+ " {'$ref': '#/texts/25'},\n",
+ " {'$ref': '#/groups/6'},\n",
+ " {'$ref': '#/texts/28'},\n",
+ " {'$ref': '#/groups/7'},\n",
+ " {'$ref': '#/texts/31'},\n",
+ " {'$ref': '#/texts/32'},\n",
+ " {'$ref': '#/texts/33'},\n",
+ " {'$ref': '#/groups/8'},\n",
+ " {'$ref': '#/texts/54'},\n",
+ " {'$ref': '#/texts/55'},\n",
+ " {'$ref': '#/texts/56'},\n",
+ " {'$ref': '#/groups/27'},\n",
+ " {'$ref': '#/texts/193'},\n",
+ " {'$ref': '#/groups/28'},\n",
+ " {'$ref': '#/texts/196'},\n",
+ " {'$ref': '#/groups/29'},\n",
+ " {'$ref': '#/groups/30'},\n",
+ " {'$ref': '#/texts/200'},\n",
+ " {'$ref': '#/texts/201'},\n",
+ " {'$ref': '#/texts/202'},\n",
+ " {'$ref': '#/texts/203'},\n",
+ " {'$ref': '#/texts/204'},\n",
+ " {'$ref': '#/groups/31'},\n",
+ " {'$ref': '#/texts/208'},\n",
+ " {'$ref': '#/groups/32'},\n",
+ " {'$ref': '#/texts/219'},\n",
+ " {'$ref': '#/groups/33'},\n",
+ " {'$ref': '#/texts/222'},\n",
+ " {'$ref': '#/groups/34'},\n",
+ " {'$ref': '#/texts/225'},\n",
+ " {'$ref': '#/texts/226'},\n",
+ " {'$ref': '#/texts/227'},\n",
+ " {'$ref': '#/texts/228'},\n",
+ " {'$ref': '#/texts/229'},\n",
+ " {'$ref': '#/texts/230'},\n",
+ " {'$ref': '#/texts/231'},\n",
+ " {'$ref': '#/texts/232'},\n",
+ " {'$ref': '#/tables/0'},\n",
+ " {'$ref': '#/texts/233'},\n",
+ " {'$ref': '#/texts/234'},\n",
+ " {'$ref': '#/texts/235'},\n",
+ " {'$ref': '#/texts/236'},\n",
+ " {'$ref': '#/texts/237'},\n",
+ " {'$ref': '#/texts/238'},\n",
+ " {'$ref': '#/texts/239'},\n",
+ " {'$ref': '#/texts/240'},\n",
+ " {'$ref': '#/texts/241'},\n",
+ " {'$ref': '#/texts/242'},\n",
+ " {'$ref': '#/texts/243'},\n",
+ " {'$ref': '#/texts/244'},\n",
+ " {'$ref': '#/texts/245'},\n",
+ " {'$ref': '#/texts/246'},\n",
+ " {'$ref': '#/texts/247'},\n",
+ " {'$ref': '#/texts/248'},\n",
+ " {'$ref': '#/texts/249'},\n",
+ " {'$ref': '#/texts/250'},\n",
+ " {'$ref': '#/texts/251'},\n",
+ " {'$ref': '#/texts/252'},\n",
+ " {'$ref': '#/texts/253'},\n",
+ " {'$ref': '#/texts/254'},\n",
+ " {'$ref': '#/texts/255'},\n",
+ " {'$ref': '#/texts/256'},\n",
+ " {'$ref': '#/texts/257'},\n",
+ " {'$ref': '#/texts/258'},\n",
+ " {'$ref': '#/texts/259'},\n",
+ " {'$ref': '#/texts/260'},\n",
+ " {'$ref': '#/texts/261'},\n",
+ " {'$ref': '#/texts/262'},\n",
+ " {'$ref': '#/texts/263'},\n",
+ " {'$ref': '#/texts/264'},\n",
+ " {'$ref': '#/texts/265'},\n",
+ " {'$ref': '#/texts/266'},\n",
+ " {'$ref': '#/texts/267'},\n",
+ " {'$ref': '#/texts/268'},\n",
+ " {'$ref': '#/texts/269'},\n",
+ " {'$ref': '#/texts/270'},\n",
+ " {'$ref': '#/texts/271'},\n",
+ " {'$ref': '#/texts/272'},\n",
+ " {'$ref': '#/texts/273'},\n",
+ " {'$ref': '#/texts/274'},\n",
+ " {'$ref': '#/texts/275'},\n",
+ " {'$ref': '#/texts/276'},\n",
+ " {'$ref': '#/texts/277'},\n",
+ " {'$ref': '#/texts/278'},\n",
+ " {'$ref': '#/texts/279'},\n",
+ " {'$ref': '#/texts/280'},\n",
+ " {'$ref': '#/texts/281'},\n",
+ " {'$ref': '#/texts/282'},\n",
+ " {'$ref': '#/texts/283'},\n",
+ " {'$ref': '#/texts/284'},\n",
+ " {'$ref': '#/texts/285'},\n",
+ " {'$ref': '#/texts/286'},\n",
+ " {'$ref': '#/texts/287'},\n",
+ " {'$ref': '#/texts/288'},\n",
+ " {'$ref': '#/texts/289'},\n",
+ " {'$ref': '#/texts/290'},\n",
+ " {'$ref': '#/texts/291'},\n",
+ " {'$ref': '#/texts/292'},\n",
+ " {'$ref': '#/texts/293'},\n",
+ " {'$ref': '#/groups/35'},\n",
+ " {'$ref': '#/groups/36'},\n",
+ " {'$ref': '#/texts/301'},\n",
+ " {'$ref': '#/texts/302'},\n",
+ " {'$ref': '#/groups/37'},\n",
+ " {'$ref': '#/texts/358'},\n",
+ " {'$ref': '#/groups/38'},\n",
+ " {'$ref': '#/texts/379'},\n",
+ " {'$ref': '#/texts/380'},\n",
+ " {'$ref': '#/groups/39'},\n",
+ " {'$ref': '#/groups/40'},\n",
+ " {'$ref': '#/tables/1'},\n",
+ " {'$ref': '#/texts/391'},\n",
+ " {'$ref': '#/texts/392'},\n",
+ " {'$ref': '#/texts/393'},\n",
+ " {'$ref': '#/texts/394'},\n",
+ " {'$ref': '#/texts/395'},\n",
+ " {'$ref': '#/texts/396'},\n",
+ " {'$ref': '#/texts/397'},\n",
+ " {'$ref': '#/texts/398'},\n",
+ " {'$ref': '#/texts/399'},\n",
+ " {'$ref': '#/texts/400'},\n",
+ " {'$ref': '#/texts/401'},\n",
+ " {'$ref': '#/texts/402'},\n",
+ " {'$ref': '#/texts/403'},\n",
+ " {'$ref': '#/texts/404'},\n",
+ " {'$ref': '#/texts/405'},\n",
+ " {'$ref': '#/texts/406'},\n",
+ " {'$ref': '#/texts/407'},\n",
+ " {'$ref': '#/texts/408'},\n",
+ " {'$ref': '#/texts/409'},\n",
+ " {'$ref': '#/texts/410'},\n",
+ " {'$ref': '#/texts/411'},\n",
+ " {'$ref': '#/texts/412'},\n",
+ " {'$ref': '#/texts/413'},\n",
+ " {'$ref': '#/texts/414'},\n",
+ " {'$ref': '#/texts/415'},\n",
+ " {'$ref': '#/texts/416'},\n",
+ " {'$ref': '#/texts/417'},\n",
+ " {'$ref': '#/texts/418'},\n",
+ " {'$ref': '#/texts/419'},\n",
+ " {'$ref': '#/texts/420'},\n",
+ " {'$ref': '#/texts/421'},\n",
+ " {'$ref': '#/texts/422'},\n",
+ " {'$ref': '#/texts/423'},\n",
+ " {'$ref': '#/groups/41'},\n",
+ " {'$ref': '#/texts/427'},\n",
+ " {'$ref': '#/groups/42'},\n",
+ " {'$ref': '#/groups/43'},\n",
+ " {'$ref': '#/groups/44'},\n",
+ " {'$ref': '#/groups/45'},\n",
+ " {'$ref': '#/groups/46'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': '_root_',\n",
+ " 'label': 'unspecified'},\n",
+ " 'groups': [{'self_ref': '#/groups/0',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/7'},\n",
+ " {'$ref': '#/texts/8'},\n",
+ " {'$ref': '#/texts/9'},\n",
+ " {'$ref': '#/texts/10'},\n",
+ " {'$ref': '#/texts/11'},\n",
+ " {'$ref': '#/texts/12'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/1',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/14'},\n",
+ " {'$ref': '#/texts/15'},\n",
+ " {'$ref': '#/texts/16'},\n",
+ " {'$ref': '#/texts/17'},\n",
+ " {'$ref': '#/texts/18'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/2',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/3',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/21'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/4',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/5',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/23'}, {'$ref': '#/texts/24'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/6',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/26'}, {'$ref': '#/texts/27'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/7',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/29'}, {'$ref': '#/texts/30'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/8',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/34'},\n",
+ " {'$ref': '#/texts/35'},\n",
+ " {'$ref': '#/texts/36'},\n",
+ " {'$ref': '#/texts/37'},\n",
+ " {'$ref': '#/texts/38'},\n",
+ " {'$ref': '#/texts/39'},\n",
+ " {'$ref': '#/texts/44'},\n",
+ " {'$ref': '#/texts/49'},\n",
+ " {'$ref': '#/texts/50'},\n",
+ " {'$ref': '#/texts/53'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/9',\n",
+ " 'parent': {'$ref': '#/texts/35'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/10',\n",
+ " 'parent': {'$ref': '#/texts/36'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/11',\n",
+ " 'parent': {'$ref': '#/texts/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/12',\n",
+ " 'parent': {'$ref': '#/texts/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/13',\n",
+ " 'parent': {'$ref': '#/texts/39'},\n",
+ " 'children': [{'$ref': '#/texts/40'},\n",
+ " {'$ref': '#/texts/41'},\n",
+ " {'$ref': '#/texts/42'},\n",
+ " {'$ref': '#/texts/43'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/14',\n",
+ " 'parent': {'$ref': '#/texts/41'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/15',\n",
+ " 'parent': {'$ref': '#/texts/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/16',\n",
+ " 'parent': {'$ref': '#/texts/43'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/17',\n",
+ " 'parent': {'$ref': '#/texts/44'},\n",
+ " 'children': [{'$ref': '#/texts/45'},\n",
+ " {'$ref': '#/texts/46'},\n",
+ " {'$ref': '#/texts/47'},\n",
+ " {'$ref': '#/texts/48'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/18',\n",
+ " 'parent': {'$ref': '#/texts/45'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/19',\n",
+ " 'parent': {'$ref': '#/texts/46'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/20',\n",
+ " 'parent': {'$ref': '#/texts/47'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/21',\n",
+ " 'parent': {'$ref': '#/texts/48'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/22',\n",
+ " 'parent': {'$ref': '#/texts/49'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/23',\n",
+ " 'parent': {'$ref': '#/texts/50'},\n",
+ " 'children': [{'$ref': '#/texts/51'}, {'$ref': '#/texts/52'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/24',\n",
+ " 'parent': {'$ref': '#/texts/51'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/25',\n",
+ " 'parent': {'$ref': '#/texts/52'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/26',\n",
+ " 'parent': {'$ref': '#/texts/53'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/27',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/57'},\n",
+ " {'$ref': '#/texts/58'},\n",
+ " {'$ref': '#/texts/59'},\n",
+ " {'$ref': '#/texts/60'},\n",
+ " {'$ref': '#/texts/61'},\n",
+ " {'$ref': '#/texts/62'},\n",
+ " {'$ref': '#/texts/63'},\n",
+ " {'$ref': '#/texts/64'},\n",
+ " {'$ref': '#/texts/65'},\n",
+ " {'$ref': '#/texts/66'},\n",
+ " {'$ref': '#/texts/67'},\n",
+ " {'$ref': '#/texts/68'},\n",
+ " {'$ref': '#/texts/69'},\n",
+ " {'$ref': '#/texts/70'},\n",
+ " {'$ref': '#/texts/71'},\n",
+ " {'$ref': '#/texts/72'},\n",
+ " {'$ref': '#/texts/73'},\n",
+ " {'$ref': '#/texts/74'},\n",
+ " {'$ref': '#/texts/75'},\n",
+ " {'$ref': '#/texts/76'},\n",
+ " {'$ref': '#/texts/77'},\n",
+ " {'$ref': '#/texts/78'},\n",
+ " {'$ref': '#/texts/79'},\n",
+ " {'$ref': '#/texts/80'},\n",
+ " {'$ref': '#/texts/81'},\n",
+ " {'$ref': '#/texts/82'},\n",
+ " {'$ref': '#/texts/83'},\n",
+ " {'$ref': '#/texts/84'},\n",
+ " {'$ref': '#/texts/85'},\n",
+ " {'$ref': '#/texts/86'},\n",
+ " {'$ref': '#/texts/87'},\n",
+ " {'$ref': '#/texts/88'},\n",
+ " {'$ref': '#/texts/89'},\n",
+ " {'$ref': '#/texts/90'},\n",
+ " {'$ref': '#/texts/91'},\n",
+ " {'$ref': '#/texts/92'},\n",
+ " {'$ref': '#/texts/93'},\n",
+ " {'$ref': '#/texts/94'},\n",
+ " {'$ref': '#/texts/95'},\n",
+ " {'$ref': '#/texts/96'},\n",
+ " {'$ref': '#/texts/97'},\n",
+ " {'$ref': '#/texts/98'},\n",
+ " {'$ref': '#/texts/99'},\n",
+ " {'$ref': '#/texts/100'},\n",
+ " {'$ref': '#/texts/101'},\n",
+ " {'$ref': '#/texts/102'},\n",
+ " {'$ref': '#/texts/103'},\n",
+ " {'$ref': '#/texts/104'},\n",
+ " {'$ref': '#/texts/105'},\n",
+ " {'$ref': '#/texts/106'},\n",
+ " {'$ref': '#/texts/107'},\n",
+ " {'$ref': '#/texts/108'},\n",
+ " {'$ref': '#/texts/109'},\n",
+ " {'$ref': '#/texts/110'},\n",
+ " {'$ref': '#/texts/111'},\n",
+ " {'$ref': '#/texts/112'},\n",
+ " {'$ref': '#/texts/113'},\n",
+ " {'$ref': '#/texts/114'},\n",
+ " {'$ref': '#/texts/115'},\n",
+ " {'$ref': '#/texts/116'},\n",
+ " {'$ref': '#/texts/117'},\n",
+ " {'$ref': '#/texts/118'},\n",
+ " {'$ref': '#/texts/119'},\n",
+ " {'$ref': '#/texts/120'},\n",
+ " {'$ref': '#/texts/121'},\n",
+ " {'$ref': '#/texts/122'},\n",
+ " {'$ref': '#/texts/123'},\n",
+ " {'$ref': '#/texts/124'},\n",
+ " {'$ref': '#/texts/125'},\n",
+ " {'$ref': '#/texts/126'},\n",
+ " {'$ref': '#/texts/127'},\n",
+ " {'$ref': '#/texts/128'},\n",
+ " {'$ref': '#/texts/129'},\n",
+ " {'$ref': '#/texts/130'},\n",
+ " {'$ref': '#/texts/131'},\n",
+ " {'$ref': '#/texts/132'},\n",
+ " {'$ref': '#/texts/133'},\n",
+ " {'$ref': '#/texts/134'},\n",
+ " {'$ref': '#/texts/135'},\n",
+ " {'$ref': '#/texts/136'},\n",
+ " {'$ref': '#/texts/137'},\n",
+ " {'$ref': '#/texts/138'},\n",
+ " {'$ref': '#/texts/139'},\n",
+ " {'$ref': '#/texts/140'},\n",
+ " {'$ref': '#/texts/141'},\n",
+ " {'$ref': '#/texts/142'},\n",
+ " {'$ref': '#/texts/143'},\n",
+ " {'$ref': '#/texts/144'},\n",
+ " {'$ref': '#/texts/145'},\n",
+ " {'$ref': '#/texts/146'},\n",
+ " {'$ref': '#/texts/147'},\n",
+ " {'$ref': '#/texts/148'},\n",
+ " {'$ref': '#/texts/149'},\n",
+ " {'$ref': '#/texts/150'},\n",
+ " {'$ref': '#/texts/151'},\n",
+ " {'$ref': '#/texts/152'},\n",
+ " {'$ref': '#/texts/153'},\n",
+ " {'$ref': '#/texts/154'},\n",
+ " {'$ref': '#/texts/155'},\n",
+ " {'$ref': '#/texts/156'},\n",
+ " {'$ref': '#/texts/157'},\n",
+ " {'$ref': '#/texts/158'},\n",
+ " {'$ref': '#/texts/159'},\n",
+ " {'$ref': '#/texts/160'},\n",
+ " {'$ref': '#/texts/161'},\n",
+ " {'$ref': '#/texts/162'},\n",
+ " {'$ref': '#/texts/163'},\n",
+ " {'$ref': '#/texts/164'},\n",
+ " {'$ref': '#/texts/165'},\n",
+ " {'$ref': '#/texts/166'},\n",
+ " {'$ref': '#/texts/167'},\n",
+ " {'$ref': '#/texts/168'},\n",
+ " {'$ref': '#/texts/169'},\n",
+ " {'$ref': '#/texts/170'},\n",
+ " {'$ref': '#/texts/171'},\n",
+ " {'$ref': '#/texts/172'},\n",
+ " {'$ref': '#/texts/173'},\n",
+ " {'$ref': '#/texts/174'},\n",
+ " {'$ref': '#/texts/175'},\n",
+ " {'$ref': '#/texts/176'},\n",
+ " {'$ref': '#/texts/177'},\n",
+ " {'$ref': '#/texts/178'},\n",
+ " {'$ref': '#/texts/179'},\n",
+ " {'$ref': '#/texts/180'},\n",
+ " {'$ref': '#/texts/181'},\n",
+ " {'$ref': '#/texts/182'},\n",
+ " {'$ref': '#/texts/183'},\n",
+ " {'$ref': '#/texts/184'},\n",
+ " {'$ref': '#/texts/185'},\n",
+ " {'$ref': '#/texts/186'},\n",
+ " {'$ref': '#/texts/187'},\n",
+ " {'$ref': '#/texts/188'},\n",
+ " {'$ref': '#/texts/189'},\n",
+ " {'$ref': '#/texts/190'},\n",
+ " {'$ref': '#/texts/191'},\n",
+ " {'$ref': '#/texts/192'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/28',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/194'}, {'$ref': '#/texts/195'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/29',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/30',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/197'},\n",
+ " {'$ref': '#/texts/198'},\n",
+ " {'$ref': '#/texts/199'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/31',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/205'},\n",
+ " {'$ref': '#/texts/206'},\n",
+ " {'$ref': '#/texts/207'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/32',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/209'},\n",
+ " {'$ref': '#/texts/210'},\n",
+ " {'$ref': '#/texts/211'},\n",
+ " {'$ref': '#/texts/212'},\n",
+ " {'$ref': '#/texts/213'},\n",
+ " {'$ref': '#/texts/214'},\n",
+ " {'$ref': '#/texts/215'},\n",
+ " {'$ref': '#/texts/216'},\n",
+ " {'$ref': '#/texts/217'},\n",
+ " {'$ref': '#/texts/218'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/33',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/220'}, {'$ref': '#/texts/221'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/34',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/223'}, {'$ref': '#/texts/224'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/35',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/294'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/36',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/295'},\n",
+ " {'$ref': '#/texts/296'},\n",
+ " {'$ref': '#/texts/297'},\n",
+ " {'$ref': '#/texts/298'},\n",
+ " {'$ref': '#/texts/299'},\n",
+ " {'$ref': '#/texts/300'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/37',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/303'},\n",
+ " {'$ref': '#/texts/304'},\n",
+ " {'$ref': '#/texts/305'},\n",
+ " {'$ref': '#/texts/306'},\n",
+ " {'$ref': '#/texts/307'},\n",
+ " {'$ref': '#/texts/308'},\n",
+ " {'$ref': '#/texts/309'},\n",
+ " {'$ref': '#/texts/310'},\n",
+ " {'$ref': '#/texts/311'},\n",
+ " {'$ref': '#/texts/312'},\n",
+ " {'$ref': '#/texts/313'},\n",
+ " {'$ref': '#/texts/314'},\n",
+ " {'$ref': '#/texts/315'},\n",
+ " {'$ref': '#/texts/316'},\n",
+ " {'$ref': '#/texts/317'},\n",
+ " {'$ref': '#/texts/318'},\n",
+ " {'$ref': '#/texts/319'},\n",
+ " {'$ref': '#/texts/320'},\n",
+ " {'$ref': '#/texts/321'},\n",
+ " {'$ref': '#/texts/322'},\n",
+ " {'$ref': '#/texts/323'},\n",
+ " {'$ref': '#/texts/324'},\n",
+ " {'$ref': '#/texts/325'},\n",
+ " {'$ref': '#/texts/326'},\n",
+ " {'$ref': '#/texts/327'},\n",
+ " {'$ref': '#/texts/328'},\n",
+ " {'$ref': '#/texts/329'},\n",
+ " {'$ref': '#/texts/330'},\n",
+ " {'$ref': '#/texts/331'},\n",
+ " {'$ref': '#/texts/332'},\n",
+ " {'$ref': '#/texts/333'},\n",
+ " {'$ref': '#/texts/334'},\n",
+ " {'$ref': '#/texts/335'},\n",
+ " {'$ref': '#/texts/336'},\n",
+ " {'$ref': '#/texts/337'},\n",
+ " {'$ref': '#/texts/338'},\n",
+ " {'$ref': '#/texts/339'},\n",
+ " {'$ref': '#/texts/340'},\n",
+ " {'$ref': '#/texts/341'},\n",
+ " {'$ref': '#/texts/342'},\n",
+ " {'$ref': '#/texts/343'},\n",
+ " {'$ref': '#/texts/344'},\n",
+ " {'$ref': '#/texts/345'},\n",
+ " {'$ref': '#/texts/346'},\n",
+ " {'$ref': '#/texts/347'},\n",
+ " {'$ref': '#/texts/348'},\n",
+ " {'$ref': '#/texts/349'},\n",
+ " {'$ref': '#/texts/350'},\n",
+ " {'$ref': '#/texts/351'},\n",
+ " {'$ref': '#/texts/352'},\n",
+ " {'$ref': '#/texts/353'},\n",
+ " {'$ref': '#/texts/354'},\n",
+ " {'$ref': '#/texts/355'},\n",
+ " {'$ref': '#/texts/356'},\n",
+ " {'$ref': '#/texts/357'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'ordered_list'},\n",
+ " {'self_ref': '#/groups/38',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/359'},\n",
+ " {'$ref': '#/texts/360'},\n",
+ " {'$ref': '#/texts/361'},\n",
+ " {'$ref': '#/texts/362'},\n",
+ " {'$ref': '#/texts/363'},\n",
+ " {'$ref': '#/texts/364'},\n",
+ " {'$ref': '#/texts/365'},\n",
+ " {'$ref': '#/texts/366'},\n",
+ " {'$ref': '#/texts/367'},\n",
+ " {'$ref': '#/texts/368'},\n",
+ " {'$ref': '#/texts/369'},\n",
+ " {'$ref': '#/texts/370'},\n",
+ " {'$ref': '#/texts/371'},\n",
+ " {'$ref': '#/texts/372'},\n",
+ " {'$ref': '#/texts/373'},\n",
+ " {'$ref': '#/texts/374'},\n",
+ " {'$ref': '#/texts/375'},\n",
+ " {'$ref': '#/texts/376'},\n",
+ " {'$ref': '#/texts/377'},\n",
+ " {'$ref': '#/texts/378'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/39',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/381'},\n",
+ " {'$ref': '#/texts/382'},\n",
+ " {'$ref': '#/texts/383'},\n",
+ " {'$ref': '#/texts/384'},\n",
+ " {'$ref': '#/texts/385'},\n",
+ " {'$ref': '#/texts/386'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/40',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/387'},\n",
+ " {'$ref': '#/texts/388'},\n",
+ " {'$ref': '#/texts/389'},\n",
+ " {'$ref': '#/texts/390'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/41',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/424'},\n",
+ " {'$ref': '#/texts/425'},\n",
+ " {'$ref': '#/texts/426'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/42',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/428'},\n",
+ " {'$ref': '#/texts/429'},\n",
+ " {'$ref': '#/texts/430'},\n",
+ " {'$ref': '#/texts/431'},\n",
+ " {'$ref': '#/texts/432'},\n",
+ " {'$ref': '#/texts/433'},\n",
+ " {'$ref': '#/texts/434'},\n",
+ " {'$ref': '#/texts/435'},\n",
+ " {'$ref': '#/texts/436'},\n",
+ " {'$ref': '#/texts/437'},\n",
+ " {'$ref': '#/texts/438'},\n",
+ " {'$ref': '#/texts/439'},\n",
+ " {'$ref': '#/texts/440'},\n",
+ " {'$ref': '#/texts/441'},\n",
+ " {'$ref': '#/texts/442'},\n",
+ " {'$ref': '#/texts/443'},\n",
+ " {'$ref': '#/texts/444'},\n",
+ " {'$ref': '#/texts/445'},\n",
+ " {'$ref': '#/texts/446'},\n",
+ " {'$ref': '#/texts/447'},\n",
+ " {'$ref': '#/texts/448'},\n",
+ " {'$ref': '#/texts/449'},\n",
+ " {'$ref': '#/texts/450'},\n",
+ " {'$ref': '#/texts/451'},\n",
+ " {'$ref': '#/texts/452'},\n",
+ " {'$ref': '#/texts/453'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/43',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/454'}, {'$ref': '#/texts/455'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/44',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/456'},\n",
+ " {'$ref': '#/texts/457'},\n",
+ " {'$ref': '#/texts/458'},\n",
+ " {'$ref': '#/texts/459'},\n",
+ " {'$ref': '#/texts/460'},\n",
+ " {'$ref': '#/texts/461'},\n",
+ " {'$ref': '#/texts/462'},\n",
+ " {'$ref': '#/texts/463'},\n",
+ " {'$ref': '#/texts/464'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/45',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [{'$ref': '#/texts/465'}, {'$ref': '#/texts/467'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'},\n",
+ " {'self_ref': '#/groups/46',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'name': 'group',\n",
+ " 'label': 'list'}],\n",
+ " 'texts': [{'self_ref': '#/texts/0',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'title',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Duck - Wikipedia',\n",
+ " 'text': 'Duck - Wikipedia'},\n",
+ " {'self_ref': '#/texts/1',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Jump to content',\n",
+ " 'text': 'Jump to content'},\n",
+ " {'self_ref': '#/texts/2',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Main menu',\n",
+ " 'text': 'Main menu'},\n",
+ " {'self_ref': '#/texts/3',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Main menu',\n",
+ " 'text': 'Main menu'},\n",
+ " {'self_ref': '#/texts/4',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'move to sidebar',\n",
+ " 'text': 'move to sidebar'},\n",
+ " {'self_ref': '#/texts/5',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'hide',\n",
+ " 'text': 'hide'},\n",
+ " {'self_ref': '#/texts/6',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Navigation',\n",
+ " 'text': 'Navigation'},\n",
+ " {'self_ref': '#/texts/7',\n",
+ " 'parent': {'$ref': '#/groups/0'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Main page',\n",
+ " 'text': 'Main page',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/8',\n",
+ " 'parent': {'$ref': '#/groups/0'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Contents',\n",
+ " 'text': 'Contents',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/9',\n",
+ " 'parent': {'$ref': '#/groups/0'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Current events',\n",
+ " 'text': 'Current events',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/10',\n",
+ " 'parent': {'$ref': '#/groups/0'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Random article',\n",
+ " 'text': 'Random article',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/11',\n",
+ " 'parent': {'$ref': '#/groups/0'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'About Wikipedia',\n",
+ " 'text': 'About Wikipedia',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/12',\n",
+ " 'parent': {'$ref': '#/groups/0'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Contact us',\n",
+ " 'text': 'Contact us',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/13',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Contribute',\n",
+ " 'text': 'Contribute'},\n",
+ " {'self_ref': '#/texts/14',\n",
+ " 'parent': {'$ref': '#/groups/1'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Help',\n",
+ " 'text': 'Help',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/15',\n",
+ " 'parent': {'$ref': '#/groups/1'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Learn to edit',\n",
+ " 'text': 'Learn to edit',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/16',\n",
+ " 'parent': {'$ref': '#/groups/1'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Community portal',\n",
+ " 'text': 'Community portal',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/17',\n",
+ " 'parent': {'$ref': '#/groups/1'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Recent changes',\n",
+ " 'text': 'Recent changes',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/18',\n",
+ " 'parent': {'$ref': '#/groups/1'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Upload file',\n",
+ " 'text': 'Upload file',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/19',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Search',\n",
+ " 'text': 'Search'},\n",
+ " {'self_ref': '#/texts/20',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Search',\n",
+ " 'text': 'Search'},\n",
+ " {'self_ref': '#/texts/21',\n",
+ " 'parent': {'$ref': '#/groups/3'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Donate',\n",
+ " 'text': 'Donate',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/22',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Appearance',\n",
+ " 'text': 'Appearance'},\n",
+ " {'self_ref': '#/texts/23',\n",
+ " 'parent': {'$ref': '#/groups/5'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Create account',\n",
+ " 'text': 'Create account',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/24',\n",
+ " 'parent': {'$ref': '#/groups/5'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Log in',\n",
+ " 'text': 'Log in',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/25',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Personal tools',\n",
+ " 'text': 'Personal tools'},\n",
+ " {'self_ref': '#/texts/26',\n",
+ " 'parent': {'$ref': '#/groups/6'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Create account',\n",
+ " 'text': 'Create account',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/27',\n",
+ " 'parent': {'$ref': '#/groups/6'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Log in',\n",
+ " 'text': 'Log in',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/28',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Pages for logged out editors learn more',\n",
+ " 'text': 'Pages for logged out editors learn more'},\n",
+ " {'self_ref': '#/texts/29',\n",
+ " 'parent': {'$ref': '#/groups/7'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Contributions',\n",
+ " 'text': 'Contributions',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/30',\n",
+ " 'parent': {'$ref': '#/groups/7'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Talk',\n",
+ " 'text': 'Talk',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/31',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Contents',\n",
+ " 'text': 'Contents',\n",
+ " 'level': 2},\n",
+ " {'self_ref': '#/texts/32',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'move to sidebar',\n",
+ " 'text': 'move to sidebar'},\n",
+ " {'self_ref': '#/texts/33',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'hide',\n",
+ " 'text': 'hide'},\n",
+ " {'self_ref': '#/texts/34',\n",
+ " 'parent': {'$ref': '#/groups/8'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '(Top)',\n",
+ " 'text': '(Top)',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/35',\n",
+ " 'parent': {'$ref': '#/groups/8'},\n",
+ " 'children': [{'$ref': '#/groups/9'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '1 Etymology',\n",
+ " 'text': '1 Etymology',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/36',\n",
+ " 'parent': {'$ref': '#/groups/8'},\n",
+ " 'children': [{'$ref': '#/groups/10'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '2 Taxonomy',\n",
+ " 'text': '2 Taxonomy',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/37',\n",
+ " 'parent': {'$ref': '#/groups/8'},\n",
+ " 'children': [{'$ref': '#/groups/11'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '3 Morphology',\n",
+ " 'text': '3 Morphology',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/38',\n",
+ " 'parent': {'$ref': '#/groups/8'},\n",
+ " 'children': [{'$ref': '#/groups/12'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '4 Distribution and habitat',\n",
+ " 'text': '4 Distribution and habitat',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/39',\n",
+ " 'parent': {'$ref': '#/groups/8'},\n",
+ " 'children': [{'$ref': '#/groups/13'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '5 Behaviour Toggle Behaviour subsection',\n",
+ " 'text': '5 Behaviour Toggle Behaviour subsection',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/40',\n",
+ " 'parent': {'$ref': '#/groups/13'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '5.1 Feeding',\n",
+ " 'text': '5.1 Feeding',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/41',\n",
+ " 'parent': {'$ref': '#/groups/13'},\n",
+ " 'children': [{'$ref': '#/groups/14'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '5.2 Breeding',\n",
+ " 'text': '5.2 Breeding',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/42',\n",
+ " 'parent': {'$ref': '#/groups/13'},\n",
+ " 'children': [{'$ref': '#/groups/15'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': \"What if we have some random text here. , or here... 5.3 Communication ....or maybe here?\\n\\t\\t\\t\\tThis is the last garbage I'm adding today.\",\n",
+ " 'text': \"What if we have some random text here. , or here... 5.3 Communication ....or maybe here?\\n\\t\\t\\t\\tThis is the last garbage I'm adding today.\",\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/43',\n",
+ " 'parent': {'$ref': '#/groups/13'},\n",
+ " 'children': [{'$ref': '#/groups/16'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '5.4 Predators',\n",
+ " 'text': '5.4 Predators',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/44',\n",
+ " 'parent': {'$ref': '#/groups/8'},\n",
+ " 'children': [{'$ref': '#/groups/17'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '6 Relationship with humans Toggle Relationship with humans subsection',\n",
+ " 'text': '6 Relationship with humans Toggle Relationship with humans subsection',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/45',\n",
+ " 'parent': {'$ref': '#/groups/17'},\n",
+ " 'children': [{'$ref': '#/groups/18'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '6.1 Hunting',\n",
+ " 'text': '6.1 Hunting',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/46',\n",
+ " 'parent': {'$ref': '#/groups/17'},\n",
+ " 'children': [{'$ref': '#/groups/19'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '6.2 Domestication',\n",
+ " 'text': '6.2 Domestication',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/47',\n",
+ " 'parent': {'$ref': '#/groups/17'},\n",
+ " 'children': [{'$ref': '#/groups/20'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '6.3 Heraldry',\n",
+ " 'text': '6.3 Heraldry',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/48',\n",
+ " 'parent': {'$ref': '#/groups/17'},\n",
+ " 'children': [{'$ref': '#/groups/21'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '6.4 Cultural references',\n",
+ " 'text': '6.4 Cultural references',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/49',\n",
+ " 'parent': {'$ref': '#/groups/8'},\n",
+ " 'children': [{'$ref': '#/groups/22'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '7 See also',\n",
+ " 'text': '7 See also',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/50',\n",
+ " 'parent': {'$ref': '#/groups/8'},\n",
+ " 'children': [{'$ref': '#/groups/23'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '8 Notes Toggle Notes subsection',\n",
+ " 'text': '8 Notes Toggle Notes subsection',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/51',\n",
+ " 'parent': {'$ref': '#/groups/23'},\n",
+ " 'children': [{'$ref': '#/groups/24'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '8.1 Citations',\n",
+ " 'text': '8.1 Citations',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/52',\n",
+ " 'parent': {'$ref': '#/groups/23'},\n",
+ " 'children': [{'$ref': '#/groups/25'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '8.2 Sources',\n",
+ " 'text': '8.2 Sources',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/53',\n",
+ " 'parent': {'$ref': '#/groups/8'},\n",
+ " 'children': [{'$ref': '#/groups/26'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '9 External links',\n",
+ " 'text': '9 External links',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/54',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Toggle the table of contents',\n",
+ " 'text': 'Toggle the table of contents'},\n",
+ " {'self_ref': '#/texts/55',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Duck',\n",
+ " 'text': 'Duck',\n",
+ " 'level': 1},\n",
+ " {'self_ref': '#/texts/56',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '136 languages',\n",
+ " 'text': '136 languages'},\n",
+ " {'self_ref': '#/texts/57',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Acèh',\n",
+ " 'text': 'Acèh',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/58',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Afrikaans',\n",
+ " 'text': 'Afrikaans',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/59',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Alemannisch',\n",
+ " 'text': 'Alemannisch',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/60',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'አማርኛ',\n",
+ " 'text': 'አማርኛ',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/61',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ænglisc',\n",
+ " 'text': 'Ænglisc',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/62',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'العربية',\n",
+ " 'text': 'العربية',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/63',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Aragonés',\n",
+ " 'text': 'Aragonés',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/64',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'ܐܪܡܝܐ',\n",
+ " 'text': 'ܐܪܡܝܐ',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/65',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Armãneashti',\n",
+ " 'text': 'Armãneashti',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/66',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Asturianu',\n",
+ " 'text': 'Asturianu',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/67',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Atikamekw',\n",
+ " 'text': 'Atikamekw',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/68',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Авар',\n",
+ " 'text': 'Авар',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/69',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Aymar aru',\n",
+ " 'text': 'Aymar aru',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/70',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'تۆرکجه',\n",
+ " 'text': 'تۆرکجه',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/71',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Basa Bali',\n",
+ " 'text': 'Basa Bali',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/72',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'বাংলা',\n",
+ " 'text': 'বাংলা',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/73',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '閩南語 / Bân-lâm-gú',\n",
+ " 'text': '閩南語 / Bân-lâm-gú',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/74',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Беларуская',\n",
+ " 'text': 'Беларуская',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/75',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Беларуская (тарашкевіца)',\n",
+ " 'text': 'Беларуская (тарашкевіца)',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/76',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Bikol Central',\n",
+ " 'text': 'Bikol Central',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/77',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Български',\n",
+ " 'text': 'Български',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/78',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Brezhoneg',\n",
+ " 'text': 'Brezhoneg',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/79',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Буряад',\n",
+ " 'text': 'Буряад',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/80',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Català',\n",
+ " 'text': 'Català',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/81',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Чӑвашла',\n",
+ " 'text': 'Чӑвашла',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/82',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Čeština',\n",
+ " 'text': 'Čeština',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/83',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'ChiShona',\n",
+ " 'text': 'ChiShona',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/84',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Cymraeg',\n",
+ " 'text': 'Cymraeg',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/85',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Dagbanli',\n",
+ " 'text': 'Dagbanli',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/86',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Dansk',\n",
+ " 'text': 'Dansk',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/87',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Deitsch',\n",
+ " 'text': 'Deitsch',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/88',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Deutsch',\n",
+ " 'text': 'Deutsch',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/89',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'डोटेली',\n",
+ " 'text': 'डोटेली',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/90',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ελληνικά',\n",
+ " 'text': 'Ελληνικά',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/91',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Emiliàn e rumagnòl',\n",
+ " 'text': 'Emiliàn e rumagnòl',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/92',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Español',\n",
+ " 'text': 'Español',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/93',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Esperanto',\n",
+ " 'text': 'Esperanto',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/94',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Euskara',\n",
+ " 'text': 'Euskara',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/95',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'فارسی',\n",
+ " 'text': 'فارسی',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/96',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Français',\n",
+ " 'text': 'Français',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/97',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Gaeilge',\n",
+ " 'text': 'Gaeilge',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/98',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Galego',\n",
+ " 'text': 'Galego',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/99',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'ГӀалгӀай',\n",
+ " 'text': 'ГӀалгӀай',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/100',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '贛語',\n",
+ " 'text': '贛語',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/101',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'گیلکی',\n",
+ " 'text': 'گیلکی',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/102',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '𐌲𐌿𐍄𐌹𐍃𐌺',\n",
+ " 'text': '𐌲𐌿𐍄𐌹𐍃𐌺',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/103',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'गोंयची कोंकणी / Gõychi Konknni',\n",
+ " 'text': 'गोंयची कोंकणी / Gõychi Konknni',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/104',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '客家語 / Hak-kâ-ngî',\n",
+ " 'text': '客家語 / Hak-kâ-ngî',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/105',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '한국어',\n",
+ " 'text': '한국어',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/106',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Hausa',\n",
+ " 'text': 'Hausa',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/107',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Հայերեն',\n",
+ " 'text': 'Հայերեն',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/108',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'हिन्दी',\n",
+ " 'text': 'हिन्दी',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/109',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Hrvatski',\n",
+ " 'text': 'Hrvatski',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/110',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ido',\n",
+ " 'text': 'Ido',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/111',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Bahasa Indonesia',\n",
+ " 'text': 'Bahasa Indonesia',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/112',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Iñupiatun',\n",
+ " 'text': 'Iñupiatun',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/113',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Íslenska',\n",
+ " 'text': 'Íslenska',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/114',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Italiano',\n",
+ " 'text': 'Italiano',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/115',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'עברית',\n",
+ " 'text': 'עברית',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/116',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Jawa',\n",
+ " 'text': 'Jawa',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/117',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'ಕನ್ನಡ',\n",
+ " 'text': 'ಕನ್ನಡ',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/118',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Kapampangan',\n",
+ " 'text': 'Kapampangan',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/119',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'ქართული',\n",
+ " 'text': 'ქართული',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/120',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'कॉशुर / کٲشُر',\n",
+ " 'text': 'कॉशुर / کٲشُر',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/121',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Қазақша',\n",
+ " 'text': 'Қазақша',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/122',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ikirundi',\n",
+ " 'text': 'Ikirundi',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/123',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Kongo',\n",
+ " 'text': 'Kongo',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/124',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Kreyòl ayisyen',\n",
+ " 'text': 'Kreyòl ayisyen',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/125',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Кырык мары',\n",
+ " 'text': 'Кырык мары',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/126',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'ລາວ',\n",
+ " 'text': 'ລາວ',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/127',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Latina',\n",
+ " 'text': 'Latina',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/128',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Latviešu',\n",
+ " 'text': 'Latviešu',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/129',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Lietuvių',\n",
+ " 'text': 'Lietuvių',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/130',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Li Niha',\n",
+ " 'text': 'Li Niha',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/131',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ligure',\n",
+ " 'text': 'Ligure',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/132',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Limburgs',\n",
+ " 'text': 'Limburgs',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/133',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Lingála',\n",
+ " 'text': 'Lingála',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/134',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Malagasy',\n",
+ " 'text': 'Malagasy',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/135',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'മലയാളം',\n",
+ " 'text': 'മലയാളം',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/136',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'मराठी',\n",
+ " 'text': 'मराठी',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/137',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'مازِرونی',\n",
+ " 'text': 'مازِرونی',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/138',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Bahasa Melayu',\n",
+ " 'text': 'Bahasa Melayu',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/139',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'ꯃꯤꯇꯩ ꯂꯣꯟ',\n",
+ " 'text': 'ꯃꯤꯇꯩ ꯂꯣꯟ',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/140',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '閩東語 / Mìng-dĕ̤ng-ngṳ̄',\n",
+ " 'text': '閩東語 / Mìng-dĕ̤ng-ngṳ̄',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/141',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Мокшень',\n",
+ " 'text': 'Мокшень',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/142',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Монгол',\n",
+ " 'text': 'Монгол',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/143',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'မြန်မာဘာသာ',\n",
+ " 'text': 'မြန်မာဘာသာ',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/144',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Nederlands',\n",
+ " 'text': 'Nederlands',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/145',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Nedersaksies',\n",
+ " 'text': 'Nedersaksies',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/146',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'नेपाली',\n",
+ " 'text': 'नेपाली',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/147',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'नेपाल भाषा',\n",
+ " 'text': 'नेपाल भाषा',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/148',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '日本語',\n",
+ " 'text': '日本語',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/149',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Нохчийн',\n",
+ " 'text': 'Нохчийн',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/150',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Norsk nynorsk',\n",
+ " 'text': 'Norsk nynorsk',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/151',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Occitan',\n",
+ " 'text': 'Occitan',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/152',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Oromoo',\n",
+ " 'text': 'Oromoo',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/153',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'ਪੰਜਾਬੀ',\n",
+ " 'text': 'ਪੰਜਾਬੀ',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/154',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Picard',\n",
+ " 'text': 'Picard',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/155',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Plattdüütsch',\n",
+ " 'text': 'Plattdüütsch',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/156',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Polski',\n",
+ " 'text': 'Polski',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/157',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Português',\n",
+ " 'text': 'Português',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/158',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Qırımtatarca',\n",
+ " 'text': 'Qırımtatarca',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/159',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Română',\n",
+ " 'text': 'Română',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/160',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Русский',\n",
+ " 'text': 'Русский',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/161',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Саха тыла',\n",
+ " 'text': 'Саха тыла',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/162',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'ᱥᱟᱱᱛᱟᱲᱤ',\n",
+ " 'text': 'ᱥᱟᱱᱛᱟᱲᱤ',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/163',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Sardu',\n",
+ " 'text': 'Sardu',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/164',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Scots',\n",
+ " 'text': 'Scots',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/165',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Seeltersk',\n",
+ " 'text': 'Seeltersk',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/166',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Shqip',\n",
+ " 'text': 'Shqip',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/167',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Sicilianu',\n",
+ " 'text': 'Sicilianu',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/168',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'සිංහල',\n",
+ " 'text': 'සිංහල',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/169',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Simple English',\n",
+ " 'text': 'Simple English',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/170',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'سنڌي',\n",
+ " 'text': 'سنڌي',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/171',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'کوردی',\n",
+ " 'text': 'کوردی',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/172',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Српски / srpski',\n",
+ " 'text': 'Српски / srpski',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/173',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Srpskohrvatski / српскохрватски',\n",
+ " 'text': 'Srpskohrvatski / српскохрватски',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/174',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Sunda',\n",
+ " 'text': 'Sunda',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/175',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Svenska',\n",
+ " 'text': 'Svenska',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/176',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Tagalog',\n",
+ " 'text': 'Tagalog',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/177',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'தமிழ்',\n",
+ " 'text': 'தமிழ்',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/178',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Taqbaylit',\n",
+ " 'text': 'Taqbaylit',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/179',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Татарча / tatarça',\n",
+ " 'text': 'Татарча / tatarça',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/180',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'ไทย',\n",
+ " 'text': 'ไทย',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/181',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Türkçe',\n",
+ " 'text': 'Türkçe',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/182',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Українська',\n",
+ " 'text': 'Українська',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/183',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'ئۇيغۇرچە / Uyghurche',\n",
+ " 'text': 'ئۇيغۇرچە / Uyghurche',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/184',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Vahcuengh',\n",
+ " 'text': 'Vahcuengh',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/185',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Tiếng Việt',\n",
+ " 'text': 'Tiếng Việt',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/186',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Walon',\n",
+ " 'text': 'Walon',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/187',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '文言',\n",
+ " 'text': '文言',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/188',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Winaray',\n",
+ " 'text': 'Winaray',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/189',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '吴语',\n",
+ " 'text': '吴语',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/190',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '粵語',\n",
+ " 'text': '粵語',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/191',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Žemaitėška',\n",
+ " 'text': 'Žemaitėška',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/192',\n",
+ " 'parent': {'$ref': '#/groups/27'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '中文',\n",
+ " 'text': '中文',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/193',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Edit links',\n",
+ " 'text': 'Edit links'},\n",
+ " {'self_ref': '#/texts/194',\n",
+ " 'parent': {'$ref': '#/groups/28'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Article',\n",
+ " 'text': 'Article',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/195',\n",
+ " 'parent': {'$ref': '#/groups/28'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Talk',\n",
+ " 'text': 'Talk',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/196',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'English',\n",
+ " 'text': 'English'},\n",
+ " {'self_ref': '#/texts/197',\n",
+ " 'parent': {'$ref': '#/groups/30'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Read',\n",
+ " 'text': 'Read',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/198',\n",
+ " 'parent': {'$ref': '#/groups/30'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'View source',\n",
+ " 'text': 'View source',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/199',\n",
+ " 'parent': {'$ref': '#/groups/30'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'View history',\n",
+ " 'text': 'View history',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/200',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Tools',\n",
+ " 'text': 'Tools'},\n",
+ " {'self_ref': '#/texts/201',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Tools',\n",
+ " 'text': 'Tools'},\n",
+ " {'self_ref': '#/texts/202',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'move to sidebar',\n",
+ " 'text': 'move to sidebar'},\n",
+ " {'self_ref': '#/texts/203',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'hide',\n",
+ " 'text': 'hide'},\n",
+ " {'self_ref': '#/texts/204',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Actions',\n",
+ " 'text': 'Actions'},\n",
+ " {'self_ref': '#/texts/205',\n",
+ " 'parent': {'$ref': '#/groups/31'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Read',\n",
+ " 'text': 'Read',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/206',\n",
+ " 'parent': {'$ref': '#/groups/31'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'View source',\n",
+ " 'text': 'View source',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/207',\n",
+ " 'parent': {'$ref': '#/groups/31'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'View history',\n",
+ " 'text': 'View history',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/208',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'General',\n",
+ " 'text': 'General'},\n",
+ " {'self_ref': '#/texts/209',\n",
+ " 'parent': {'$ref': '#/groups/32'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'What links here',\n",
+ " 'text': 'What links here',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/210',\n",
+ " 'parent': {'$ref': '#/groups/32'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Related changes',\n",
+ " 'text': 'Related changes',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/211',\n",
+ " 'parent': {'$ref': '#/groups/32'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Upload file',\n",
+ " 'text': 'Upload file',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/212',\n",
+ " 'parent': {'$ref': '#/groups/32'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Special pages',\n",
+ " 'text': 'Special pages',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/213',\n",
+ " 'parent': {'$ref': '#/groups/32'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Permanent link',\n",
+ " 'text': 'Permanent link',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/214',\n",
+ " 'parent': {'$ref': '#/groups/32'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Page information',\n",
+ " 'text': 'Page information',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/215',\n",
+ " 'parent': {'$ref': '#/groups/32'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Cite this page',\n",
+ " 'text': 'Cite this page',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/216',\n",
+ " 'parent': {'$ref': '#/groups/32'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Get shortened URL',\n",
+ " 'text': 'Get shortened URL',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/217',\n",
+ " 'parent': {'$ref': '#/groups/32'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Download QR code',\n",
+ " 'text': 'Download QR code',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/218',\n",
+ " 'parent': {'$ref': '#/groups/32'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Wikidata item',\n",
+ " 'text': 'Wikidata item',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/219',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Print/export',\n",
+ " 'text': 'Print/export'},\n",
+ " {'self_ref': '#/texts/220',\n",
+ " 'parent': {'$ref': '#/groups/33'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Download as PDF',\n",
+ " 'text': 'Download as PDF',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/221',\n",
+ " 'parent': {'$ref': '#/groups/33'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Printable version',\n",
+ " 'text': 'Printable version',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/222',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'In other projects',\n",
+ " 'text': 'In other projects'},\n",
+ " {'self_ref': '#/texts/223',\n",
+ " 'parent': {'$ref': '#/groups/34'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Wikimedia Commons',\n",
+ " 'text': 'Wikimedia Commons',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/224',\n",
+ " 'parent': {'$ref': '#/groups/34'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Wikiquote',\n",
+ " 'text': 'Wikiquote',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/225',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Appearance',\n",
+ " 'text': 'Appearance'},\n",
+ " {'self_ref': '#/texts/226',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'move to sidebar',\n",
+ " 'text': 'move to sidebar'},\n",
+ " {'self_ref': '#/texts/227',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'hide',\n",
+ " 'text': 'hide'},\n",
+ " {'self_ref': '#/texts/228',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'From Wikipedia, the free encyclopedia',\n",
+ " 'text': 'From Wikipedia, the free encyclopedia'},\n",
+ " {'self_ref': '#/texts/229',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '(Redirected from Duckling)',\n",
+ " 'text': '(Redirected from Duckling)'},\n",
+ " {'self_ref': '#/texts/230',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Common name for many species of bird',\n",
+ " 'text': 'Common name for many species of bird'},\n",
+ " {'self_ref': '#/texts/231',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'This article is about the bird. For duck as a food, see Duck as food. For other uses, see Duck (disambiguation).',\n",
+ " 'text': 'This article is about the bird. For duck as a food, see Duck as food. For other uses, see Duck (disambiguation).'},\n",
+ " {'self_ref': '#/texts/232',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '\"Duckling\" redirects here. For other uses, see Duckling (disambiguation).',\n",
+ " 'text': '\"Duckling\" redirects here. For other uses, see Duckling (disambiguation).'},\n",
+ " {'self_ref': '#/texts/233',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Duck is the common name for numerous species of waterfowl in the family Anatidae. Ducks are generally smaller and shorter-necked than swans and geese, which are members of the same family. Divided among several subfamilies, they are a form taxon; they do not represent a monophyletic group (the group of all descendants of a single common ancestral species), since swans and geese are not considered ducks. Ducks are mostly aquatic birds, and may be found in both fresh water and sea water.',\n",
+ " 'text': 'Duck is the common name for numerous species of waterfowl in the family Anatidae. Ducks are generally smaller and shorter-necked than swans and geese, which are members of the same family. Divided among several subfamilies, they are a form taxon; they do not represent a monophyletic group (the group of all descendants of a single common ancestral species), since swans and geese are not considered ducks. Ducks are mostly aquatic birds, and may be found in both fresh water and sea water.'},\n",
+ " {'self_ref': '#/texts/234',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ducks are sometimes confused with several types of unrelated water birds with similar forms, such as loons or divers, grebes, gallinules and coots.',\n",
+ " 'text': 'Ducks are sometimes confused with several types of unrelated water birds with similar forms, such as loons or divers, grebes, gallinules and coots.'},\n",
+ " {'self_ref': '#/texts/235',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Etymology',\n",
+ " 'text': 'Etymology',\n",
+ " 'level': 2},\n",
+ " {'self_ref': '#/texts/236',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': \"The word duck comes from Old English dūce 'diver', a derivative of the verb *dūcan 'to duck, bend down low as if to get under something, or dive', because of the way many species in the dabbling duck group feed by upending; compare with Dutch duiken and German tauchen 'to dive'.\",\n",
+ " 'text': \"The word duck comes from Old English dūce 'diver', a derivative of the verb *dūcan 'to duck, bend down low as if to get under something, or dive', because of the way many species in the dabbling duck group feed by upending; compare with Dutch duiken and German tauchen 'to dive'.\"},\n",
+ " {'self_ref': '#/texts/237',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Pacific black duck displaying the characteristic upending \"duck\"',\n",
+ " 'text': 'Pacific black duck displaying the characteristic upending \"duck\"'},\n",
+ " {'self_ref': '#/texts/238',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'This word replaced Old English ened /ænid \\'duck\\', possibly to avoid confusion with other words, such as ende \\'end\\' with similar forms. Other Germanic languages still have similar words for duck, for example, Dutch eend, German Ente and Norwegian and. The word ened /ænid was inherited from Proto-Indo-European; cf. Latin anas \"duck\", Lithuanian ántis \\'duck\\', Ancient Greek νῆσσα /νῆττα (nēssa /nētta) \\'duck\\', and Sanskrit ātí \\'water bird\\', among others.',\n",
+ " 'text': 'This word replaced Old English ened /ænid \\'duck\\', possibly to avoid confusion with other words, such as ende \\'end\\' with similar forms. Other Germanic languages still have similar words for duck, for example, Dutch eend, German Ente and Norwegian and. The word ened /ænid was inherited from Proto-Indo-European; cf. Latin anas \"duck\", Lithuanian ántis \\'duck\\', Ancient Greek νῆσσα /νῆττα (nēssa /nētta) \\'duck\\', and Sanskrit ātí \\'water bird\\', among others.'},\n",
+ " {'self_ref': '#/texts/239',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'A duckling is a young duck in downy plumage[1] or baby duck,[2] but in the food trade a young domestic duck which has just reached adult size and bulk and its meat is still fully tender, is sometimes labelled as a duckling.',\n",
+ " 'text': 'A duckling is a young duck in downy plumage[1] or baby duck,[2] but in the food trade a young domestic duck which has just reached adult size and bulk and its meat is still fully tender, is sometimes labelled as a duckling.'},\n",
+ " {'self_ref': '#/texts/240',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'A male is called a drake and the female is called a duck, or in ornithology a hen.[3][4]',\n",
+ " 'text': 'A male is called a drake and the female is called a duck, or in ornithology a hen.[3][4]'},\n",
+ " {'self_ref': '#/texts/241',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Male mallard.',\n",
+ " 'text': 'Male mallard.'},\n",
+ " {'self_ref': '#/texts/242',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Wood ducks.',\n",
+ " 'text': 'Wood ducks.'},\n",
+ " {'self_ref': '#/texts/243',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Taxonomy',\n",
+ " 'text': 'Taxonomy',\n",
+ " 'level': 2},\n",
+ " {'self_ref': '#/texts/244',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': \"All ducks belong to the biological order Anseriformes, a group that contains the ducks, geese and swans, as well as the screamers, and the magpie goose.[5] All except the screamers belong to the biological family Anatidae.[5] Within the family, ducks are split into a variety of subfamilies and 'tribes'. The number and composition of these subfamilies and tribes is the cause of considerable disagreement among taxonomists.[5] Some base their decisions on morphological characteristics, others on shared behaviours or genetic studies.[6][7] The number of suggested subfamilies containing ducks ranges from two to five.[8][9] The significant level of hybridisation that occurs among wild ducks complicates efforts to tease apart the relationships between various species.[9]\",\n",
+ " 'text': \"All ducks belong to the biological order Anseriformes, a group that contains the ducks, geese and swans, as well as the screamers, and the magpie goose.[5] All except the screamers belong to the biological family Anatidae.[5] Within the family, ducks are split into a variety of subfamilies and 'tribes'. The number and composition of these subfamilies and tribes is the cause of considerable disagreement among taxonomists.[5] Some base their decisions on morphological characteristics, others on shared behaviours or genetic studies.[6][7] The number of suggested subfamilies containing ducks ranges from two to five.[8][9] The significant level of hybridisation that occurs among wild ducks complicates efforts to tease apart the relationships between various species.[9]\"},\n",
+ " {'self_ref': '#/texts/245',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Mallard landing in approach',\n",
+ " 'text': 'Mallard landing in approach'},\n",
+ " {'self_ref': '#/texts/246',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': \"In most modern classifications, the so-called 'true ducks' belong to the subfamily Anatinae, which is further split into a varying number of tribes.[10] The largest of these, the Anatini, contains the 'dabbling' or 'river' ducks – named for their method of feeding primarily at the surface of fresh water.[11] The 'diving ducks', also named for their primary feeding method, make up the tribe Aythyini.[12] The 'sea ducks' of the tribe Mergini are diving ducks which specialise on fish and shellfish and spend a majority of their lives in saltwater.[13] The tribe Oxyurini contains the 'stifftails', diving ducks notable for their small size and stiff, upright tails.[14]\",\n",
+ " 'text': \"In most modern classifications, the so-called 'true ducks' belong to the subfamily Anatinae, which is further split into a varying number of tribes.[10] The largest of these, the Anatini, contains the 'dabbling' or 'river' ducks – named for their method of feeding primarily at the surface of fresh water.[11] The 'diving ducks', also named for their primary feeding method, make up the tribe Aythyini.[12] The 'sea ducks' of the tribe Mergini are diving ducks which specialise on fish and shellfish and spend a majority of their lives in saltwater.[13] The tribe Oxyurini contains the 'stifftails', diving ducks notable for their small size and stiff, upright tails.[14]\"},\n",
+ " {'self_ref': '#/texts/247',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': \"A number of other species called ducks are not considered to be 'true ducks', and are typically placed in other subfamilies or tribes. The whistling ducks are assigned either to a tribe (Dendrocygnini) in the subfamily Anatinae or the subfamily Anserinae,[15] or to their own subfamily (Dendrocygninae) or family (Dendrocyganidae).[9][16] The freckled duck of Australia is either the sole member of the tribe Stictonettini in the subfamily Anserinae,[15] or in its own family, the Stictonettinae.[9] The shelducks make up the tribe Tadornini in the family Anserinae in some classifications,[15] and their own subfamily, Tadorninae, in others,[17] while the steamer ducks are either placed in the family Anserinae in the tribe Tachyerini[15] or lumped with the shelducks in the tribe Tadorini.[9] The perching ducks make up in the tribe Cairinini in the subfamily Anserinae in some classifications, while that tribe is eliminated in other classifications and its members assigned to the tribe Anatini.[9] The torrent duck is generally included in the subfamily Anserinae in the monotypic tribe Merganettini,[15] but is sometimes included in the tribe Tadornini.[18] The pink-eared duck is sometimes included as a true duck either in the tribe Anatini[15] or the tribe Malacorhynchini,[19] and other times is included with the shelducks in the tribe Tadornini.[15]\",\n",
+ " 'text': \"A number of other species called ducks are not considered to be 'true ducks', and are typically placed in other subfamilies or tribes. The whistling ducks are assigned either to a tribe (Dendrocygnini) in the subfamily Anatinae or the subfamily Anserinae,[15] or to their own subfamily (Dendrocygninae) or family (Dendrocyganidae).[9][16] The freckled duck of Australia is either the sole member of the tribe Stictonettini in the subfamily Anserinae,[15] or in its own family, the Stictonettinae.[9] The shelducks make up the tribe Tadornini in the family Anserinae in some classifications,[15] and their own subfamily, Tadorninae, in others,[17] while the steamer ducks are either placed in the family Anserinae in the tribe Tachyerini[15] or lumped with the shelducks in the tribe Tadorini.[9] The perching ducks make up in the tribe Cairinini in the subfamily Anserinae in some classifications, while that tribe is eliminated in other classifications and its members assigned to the tribe Anatini.[9] The torrent duck is generally included in the subfamily Anserinae in the monotypic tribe Merganettini,[15] but is sometimes included in the tribe Tadornini.[18] The pink-eared duck is sometimes included as a true duck either in the tribe Anatini[15] or the tribe Malacorhynchini,[19] and other times is included with the shelducks in the tribe Tadornini.[15]\"},\n",
+ " {'self_ref': '#/texts/248',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Morphology',\n",
+ " 'text': 'Morphology',\n",
+ " 'level': 2},\n",
+ " {'self_ref': '#/texts/249',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Male Mandarin duck',\n",
+ " 'text': 'Male Mandarin duck'},\n",
+ " {'self_ref': '#/texts/250',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'The overall body plan of ducks is elongated and broad, and they are also relatively long-necked, albeit not as long-necked as the geese and swans. The body shape of diving ducks varies somewhat from this in being more rounded. The bill is usually broad and contains serrated pectens, which are particularly well defined in the filter-feeding species. In the case of some fishing species the bill is long and strongly serrated. The scaled legs are strong and well developed, and generally set far back on the body, more so in the highly aquatic species. The wings are very strong and are generally short and pointed, and the flight of ducks requires fast continuous strokes, requiring in turn strong wing muscles. Three species of steamer duck are almost flightless, however. Many species of duck are temporarily flightless while moulting; they seek out protected habitat with good food supplies during this period. This moult typically precedes migration.',\n",
+ " 'text': 'The overall body plan of ducks is elongated and broad, and they are also relatively long-necked, albeit not as long-necked as the geese and swans. The body shape of diving ducks varies somewhat from this in being more rounded. The bill is usually broad and contains serrated pectens, which are particularly well defined in the filter-feeding species. In the case of some fishing species the bill is long and strongly serrated. The scaled legs are strong and well developed, and generally set far back on the body, more so in the highly aquatic species. The wings are very strong and are generally short and pointed, and the flight of ducks requires fast continuous strokes, requiring in turn strong wing muscles. Three species of steamer duck are almost flightless, however. Many species of duck are temporarily flightless while moulting; they seek out protected habitat with good food supplies during this period. This moult typically precedes migration.'},\n",
+ " {'self_ref': '#/texts/251',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'The drakes of northern species often have extravagant plumage, but that is moulted in summer to give a more female-like appearance, the \"eclipse\" plumage. Southern resident species typically show less sexual dimorphism, although there are exceptions such as the paradise shelduck of New Zealand, which is both strikingly sexually dimorphic and in which the female\\'s plumage is brighter than that of the male. The plumage of juvenile birds generally resembles that of the female. Female ducks have evolved to have a corkscrew shaped vagina to prevent rape.',\n",
+ " 'text': 'The drakes of northern species often have extravagant plumage, but that is moulted in summer to give a more female-like appearance, the \"eclipse\" plumage. Southern resident species typically show less sexual dimorphism, although there are exceptions such as the paradise shelduck of New Zealand, which is both strikingly sexually dimorphic and in which the female\\'s plumage is brighter than that of the male. The plumage of juvenile birds generally resembles that of the female. Female ducks have evolved to have a corkscrew shaped vagina to prevent rape.'},\n",
+ " {'self_ref': '#/texts/252',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Distribution and habitat',\n",
+ " 'text': 'Distribution and habitat',\n",
+ " 'level': 2},\n",
+ " {'self_ref': '#/texts/253',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'See also: List of Anseriformes by population',\n",
+ " 'text': 'See also: List of Anseriformes by population'},\n",
+ " {'self_ref': '#/texts/254',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Flying steamer ducks in Ushuaia, Argentina',\n",
+ " 'text': 'Flying steamer ducks in Ushuaia, Argentina'},\n",
+ " {'self_ref': '#/texts/255',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ducks have a cosmopolitan distribution, and are found on every continent except Antarctica.[5] Several species manage to live on subantarctic islands, including South Georgia and the Auckland Islands.[20] Ducks have reached a number of isolated oceanic islands, including the Hawaiian Islands, Micronesia and the Galápagos Islands, where they are often vagrants and less often residents.[21][22] A handful are endemic to such far-flung islands.[21]',\n",
+ " 'text': 'Ducks have a cosmopolitan distribution, and are found on every continent except Antarctica.[5] Several species manage to live on subantarctic islands, including South Georgia and the Auckland Islands.[20] Ducks have reached a number of isolated oceanic islands, including the Hawaiian Islands, Micronesia and the Galápagos Islands, where they are often vagrants and less often residents.[21][22] A handful are endemic to such far-flung islands.[21]'},\n",
+ " {'self_ref': '#/texts/256',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Female mallard in Cornwall, England',\n",
+ " 'text': 'Female mallard in Cornwall, England'},\n",
+ " {'self_ref': '#/texts/257',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Some duck species, mainly those breeding in the temperate and Arctic Northern Hemisphere, are migratory; those in the tropics are generally not. Some ducks, particularly in Australia where rainfall is erratic, are nomadic, seeking out the temporary lakes and pools that form after localised heavy rain.[23]',\n",
+ " 'text': 'Some duck species, mainly those breeding in the temperate and Arctic Northern Hemisphere, are migratory; those in the tropics are generally not. Some ducks, particularly in Australia where rainfall is erratic, are nomadic, seeking out the temporary lakes and pools that form after localised heavy rain.[23]'},\n",
+ " {'self_ref': '#/texts/258',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Behaviour',\n",
+ " 'text': 'Behaviour',\n",
+ " 'level': 2},\n",
+ " {'self_ref': '#/texts/259',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Feeding',\n",
+ " 'text': 'Feeding',\n",
+ " 'level': 3},\n",
+ " {'self_ref': '#/texts/260',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Pecten along the bill',\n",
+ " 'text': 'Pecten along the bill'},\n",
+ " {'self_ref': '#/texts/261',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Mallard duckling preening',\n",
+ " 'text': 'Mallard duckling preening'},\n",
+ " {'self_ref': '#/texts/262',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ducks eat food sources such as grasses, aquatic plants, fish, insects, small amphibians, worms, and small molluscs.',\n",
+ " 'text': 'Ducks eat food sources such as grasses, aquatic plants, fish, insects, small amphibians, worms, and small molluscs.'},\n",
+ " {'self_ref': '#/texts/263',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Dabbling ducks feed on the surface of water or on land, or as deep as they can reach by up-ending without completely submerging.[24] Along the edge of the bill, there is a comb-like structure called a pecten. This strains the water squirting from the side of the bill and traps any food. The pecten is also used to preen feathers and to hold slippery food items.',\n",
+ " 'text': 'Dabbling ducks feed on the surface of water or on land, or as deep as they can reach by up-ending without completely submerging.[24] Along the edge of the bill, there is a comb-like structure called a pecten. This strains the water squirting from the side of the bill and traps any food. The pecten is also used to preen feathers and to hold slippery food items.'},\n",
+ " {'self_ref': '#/texts/264',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Diving ducks and sea ducks forage deep underwater. To be able to submerge more easily, the diving ducks are heavier than dabbling ducks, and therefore have more difficulty taking off to fly.',\n",
+ " 'text': 'Diving ducks and sea ducks forage deep underwater. To be able to submerge more easily, the diving ducks are heavier than dabbling ducks, and therefore have more difficulty taking off to fly.'},\n",
+ " {'self_ref': '#/texts/265',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'A few specialized species such as the mergansers are adapted to catch and swallow large fish.',\n",
+ " 'text': 'A few specialized species such as the mergansers are adapted to catch and swallow large fish.'},\n",
+ " {'self_ref': '#/texts/266',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'The others have the characteristic wide flat bill adapted to dredging-type jobs such as pulling up waterweed, pulling worms and small molluscs out of mud, searching for insect larvae, and bulk jobs such as dredging out, holding, turning head first, and swallowing a squirming frog. To avoid injury when digging into sediment it has no cere, but the nostrils come out through hard horn.',\n",
+ " 'text': 'The others have the characteristic wide flat bill adapted to dredging-type jobs such as pulling up waterweed, pulling worms and small molluscs out of mud, searching for insect larvae, and bulk jobs such as dredging out, holding, turning head first, and swallowing a squirming frog. To avoid injury when digging into sediment it has no cere, but the nostrils come out through hard horn.'},\n",
+ " {'self_ref': '#/texts/267',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'The Guardian published an article advising that ducks should not be fed with bread because it damages the health of the ducks and pollutes waterways.[25]',\n",
+ " 'text': 'The Guardian published an article advising that ducks should not be fed with bread because it damages the health of the ducks and pollutes waterways.[25]'},\n",
+ " {'self_ref': '#/texts/268',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Breeding',\n",
+ " 'text': 'Breeding',\n",
+ " 'level': 3},\n",
+ " {'self_ref': '#/texts/269',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'A Muscovy duckling',\n",
+ " 'text': 'A Muscovy duckling'},\n",
+ " {'self_ref': '#/texts/270',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ducks generally only have one partner at a time, although the partnership usually only lasts one year.[26] Larger species and the more sedentary species (like fast-river specialists) tend to have pair-bonds that last numerous years.[27] Most duck species breed once a year, choosing to do so in favourable conditions (spring/summer or wet seasons). Ducks also tend to make a nest before breeding, and, after hatching, lead their ducklings to water. Mother ducks are very caring and protective of their young, but may abandon some of their ducklings if they are physically stuck in an area they cannot get out of (such as nesting in an enclosed courtyard) or are not prospering due to genetic defects or sickness brought about by hypothermia, starvation, or disease. Ducklings can also be orphaned by inconsistent late hatching where a few eggs hatch after the mother has abandoned the nest and led her ducklings to water.[28]',\n",
+ " 'text': 'Ducks generally only have one partner at a time, although the partnership usually only lasts one year.[26] Larger species and the more sedentary species (like fast-river specialists) tend to have pair-bonds that last numerous years.[27] Most duck species breed once a year, choosing to do so in favourable conditions (spring/summer or wet seasons). Ducks also tend to make a nest before breeding, and, after hatching, lead their ducklings to water. Mother ducks are very caring and protective of their young, but may abandon some of their ducklings if they are physically stuck in an area they cannot get out of (such as nesting in an enclosed courtyard) or are not prospering due to genetic defects or sickness brought about by hypothermia, starvation, or disease. Ducklings can also be orphaned by inconsistent late hatching where a few eggs hatch after the mother has abandoned the nest and led her ducklings to water.[28]'},\n",
+ " {'self_ref': '#/texts/271',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Communication',\n",
+ " 'text': 'Communication',\n",
+ " 'level': 3},\n",
+ " {'self_ref': '#/texts/272',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Female mallard ducks (as well as several other species in the genus Anas, such as the American and Pacific black ducks, spot-billed duck, northern pintail and common teal) make the classic \"quack\" sound while males make a similar but raspier sound that is sometimes written as \"breeeeze\",[29][self-published source?] but, despite widespread misconceptions, most species of duck do not \"quack\".[30] In general, ducks make a range of calls, including whistles, cooing, yodels and grunts. For example, the scaup – which are diving ducks – make a noise like \"scaup\" (hence their name). Calls may be loud displaying calls or quieter contact calls.',\n",
+ " 'text': 'Female mallard ducks (as well as several other species in the genus Anas, such as the American and Pacific black ducks, spot-billed duck, northern pintail and common teal) make the classic \"quack\" sound while males make a similar but raspier sound that is sometimes written as \"breeeeze\",[29][self-published source?] but, despite widespread misconceptions, most species of duck do not \"quack\".[30] In general, ducks make a range of calls, including whistles, cooing, yodels and grunts. For example, the scaup – which are diving ducks – make a noise like \"scaup\" (hence their name). Calls may be loud displaying calls or quieter contact calls.'},\n",
+ " {'self_ref': '#/texts/273',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': \"A common urban legend claims that duck quacks do not echo; however, this has been proven to be false. This myth was first debunked by the Acoustics Research Centre at the University of Salford in 2003 as part of the British Association's Festival of Science.[31] It was also debunked in one of the earlier episodes of the popular Discovery Channel television show MythBusters.[32]\",\n",
+ " 'text': \"A common urban legend claims that duck quacks do not echo; however, this has been proven to be false. This myth was first debunked by the Acoustics Research Centre at the University of Salford in 2003 as part of the British Association's Festival of Science.[31] It was also debunked in one of the earlier episodes of the popular Discovery Channel television show MythBusters.[32]\"},\n",
+ " {'self_ref': '#/texts/274',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Predators',\n",
+ " 'text': 'Predators',\n",
+ " 'level': 3},\n",
+ " {'self_ref': '#/texts/275',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ringed teal',\n",
+ " 'text': 'Ringed teal'},\n",
+ " {'self_ref': '#/texts/276',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': \"Ducks have many predators. Ducklings are particularly vulnerable, since their inability to fly makes them easy prey not only for predatory birds but also for large fish like pike, crocodilians, predatory testudines such as the alligator snapping turtle, and other aquatic hunters, including fish-eating birds such as herons. Ducks' nests are raided by land-based predators, and brooding females may be caught unaware on the nest by mammals, such as foxes, or large birds, such as hawks or owls.\",\n",
+ " 'text': \"Ducks have many predators. Ducklings are particularly vulnerable, since their inability to fly makes them easy prey not only for predatory birds but also for large fish like pike, crocodilians, predatory testudines such as the alligator snapping turtle, and other aquatic hunters, including fish-eating birds such as herons. Ducks' nests are raided by land-based predators, and brooding females may be caught unaware on the nest by mammals, such as foxes, or large birds, such as hawks or owls.\"},\n",
+ " {'self_ref': '#/texts/277',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Adult ducks are fast fliers, but may be caught on the water by large aquatic predators including big fish such as the North American muskie and the European pike. In flight, ducks are safe from all but a few predators such as humans and the peregrine falcon, which uses its speed and strength to catch ducks.',\n",
+ " 'text': 'Adult ducks are fast fliers, but may be caught on the water by large aquatic predators including big fish such as the North American muskie and the European pike. In flight, ducks are safe from all but a few predators such as humans and the peregrine falcon, which uses its speed and strength to catch ducks.'},\n",
+ " {'self_ref': '#/texts/278',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Relationship with humans',\n",
+ " 'text': 'Relationship with humans',\n",
+ " 'level': 2},\n",
+ " {'self_ref': '#/texts/279',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Hunting',\n",
+ " 'text': 'Hunting',\n",
+ " 'level': 3},\n",
+ " {'self_ref': '#/texts/280',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Main article: Waterfowl hunting',\n",
+ " 'text': 'Main article: Waterfowl hunting'},\n",
+ " {'self_ref': '#/texts/281',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Humans have hunted ducks since prehistoric times. Excavations of middens in California dating to 7800 – 6400 BP have turned up bones of ducks, including at least one now-extinct flightless species.[33] Ducks were captured in \"significant numbers\" by Holocene inhabitants of the lower Ohio River valley, suggesting they took advantage of the seasonal bounty provided by migrating waterfowl.[34] Neolithic hunters in locations as far apart as the Caribbean,[35] Scandinavia,[36] Egypt,[37] Switzerland,[38] and China relied on ducks as a source of protein for some or all of the year.[39] Archeological evidence shows that Māori people in New Zealand hunted the flightless Finsch\\'s duck, possibly to extinction, though rat predation may also have contributed to its fate.[40] A similar end awaited the Chatham duck, a species with reduced flying capabilities which went extinct shortly after its island was colonised by Polynesian settlers.[41] It is probable that duck eggs were gathered by Neolithic hunter-gathers as well, though hard evidence of this is uncommon.[35][42]',\n",
+ " 'text': 'Humans have hunted ducks since prehistoric times. Excavations of middens in California dating to 7800 – 6400 BP have turned up bones of ducks, including at least one now-extinct flightless species.[33] Ducks were captured in \"significant numbers\" by Holocene inhabitants of the lower Ohio River valley, suggesting they took advantage of the seasonal bounty provided by migrating waterfowl.[34] Neolithic hunters in locations as far apart as the Caribbean,[35] Scandinavia,[36] Egypt,[37] Switzerland,[38] and China relied on ducks as a source of protein for some or all of the year.[39] Archeological evidence shows that Māori people in New Zealand hunted the flightless Finsch\\'s duck, possibly to extinction, though rat predation may also have contributed to its fate.[40] A similar end awaited the Chatham duck, a species with reduced flying capabilities which went extinct shortly after its island was colonised by Polynesian settlers.[41] It is probable that duck eggs were gathered by Neolithic hunter-gathers as well, though hard evidence of this is uncommon.[35][42]'},\n",
+ " {'self_ref': '#/texts/282',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'In many areas, wild ducks (including ducks farmed and released into the wild) are hunted for food or sport,[43] by shooting, or by being trapped using duck decoys. Because an idle floating duck or a duck squatting on land cannot react to fly or move quickly, \"a sitting duck\" has come to mean \"an easy target\". These ducks may be contaminated by pollutants such as PCBs.[44]',\n",
+ " 'text': 'In many areas, wild ducks (including ducks farmed and released into the wild) are hunted for food or sport,[43] by shooting, or by being trapped using duck decoys. Because an idle floating duck or a duck squatting on land cannot react to fly or move quickly, \"a sitting duck\" has come to mean \"an easy target\". These ducks may be contaminated by pollutants such as PCBs.[44]'},\n",
+ " {'self_ref': '#/texts/283',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Domestication',\n",
+ " 'text': 'Domestication',\n",
+ " 'level': 3},\n",
+ " {'self_ref': '#/texts/284',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Main article: Domestic duck',\n",
+ " 'text': 'Main article: Domestic duck'},\n",
+ " {'self_ref': '#/texts/285',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Indian Runner ducks, a common breed of domestic ducks',\n",
+ " 'text': 'Indian Runner ducks, a common breed of domestic ducks'},\n",
+ " {'self_ref': '#/texts/286',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': \"Ducks have many economic uses, being farmed for their meat, eggs, and feathers (particularly their down). Approximately 3 billion ducks are slaughtered each year for meat worldwide.[45] They are also kept and bred by aviculturists and often displayed in zoos. Almost all the varieties of domestic ducks are descended from the mallard (Anas platyrhynchos), apart from the Muscovy duck (Cairina moschata).[46][47] The Call duck is another example of a domestic duck breed. Its name comes from its original use established by hunters, as a decoy to attract wild mallards from the sky, into traps set for them on the ground. The call duck is the world's smallest domestic duck breed, as it weighs less than 1\\xa0kg (2.2\\xa0lb).[48]\",\n",
+ " 'text': \"Ducks have many economic uses, being farmed for their meat, eggs, and feathers (particularly their down). Approximately 3 billion ducks are slaughtered each year for meat worldwide.[45] They are also kept and bred by aviculturists and often displayed in zoos. Almost all the varieties of domestic ducks are descended from the mallard (Anas platyrhynchos), apart from the Muscovy duck (Cairina moschata).[46][47] The Call duck is another example of a domestic duck breed. Its name comes from its original use established by hunters, as a decoy to attract wild mallards from the sky, into traps set for them on the ground. The call duck is the world's smallest domestic duck breed, as it weighs less than 1\\xa0kg (2.2\\xa0lb).[48]\"},\n",
+ " {'self_ref': '#/texts/287',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Heraldry',\n",
+ " 'text': 'Heraldry',\n",
+ " 'level': 3},\n",
+ " {'self_ref': '#/texts/288',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Three black-colored ducks in the coat of arms of Maaninka[49]',\n",
+ " 'text': 'Three black-colored ducks in the coat of arms of Maaninka[49]'},\n",
+ " {'self_ref': '#/texts/289',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ducks appear on several coats of arms, including the coat of arms of Lubāna (Latvia)[50] and the coat of arms of Föglö (Åland).[51]',\n",
+ " 'text': 'Ducks appear on several coats of arms, including the coat of arms of Lubāna (Latvia)[50] and the coat of arms of Föglö (Åland).[51]'},\n",
+ " {'self_ref': '#/texts/290',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Cultural references',\n",
+ " 'text': 'Cultural references',\n",
+ " 'level': 3},\n",
+ " {'self_ref': '#/texts/291',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'In 2002, psychologist Richard Wiseman and colleagues at the University of Hertfordshire, UK, finished a year-long LaughLab experiment, concluding that of all animals, ducks attract the most humor and silliness; he said, \"If you\\'re going to tell a joke involving an animal, make it a duck.\"[52] The word \"duck\" may have become an inherently funny word in many languages, possibly because ducks are seen as silly in their looks or behavior. Of the many ducks in fiction, many are cartoon characters, such as Walt Disney\\'s Donald Duck, and Warner Bros.\\' Daffy Duck. Howard the Duck started as a comic book character in 1973[53][54] and was made into a movie in 1986.',\n",
+ " 'text': 'In 2002, psychologist Richard Wiseman and colleagues at the University of Hertfordshire, UK, finished a year-long LaughLab experiment, concluding that of all animals, ducks attract the most humor and silliness; he said, \"If you\\'re going to tell a joke involving an animal, make it a duck.\"[52] The word \"duck\" may have become an inherently funny word in many languages, possibly because ducks are seen as silly in their looks or behavior. Of the many ducks in fiction, many are cartoon characters, such as Walt Disney\\'s Donald Duck, and Warner Bros.\\' Daffy Duck. Howard the Duck started as a comic book character in 1973[53][54] and was made into a movie in 1986.'},\n",
+ " {'self_ref': '#/texts/292',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'The 1992 Disney film The Mighty Ducks, starring Emilio Estevez, chose the duck as the mascot for the fictional youth hockey team who are protagonists of the movie, based on the duck being described as a fierce fighter. This led to the duck becoming the nickname and mascot for the eventual National Hockey League professional team of the Anaheim Ducks, who were founded with the name the Mighty Ducks of Anaheim.[citation needed] The duck is also the nickname of the University of Oregon sports teams as well as the Long Island Ducks minor league baseball team.[55]',\n",
+ " 'text': 'The 1992 Disney film The Mighty Ducks, starring Emilio Estevez, chose the duck as the mascot for the fictional youth hockey team who are protagonists of the movie, based on the duck being described as a fierce fighter. This led to the duck becoming the nickname and mascot for the eventual National Hockey League professional team of the Anaheim Ducks, who were founded with the name the Mighty Ducks of Anaheim.[citation needed] The duck is also the nickname of the University of Oregon sports teams as well as the Long Island Ducks minor league baseball team.[55]'},\n",
+ " {'self_ref': '#/texts/293',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'See also',\n",
+ " 'text': 'See also',\n",
+ " 'level': 2},\n",
+ " {'self_ref': '#/texts/294',\n",
+ " 'parent': {'$ref': '#/groups/35'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Birds portal',\n",
+ " 'text': 'Birds portal',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/295',\n",
+ " 'parent': {'$ref': '#/groups/36'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Domestic duck',\n",
+ " 'text': 'Domestic duck',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/296',\n",
+ " 'parent': {'$ref': '#/groups/36'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Duck as food',\n",
+ " 'text': 'Duck as food',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/297',\n",
+ " 'parent': {'$ref': '#/groups/36'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Duck test',\n",
+ " 'text': 'Duck test',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/298',\n",
+ " 'parent': {'$ref': '#/groups/36'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Duck breeds',\n",
+ " 'text': 'Duck breeds',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/299',\n",
+ " 'parent': {'$ref': '#/groups/36'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Fictional ducks',\n",
+ " 'text': 'Fictional ducks',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/300',\n",
+ " 'parent': {'$ref': '#/groups/36'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Rubber duck',\n",
+ " 'text': 'Rubber duck',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/301',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Notes',\n",
+ " 'text': 'Notes',\n",
+ " 'level': 2},\n",
+ " {'self_ref': '#/texts/302',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Citations',\n",
+ " 'text': 'Citations',\n",
+ " 'level': 3},\n",
+ " {'self_ref': '#/texts/303',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ \"Duckling\" . The American Heritage Dictionary of the English Language, Fourth Edition . Houghton Mifflin Company. 2006 . Retrieved 2015-05-22 .',\n",
+ " 'text': '^ \"Duckling\" . The American Heritage Dictionary of the English Language, Fourth Edition . Houghton Mifflin Company. 2006 . Retrieved 2015-05-22 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/304',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ \"Duckling\" . Kernerman English Multilingual Dictionary (Beta Version) . K. Dictionaries Ltd. 2000–2006 . Retrieved 2015-05-22 .',\n",
+ " 'text': '^ \"Duckling\" . Kernerman English Multilingual Dictionary (Beta Version) . K. Dictionaries Ltd. 2000–2006 . Retrieved 2015-05-22 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/305',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Dohner, Janet Vorwald (2001). The Encyclopedia of Historic and Endangered Livestock and Poultry Breeds . Yale University Press. ISBN 978-0300138139 .',\n",
+ " 'text': '^ Dohner, Janet Vorwald (2001). The Encyclopedia of Historic and Endangered Livestock and Poultry Breeds . Yale University Press. ISBN 978-0300138139 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/306',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Visca, Curt; Visca, Kelley (2003). How to Draw Cartoon Birds . The Rosen Publishing Group. ISBN 9780823961566 .',\n",
+ " 'text': '^ Visca, Curt; Visca, Kelley (2003). How to Draw Cartoon Birds . The Rosen Publishing Group. ISBN 9780823961566 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/307',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ a b c d Carboneras 1992 , p.\\xa0536.',\n",
+ " 'text': '^ a b c d Carboneras 1992 , p.\\xa0536.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/308',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Livezey 1986 , pp.\\xa0737–738.',\n",
+ " 'text': '^ Livezey 1986 , pp.\\xa0737–738.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/309',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Madsen, McHugh & de Kloet 1988 , p.\\xa0452.',\n",
+ " 'text': '^ Madsen, McHugh & de Kloet 1988 , p.\\xa0452.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/310',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Donne-Goussé, Laudet & Hänni 2002 , pp.\\xa0353–354.',\n",
+ " 'text': '^ Donne-Goussé, Laudet & Hänni 2002 , pp.\\xa0353–354.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/311',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ a b c d e f Carboneras 1992 , p.\\xa0540.',\n",
+ " 'text': '^ a b c d e f Carboneras 1992 , p.\\xa0540.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/312',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Elphick, Dunning & Sibley 2001 , p.\\xa0191.',\n",
+ " 'text': '^ Elphick, Dunning & Sibley 2001 , p.\\xa0191.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/313',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Kear 2005 , p.\\xa0448.',\n",
+ " 'text': '^ Kear 2005 , p.\\xa0448.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/314',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Kear 2005 , p.\\xa0622–623.',\n",
+ " 'text': '^ Kear 2005 , p.\\xa0622–623.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/315',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Kear 2005 , p.\\xa0686.',\n",
+ " 'text': '^ Kear 2005 , p.\\xa0686.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/316',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Elphick, Dunning & Sibley 2001 , p.\\xa0193.',\n",
+ " 'text': '^ Elphick, Dunning & Sibley 2001 , p.\\xa0193.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/317',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ a b c d e f g Carboneras 1992 , p.\\xa0537.',\n",
+ " 'text': '^ a b c d e f g Carboneras 1992 , p.\\xa0537.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/318',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': \"^ American Ornithologists' Union 1998 , p.\\xa0xix.\",\n",
+ " 'text': \"^ American Ornithologists' Union 1998 , p.\\xa0xix.\",\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/319',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': \"^ American Ornithologists' Union 1998 .\",\n",
+ " 'text': \"^ American Ornithologists' Union 1998 .\",\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/320',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Carboneras 1992 , p.\\xa0538.',\n",
+ " 'text': '^ Carboneras 1992 , p.\\xa0538.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/321',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Christidis & Boles 2008 , p.\\xa062.',\n",
+ " 'text': '^ Christidis & Boles 2008 , p.\\xa062.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/322',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Shirihai 2008 , pp.\\xa0239, 245.',\n",
+ " 'text': '^ Shirihai 2008 , pp.\\xa0239, 245.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/323',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ a b Pratt, Bruner & Berrett 1987 , pp.\\xa098–107.',\n",
+ " 'text': '^ a b Pratt, Bruner & Berrett 1987 , pp.\\xa098–107.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/324',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Fitter, Fitter & Hosking 2000 , pp.\\xa052–3.',\n",
+ " 'text': '^ Fitter, Fitter & Hosking 2000 , pp.\\xa052–3.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/325',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ \"Pacific Black Duck\" . www.wiresnr.org . Retrieved 2018-04-27 .',\n",
+ " 'text': '^ \"Pacific Black Duck\" . www.wiresnr.org . Retrieved 2018-04-27 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/326',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Ogden, Evans. \"Dabbling Ducks\" . CWE . Retrieved 2006-11-02 .',\n",
+ " 'text': '^ Ogden, Evans. \"Dabbling Ducks\" . CWE . Retrieved 2006-11-02 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/327',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Karl Mathiesen (16 March 2015). \"Don\\'t feed the ducks bread, say conservationists\" . The Guardian . Retrieved 13 November 2016 .',\n",
+ " 'text': '^ Karl Mathiesen (16 March 2015). \"Don\\'t feed the ducks bread, say conservationists\" . The Guardian . Retrieved 13 November 2016 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/328',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Rohwer, Frank C.; Anderson, Michael G. (1988). \"Female-Biased Philopatry, Monogamy, and the Timing of Pair Formation in Migratory Waterfowl\". Current Ornithology . pp.\\xa0187–221. doi : 10.1007/978-1-4615-6787-5_4 . ISBN 978-1-4615-6789-9 .',\n",
+ " 'text': '^ Rohwer, Frank C.; Anderson, Michael G. (1988). \"Female-Biased Philopatry, Monogamy, and the Timing of Pair Formation in Migratory Waterfowl\". Current Ornithology . pp.\\xa0187–221. doi : 10.1007/978-1-4615-6787-5_4 . ISBN 978-1-4615-6789-9 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/329',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Smith, Cyndi M.; Cooke, Fred; Robertson, Gregory J.; Goudie, R. Ian; Boyd, W. Sean (2000). \"Long-Term Pair Bonds in Harlequin Ducks\" . The Condor . 102 (1): 201–205. doi : 10.1093/condor/102.1.201 . hdl : 10315/13797 .',\n",
+ " 'text': '^ Smith, Cyndi M.; Cooke, Fred; Robertson, Gregory J.; Goudie, R. Ian; Boyd, W. Sean (2000). \"Long-Term Pair Bonds in Harlequin Ducks\" . The Condor . 102 (1): 201–205. doi : 10.1093/condor/102.1.201 . hdl : 10315/13797 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/330',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ \"If You Find An Orphaned Duckling - Wildlife Rehabber\" . wildliferehabber.com . Archived from the original on 2018-09-23 . Retrieved 2018-12-22 .',\n",
+ " 'text': '^ \"If You Find An Orphaned Duckling - Wildlife Rehabber\" . wildliferehabber.com . Archived from the original on 2018-09-23 . Retrieved 2018-12-22 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/331',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Carver, Heather (2011). The Duck Bible . Lulu.com. ISBN 9780557901562 . [ self-published source ]',\n",
+ " 'text': '^ Carver, Heather (2011). The Duck Bible . Lulu.com. ISBN 9780557901562 . [ self-published source ]',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/332',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Titlow, Budd (2013-09-03). Bird Brains: Inside the Strange Minds of Our Fine Feathered Friends . Rowman & Littlefield. ISBN 9780762797707 .',\n",
+ " 'text': '^ Titlow, Budd (2013-09-03). Bird Brains: Inside the Strange Minds of Our Fine Feathered Friends . Rowman & Littlefield. ISBN 9780762797707 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/333',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Amos, Jonathan (2003-09-08). \"Sound science is quackers\" . BBC News . Retrieved 2006-11-02 .',\n",
+ " 'text': '^ Amos, Jonathan (2003-09-08). \"Sound science is quackers\" . BBC News . Retrieved 2006-11-02 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/334',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ \"Mythbusters Episode 8\" . 12 December 2003.',\n",
+ " 'text': '^ \"Mythbusters Episode 8\" . 12 December 2003.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/335',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Erlandson 1994 , p.\\xa0171.',\n",
+ " 'text': '^ Erlandson 1994 , p.\\xa0171.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/336',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Jeffries 2008 , pp.\\xa0168, 243.',\n",
+ " 'text': '^ Jeffries 2008 , pp.\\xa0168, 243.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/337',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ a b Sued-Badillo 2003 , p.\\xa065.',\n",
+ " 'text': '^ a b Sued-Badillo 2003 , p.\\xa065.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/338',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Thorpe 1996 , p.\\xa068.',\n",
+ " 'text': '^ Thorpe 1996 , p.\\xa068.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/339',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Maisels 1999 , p.\\xa042.',\n",
+ " 'text': '^ Maisels 1999 , p.\\xa042.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/340',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Rau 1876 , p.\\xa0133.',\n",
+ " 'text': '^ Rau 1876 , p.\\xa0133.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/341',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Higman 2012 , p.\\xa023.',\n",
+ " 'text': '^ Higman 2012 , p.\\xa023.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/342',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Hume 2012 , p.\\xa053.',\n",
+ " 'text': '^ Hume 2012 , p.\\xa053.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/343',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Hume 2012 , p.\\xa052.',\n",
+ " 'text': '^ Hume 2012 , p.\\xa052.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/344',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Fieldhouse 2002 , p.\\xa0167.',\n",
+ " 'text': '^ Fieldhouse 2002 , p.\\xa0167.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/345',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Livingston, A. D. (1998-01-01). Guide to Edible Plants and Animals . Wordsworth Editions, Limited. ISBN 9781853263774 .',\n",
+ " 'text': '^ Livingston, A. D. (1998-01-01). Guide to Edible Plants and Animals . Wordsworth Editions, Limited. ISBN 9781853263774 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/346',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ \"Study plan for waterfowl injury assessment: Determining PCB concentrations in Hudson river resident waterfowl\" (PDF) . New York State Department of Environmental Conservation . US Department of Commerce. December 2008. p.\\xa03. Archived (PDF) from the original on 2022-10-09 . Retrieved 2 July 2019 .',\n",
+ " 'text': '^ \"Study plan for waterfowl injury assessment: Determining PCB concentrations in Hudson river resident waterfowl\" (PDF) . New York State Department of Environmental Conservation . US Department of Commerce. December 2008. p.\\xa03. Archived (PDF) from the original on 2022-10-09 . Retrieved 2 July 2019 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/347',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ \"FAOSTAT\" . www.fao.org . Retrieved 2019-10-25 .',\n",
+ " 'text': '^ \"FAOSTAT\" . www.fao.org . Retrieved 2019-10-25 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/348',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ \"Anas platyrhynchos, Domestic Duck; DigiMorph Staff - The University of Texas at Austin\" . Digimorph.org . Retrieved 2012-12-23 .',\n",
+ " 'text': '^ \"Anas platyrhynchos, Domestic Duck; DigiMorph Staff - The University of Texas at Austin\" . Digimorph.org . Retrieved 2012-12-23 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/349',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Sy Montgomery. \"Mallard; Encyclopædia Britannica\" . Britannica.com . Retrieved 2012-12-23 .',\n",
+ " 'text': '^ Sy Montgomery. \"Mallard; Encyclopædia Britannica\" . Britannica.com . Retrieved 2012-12-23 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/350',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Glenday, Craig (2014). Guinness World Records . Guinness World Records Limited. pp. 135 . ISBN 978-1-908843-15-9 .',\n",
+ " 'text': '^ Glenday, Craig (2014). Guinness World Records . Guinness World Records Limited. pp. 135 . ISBN 978-1-908843-15-9 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/351',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Suomen kunnallisvaakunat (in Finnish). Suomen Kunnallisliitto. 1982. p.\\xa0147. ISBN 951-773-085-3 .',\n",
+ " 'text': '^ Suomen kunnallisvaakunat (in Finnish). Suomen Kunnallisliitto. 1982. p.\\xa0147. ISBN 951-773-085-3 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/352',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ \"Lubānas simbolika\" (in Latvian) . Retrieved September 9, 2021 .',\n",
+ " 'text': '^ \"Lubānas simbolika\" (in Latvian) . Retrieved September 9, 2021 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/353',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ \"Föglö\" (in Swedish) . Retrieved September 9, 2021 .',\n",
+ " 'text': '^ \"Föglö\" (in Swedish) . Retrieved September 9, 2021 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/354',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Young, Emma. \"World\\'s funniest joke revealed\" . New Scientist . Retrieved 7 January 2019 .',\n",
+ " 'text': '^ Young, Emma. \"World\\'s funniest joke revealed\" . New Scientist . Retrieved 7 January 2019 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/355',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ \"Howard the Duck (character)\" . Grand Comics Database .',\n",
+ " 'text': '^ \"Howard the Duck (character)\" . Grand Comics Database .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/356',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ Sanderson, Peter ; Gilbert, Laura (2008). \"1970s\". Marvel Chronicle A Year by Year History . London, United Kingdom: Dorling Kindersley . p.\\xa0161. ISBN 978-0756641238 . December saw the debut of the cigar-smoking Howard the Duck. In this story by writer Steve Gerber and artist Val Mayerik, various beings from different realities had begun turning up in the Man-Thing\\'s Florida swamp, including this bad-tempered talking duck.',\n",
+ " 'text': '^ Sanderson, Peter ; Gilbert, Laura (2008). \"1970s\". Marvel Chronicle A Year by Year History . London, United Kingdom: Dorling Kindersley . p.\\xa0161. ISBN 978-0756641238 . December saw the debut of the cigar-smoking Howard the Duck. In this story by writer Steve Gerber and artist Val Mayerik, various beings from different realities had begun turning up in the Man-Thing\\'s Florida swamp, including this bad-tempered talking duck.',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/357',\n",
+ " 'parent': {'$ref': '#/groups/37'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '^ \"The Duck\" . University of Oregon Athletics . Retrieved 2022-01-20 .',\n",
+ " 'text': '^ \"The Duck\" . University of Oregon Athletics . Retrieved 2022-01-20 .',\n",
+ " 'enumerated': True,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/358',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Sources',\n",
+ " 'text': 'Sources',\n",
+ " 'level': 3},\n",
+ " {'self_ref': '#/texts/359',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': \"American Ornithologists' Union (1998). Checklist of North American Birds (PDF) . Washington, DC: American Ornithologists' Union. ISBN 978-1-891276-00-2 . Archived (PDF) from the original on 2022-10-09.\",\n",
+ " 'text': \"American Ornithologists' Union (1998). Checklist of North American Birds (PDF) . Washington, DC: American Ornithologists' Union. ISBN 978-1-891276-00-2 . Archived (PDF) from the original on 2022-10-09.\",\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/360',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Carboneras, Carlos (1992). del Hoyo, Josep; Elliott, Andrew; Sargatal, Jordi (eds.). Handbook of the Birds of the World . Vol.\\xa01: Ostrich to Ducks. Barcelona: Lynx Edicions. ISBN 978-84-87334-10-8 .',\n",
+ " 'text': 'Carboneras, Carlos (1992). del Hoyo, Josep; Elliott, Andrew; Sargatal, Jordi (eds.). Handbook of the Birds of the World . Vol.\\xa01: Ostrich to Ducks. Barcelona: Lynx Edicions. ISBN 978-84-87334-10-8 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/361',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Christidis, Les; Boles, Walter E., eds. (2008). Systematics and Taxonomy of Australian Birds . Collingwood, VIC: Csiro Publishing. ISBN 978-0-643-06511-6 .',\n",
+ " 'text': 'Christidis, Les; Boles, Walter E., eds. (2008). Systematics and Taxonomy of Australian Birds . Collingwood, VIC: Csiro Publishing. ISBN 978-0-643-06511-6 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/362',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Donne-Goussé, Carole; Laudet, Vincent; Hänni, Catherine (July 2002). \"A molecular phylogeny of Anseriformes based on mitochondrial DNA analysis\". Molecular Phylogenetics and Evolution . 23 (3): 339–356. Bibcode : 2002MolPE..23..339D . doi : 10.1016/S1055-7903(02)00019-2 . PMID 12099792 .',\n",
+ " 'text': 'Donne-Goussé, Carole; Laudet, Vincent; Hänni, Catherine (July 2002). \"A molecular phylogeny of Anseriformes based on mitochondrial DNA analysis\". Molecular Phylogenetics and Evolution . 23 (3): 339–356. Bibcode : 2002MolPE..23..339D . doi : 10.1016/S1055-7903(02)00019-2 . PMID 12099792 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/363',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Elphick, Chris; Dunning, John B. Jr.; Sibley, David, eds. (2001). The Sibley Guide to Bird Life and Behaviour . London: Christopher Helm. ISBN 978-0-7136-6250-4 .',\n",
+ " 'text': 'Elphick, Chris; Dunning, John B. Jr.; Sibley, David, eds. (2001). The Sibley Guide to Bird Life and Behaviour . London: Christopher Helm. ISBN 978-0-7136-6250-4 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/364',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Erlandson, Jon M. (1994). Early Hunter-Gatherers of the California Coast . New York, NY: Springer Science & Business Media. ISBN 978-1-4419-3231-0 .',\n",
+ " 'text': 'Erlandson, Jon M. (1994). Early Hunter-Gatherers of the California Coast . New York, NY: Springer Science & Business Media. ISBN 978-1-4419-3231-0 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/365',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Fieldhouse, Paul (2002). Food, Feasts, and Faith: An Encyclopedia of Food Culture in World Religions . Vol.\\xa0I: A–K. Santa Barbara: ABC-CLIO. ISBN 978-1-61069-412-4 .',\n",
+ " 'text': 'Fieldhouse, Paul (2002). Food, Feasts, and Faith: An Encyclopedia of Food Culture in World Religions . Vol.\\xa0I: A–K. Santa Barbara: ABC-CLIO. ISBN 978-1-61069-412-4 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/366',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Fitter, Julian; Fitter, Daniel; Hosking, David (2000). Wildlife of the Galápagos . Princeton, NJ: Princeton University Press. ISBN 978-0-691-10295-5 .',\n",
+ " 'text': 'Fitter, Julian; Fitter, Daniel; Hosking, David (2000). Wildlife of the Galápagos . Princeton, NJ: Princeton University Press. ISBN 978-0-691-10295-5 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/367',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Higman, B. W. (2012). How Food Made History . Chichester, UK: John Wiley & Sons. ISBN 978-1-4051-8947-7 .',\n",
+ " 'text': 'Higman, B. W. (2012). How Food Made History . Chichester, UK: John Wiley & Sons. ISBN 978-1-4051-8947-7 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/368',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Hume, Julian H. (2012). Extinct Birds . London: Christopher Helm. ISBN 978-1-4729-3744-5 .',\n",
+ " 'text': 'Hume, Julian H. (2012). Extinct Birds . London: Christopher Helm. ISBN 978-1-4729-3744-5 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/369',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Jeffries, Richard (2008). Holocene Hunter-Gatherers of the Lower Ohio River Valley . Tuscaloosa: University of Alabama Press. ISBN 978-0-8173-1658-7 .',\n",
+ " 'text': 'Jeffries, Richard (2008). Holocene Hunter-Gatherers of the Lower Ohio River Valley . Tuscaloosa: University of Alabama Press. ISBN 978-0-8173-1658-7 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/370',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Kear, Janet, ed. (2005). Ducks, Geese and Swans: Species Accounts ( Cairina to Mergus ) . Bird Families of the World. Oxford: Oxford University Press. ISBN 978-0-19-861009-0 .',\n",
+ " 'text': 'Kear, Janet, ed. (2005). Ducks, Geese and Swans: Species Accounts ( Cairina to Mergus ) . Bird Families of the World. Oxford: Oxford University Press. ISBN 978-0-19-861009-0 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/371',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Livezey, Bradley C. (October 1986). \"A phylogenetic analysis of recent Anseriform genera using morphological characters\" (PDF) . The Auk . 103 (4): 737–754. doi : 10.1093/auk/103.4.737 . Archived (PDF) from the original on 2022-10-09.',\n",
+ " 'text': 'Livezey, Bradley C. (October 1986). \"A phylogenetic analysis of recent Anseriform genera using morphological characters\" (PDF) . The Auk . 103 (4): 737–754. doi : 10.1093/auk/103.4.737 . Archived (PDF) from the original on 2022-10-09.',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/372',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Madsen, Cort S.; McHugh, Kevin P.; de Kloet, Siwo R. (July 1988). \"A partial classification of waterfowl (Anatidae) based on single-copy DNA\" (PDF) . The Auk . 105 (3): 452–459. doi : 10.1093/auk/105.3.452 . Archived (PDF) from the original on 2022-10-09.',\n",
+ " 'text': 'Madsen, Cort S.; McHugh, Kevin P.; de Kloet, Siwo R. (July 1988). \"A partial classification of waterfowl (Anatidae) based on single-copy DNA\" (PDF) . The Auk . 105 (3): 452–459. doi : 10.1093/auk/105.3.452 . Archived (PDF) from the original on 2022-10-09.',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/373',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Maisels, Charles Keith (1999). Early Civilizations of the Old World . London: Routledge. ISBN 978-0-415-10975-8 .',\n",
+ " 'text': 'Maisels, Charles Keith (1999). Early Civilizations of the Old World . London: Routledge. ISBN 978-0-415-10975-8 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/374',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Pratt, H. Douglas; Bruner, Phillip L.; Berrett, Delwyn G. (1987). A Field Guide to the Birds of Hawaii and the Tropical Pacific . Princeton, NJ: Princeton University Press. ISBN 0-691-02399-9 .',\n",
+ " 'text': 'Pratt, H. Douglas; Bruner, Phillip L.; Berrett, Delwyn G. (1987). A Field Guide to the Birds of Hawaii and the Tropical Pacific . Princeton, NJ: Princeton University Press. ISBN 0-691-02399-9 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/375',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Rau, Charles (1876). Early Man in Europe . New York: Harper & Brothers. LCCN 05040168 .',\n",
+ " 'text': 'Rau, Charles (1876). Early Man in Europe . New York: Harper & Brothers. LCCN 05040168 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/376',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Shirihai, Hadoram (2008). A Complete Guide to Antarctic Wildlife . Princeton, NJ, US: Princeton University Press. ISBN 978-0-691-13666-0 .',\n",
+ " 'text': 'Shirihai, Hadoram (2008). A Complete Guide to Antarctic Wildlife . Princeton, NJ, US: Princeton University Press. ISBN 978-0-691-13666-0 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/377',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Sued-Badillo, Jalil (2003). Autochthonous Societies . General History of the Caribbean. Paris: UNESCO. ISBN 978-92-3-103832-7 .',\n",
+ " 'text': 'Sued-Badillo, Jalil (2003). Autochthonous Societies . General History of the Caribbean. Paris: UNESCO. ISBN 978-92-3-103832-7 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/378',\n",
+ " 'parent': {'$ref': '#/groups/38'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Thorpe, I. J. (1996). The Origins of Agriculture in Europe . New York: Routledge. ISBN 978-0-415-08009-5 .',\n",
+ " 'text': 'Thorpe, I. J. (1996). The Origins of Agriculture in Europe . New York: Routledge. ISBN 978-0-415-08009-5 .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/379',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'section_header',\n",
+ " 'prov': [],\n",
+ " 'orig': 'External links',\n",
+ " 'text': 'External links',\n",
+ " 'level': 2},\n",
+ " {'self_ref': '#/texts/380',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': \"Duck at Wikipedia's sister projects\",\n",
+ " 'text': \"Duck at Wikipedia's sister projects\"},\n",
+ " {'self_ref': '#/texts/381',\n",
+ " 'parent': {'$ref': '#/groups/39'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Definitions from Wiktionary',\n",
+ " 'text': 'Definitions from Wiktionary',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/382',\n",
+ " 'parent': {'$ref': '#/groups/39'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Media from Commons',\n",
+ " 'text': 'Media from Commons',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/383',\n",
+ " 'parent': {'$ref': '#/groups/39'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Quotations from Wikiquote',\n",
+ " 'text': 'Quotations from Wikiquote',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/384',\n",
+ " 'parent': {'$ref': '#/groups/39'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Recipes from Wikibooks',\n",
+ " 'text': 'Recipes from Wikibooks',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/385',\n",
+ " 'parent': {'$ref': '#/groups/39'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Taxa from Wikispecies',\n",
+ " 'text': 'Taxa from Wikispecies',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/386',\n",
+ " 'parent': {'$ref': '#/groups/39'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Data from Wikidata',\n",
+ " 'text': 'Data from Wikidata',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/387',\n",
+ " 'parent': {'$ref': '#/groups/40'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'list of books (useful looking abstracts)',\n",
+ " 'text': 'list of books (useful looking abstracts)',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/388',\n",
+ " 'parent': {'$ref': '#/groups/40'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine',\n",
+ " 'text': 'Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/389',\n",
+ " 'parent': {'$ref': '#/groups/40'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '',\n",
+ " 'text': '',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/390',\n",
+ " 'parent': {'$ref': '#/groups/40'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ducks at a Distance, by Rob Hines at Project Gutenberg - A modern illustrated guide to identification of US waterfowl',\n",
+ " 'text': 'Ducks at a Distance, by Rob Hines at Project Gutenberg - A modern illustrated guide to identification of US waterfowl',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/391',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'NewPP limit report',\n",
+ " 'text': 'NewPP limit report'},\n",
+ " {'self_ref': '#/texts/392',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Parsed by mw‐web.codfw.main‐5d5b97b956‐mw5gf',\n",
+ " 'text': 'Parsed by mw‐web.codfw.main‐5d5b97b956‐mw5gf'},\n",
+ " {'self_ref': '#/texts/393',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Cached time: 20241001035144',\n",
+ " 'text': 'Cached time: 20241001035144'},\n",
+ " {'self_ref': '#/texts/394',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Cache expiry: 2592000',\n",
+ " 'text': 'Cache expiry: 2592000'},\n",
+ " {'self_ref': '#/texts/395',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Reduced expiry: false',\n",
+ " 'text': 'Reduced expiry: false'},\n",
+ " {'self_ref': '#/texts/396',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Complications: [vary‐revision‐sha1, show‐toc]',\n",
+ " 'text': 'Complications: [vary‐revision‐sha1, show‐toc]'},\n",
+ " {'self_ref': '#/texts/397',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'CPU time usage: 1.191 seconds',\n",
+ " 'text': 'CPU time usage: 1.191 seconds'},\n",
+ " {'self_ref': '#/texts/398',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Real time usage: 1.452 seconds',\n",
+ " 'text': 'Real time usage: 1.452 seconds'},\n",
+ " {'self_ref': '#/texts/399',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Preprocessor visited node count: 12444/1000000',\n",
+ " 'text': 'Preprocessor visited node count: 12444/1000000'},\n",
+ " {'self_ref': '#/texts/400',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Post‐expand include size: 122530/2097152 bytes',\n",
+ " 'text': 'Post‐expand include size: 122530/2097152 bytes'},\n",
+ " {'self_ref': '#/texts/401',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Template argument size: 9168/2097152 bytes',\n",
+ " 'text': 'Template argument size: 9168/2097152 bytes'},\n",
+ " {'self_ref': '#/texts/402',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Highest expansion depth: 14/100',\n",
+ " 'text': 'Highest expansion depth: 14/100'},\n",
+ " {'self_ref': '#/texts/403',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Expensive parser function count: 14/500',\n",
+ " 'text': 'Expensive parser function count: 14/500'},\n",
+ " {'self_ref': '#/texts/404',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Unstrip recursion depth: 1/20',\n",
+ " 'text': 'Unstrip recursion depth: 1/20'},\n",
+ " {'self_ref': '#/texts/405',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Unstrip post‐expand size: 165576/5000000 bytes',\n",
+ " 'text': 'Unstrip post‐expand size: 165576/5000000 bytes'},\n",
+ " {'self_ref': '#/texts/406',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Lua time usage: 0.865/10.000 seconds',\n",
+ " 'text': 'Lua time usage: 0.865/10.000 seconds'},\n",
+ " {'self_ref': '#/texts/407',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Lua memory usage: 25093454/52428800 bytes',\n",
+ " 'text': 'Lua memory usage: 25093454/52428800 bytes'},\n",
+ " {'self_ref': '#/texts/408',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Number of Wikibase entities loaded: 1/400',\n",
+ " 'text': 'Number of Wikibase entities loaded: 1/400'},\n",
+ " {'self_ref': '#/texts/409',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Transclusion expansion time report (%,ms,calls,template)',\n",
+ " 'text': 'Transclusion expansion time report (%,ms,calls,template)'},\n",
+ " {'self_ref': '#/texts/410',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '100.00% 1311.889 1 -total',\n",
+ " 'text': '100.00% 1311.889 1 -total'},\n",
+ " {'self_ref': '#/texts/411',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '22.17% 290.786 1 Template:Automatic_taxobox',\n",
+ " 'text': '22.17% 290.786 1 Template:Automatic_taxobox'},\n",
+ " {'self_ref': '#/texts/412',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '16.91% 221.802 1 Template:Reflist',\n",
+ " 'text': '16.91% 221.802 1 Template:Reflist'},\n",
+ " {'self_ref': '#/texts/413',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '10.86% 142.472 44 Template:Sfn',\n",
+ " 'text': '10.86% 142.472 44 Template:Sfn'},\n",
+ " {'self_ref': '#/texts/414',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '9.48% 124.344 15 Template:Lang',\n",
+ " 'text': '9.48% 124.344 15 Template:Lang'},\n",
+ " {'self_ref': '#/texts/415',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '9.25% 121.312 15 Template:Cite_web',\n",
+ " 'text': '9.25% 121.312 15 Template:Cite_web'},\n",
+ " {'self_ref': '#/texts/416',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '8.03% 105.346 26 Template:Cite_book',\n",
+ " 'text': '8.03% 105.346 26 Template:Cite_book'},\n",
+ " {'self_ref': '#/texts/417',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '5.85% 76.725 1 Template:Short_description',\n",
+ " 'text': '5.85% 76.725 1 Template:Short_description'},\n",
+ " {'self_ref': '#/texts/418',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '5.74% 75.262 1 Template:Authority_control',\n",
+ " 'text': '5.74% 75.262 1 Template:Authority_control'},\n",
+ " {'self_ref': '#/texts/419',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': '4.50% 58.973 1 Template:Sisterlinks',\n",
+ " 'text': '4.50% 58.973 1 Template:Sisterlinks'},\n",
+ " {'self_ref': '#/texts/420',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Saved in parser cache with key enwiki:pcache:idhash:37674-0!canonical and timestamp 20241001035144 and revision id 1246843351. Rendering was triggered because: page-view',\n",
+ " 'text': 'Saved in parser cache with key enwiki:pcache:idhash:37674-0!canonical and timestamp 20241001035144 and revision id 1246843351. Rendering was triggered because: page-view'},\n",
+ " {'self_ref': '#/texts/421',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'esi ',\n",
+ " 'text': 'esi '},\n",
+ " {'self_ref': '#/texts/422',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Retrieved from \"https://en.wikipedia.org/w/index.php?title=Duck&oldid=1246843351\"',\n",
+ " 'text': 'Retrieved from \"https://en.wikipedia.org/w/index.php?title=Duck&oldid=1246843351\"'},\n",
+ " {'self_ref': '#/texts/423',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Categories:',\n",
+ " 'text': 'Categories:'},\n",
+ " {'self_ref': '#/texts/424',\n",
+ " 'parent': {'$ref': '#/groups/41'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Ducks',\n",
+ " 'text': 'Ducks',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/425',\n",
+ " 'parent': {'$ref': '#/groups/41'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Game birds',\n",
+ " 'text': 'Game birds',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/426',\n",
+ " 'parent': {'$ref': '#/groups/41'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Bird common names',\n",
+ " 'text': 'Bird common names',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/427',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'text',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Hidden categories:',\n",
+ " 'text': 'Hidden categories:'},\n",
+ " {'self_ref': '#/texts/428',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'All accuracy disputes',\n",
+ " 'text': 'All accuracy disputes',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/429',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Accuracy disputes from February 2020',\n",
+ " 'text': 'Accuracy disputes from February 2020',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/430',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'CS1 Finnish-language sources (fi)',\n",
+ " 'text': 'CS1 Finnish-language sources (fi)',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/431',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'CS1 Latvian-language sources (lv)',\n",
+ " 'text': 'CS1 Latvian-language sources (lv)',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/432',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'CS1 Swedish-language sources (sv)',\n",
+ " 'text': 'CS1 Swedish-language sources (sv)',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/433',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Articles with short description',\n",
+ " 'text': 'Articles with short description',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/434',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Short description is different from Wikidata',\n",
+ " 'text': 'Short description is different from Wikidata',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/435',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Wikipedia indefinitely move-protected pages',\n",
+ " 'text': 'Wikipedia indefinitely move-protected pages',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/436',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Wikipedia indefinitely semi-protected pages',\n",
+ " 'text': 'Wikipedia indefinitely semi-protected pages',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/437',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': \"Articles with 'species' microformats\",\n",
+ " 'text': \"Articles with 'species' microformats\",\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/438',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Articles containing Old English (ca. 450-1100)-language text',\n",
+ " 'text': 'Articles containing Old English (ca. 450-1100)-language text',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/439',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Articles containing Dutch-language text',\n",
+ " 'text': 'Articles containing Dutch-language text',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/440',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Articles containing German-language text',\n",
+ " 'text': 'Articles containing German-language text',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/441',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Articles containing Norwegian-language text',\n",
+ " 'text': 'Articles containing Norwegian-language text',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/442',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Articles containing Lithuanian-language text',\n",
+ " 'text': 'Articles containing Lithuanian-language text',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/443',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Articles containing Ancient Greek (to 1453)-language text',\n",
+ " 'text': 'Articles containing Ancient Greek (to 1453)-language text',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/444',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'All articles with self-published sources',\n",
+ " 'text': 'All articles with self-published sources',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/445',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Articles with self-published sources from February 2020',\n",
+ " 'text': 'Articles with self-published sources from February 2020',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/446',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'All articles with unsourced statements',\n",
+ " 'text': 'All articles with unsourced statements',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/447',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Articles with unsourced statements from January 2022',\n",
+ " 'text': 'Articles with unsourced statements from January 2022',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/448',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'CS1: long volume value',\n",
+ " 'text': 'CS1: long volume value',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/449',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Pages using Sister project links with wikidata mismatch',\n",
+ " 'text': 'Pages using Sister project links with wikidata mismatch',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/450',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Pages using Sister project links with hidden wikidata',\n",
+ " 'text': 'Pages using Sister project links with hidden wikidata',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/451',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Webarchive template wayback links',\n",
+ " 'text': 'Webarchive template wayback links',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/452',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Articles with Project Gutenberg links',\n",
+ " 'text': 'Articles with Project Gutenberg links',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/453',\n",
+ " 'parent': {'$ref': '#/groups/42'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Articles containing video clips',\n",
+ " 'text': 'Articles containing video clips',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/454',\n",
+ " 'parent': {'$ref': '#/groups/43'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'This page was last edited on 21 September 2024, at 12:11 (UTC) .',\n",
+ " 'text': 'This page was last edited on 21 September 2024, at 12:11 (UTC) .',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/455',\n",
+ " 'parent': {'$ref': '#/groups/43'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Text is available under the Creative Commons Attribution-ShareAlike License 4.0 ;\\nadditional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy . Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc. , a non-profit organization.',\n",
+ " 'text': 'Text is available under the Creative Commons Attribution-ShareAlike License 4.0 ;\\nadditional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy . Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc. , a non-profit organization.',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/456',\n",
+ " 'parent': {'$ref': '#/groups/44'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Privacy policy',\n",
+ " 'text': 'Privacy policy',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/457',\n",
+ " 'parent': {'$ref': '#/groups/44'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'About Wikipedia',\n",
+ " 'text': 'About Wikipedia',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/458',\n",
+ " 'parent': {'$ref': '#/groups/44'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Disclaimers',\n",
+ " 'text': 'Disclaimers',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/459',\n",
+ " 'parent': {'$ref': '#/groups/44'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Contact Wikipedia',\n",
+ " 'text': 'Contact Wikipedia',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/460',\n",
+ " 'parent': {'$ref': '#/groups/44'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Code of Conduct',\n",
+ " 'text': 'Code of Conduct',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/461',\n",
+ " 'parent': {'$ref': '#/groups/44'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Developers',\n",
+ " 'text': 'Developers',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/462',\n",
+ " 'parent': {'$ref': '#/groups/44'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Statistics',\n",
+ " 'text': 'Statistics',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/463',\n",
+ " 'parent': {'$ref': '#/groups/44'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Cookie statement',\n",
+ " 'text': 'Cookie statement',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/464',\n",
+ " 'parent': {'$ref': '#/groups/44'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Mobile view',\n",
+ " 'text': 'Mobile view',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/465',\n",
+ " 'parent': {'$ref': '#/groups/45'},\n",
+ " 'children': [{'$ref': '#/texts/466'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '',\n",
+ " 'text': '',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/466',\n",
+ " 'parent': {'$ref': '#/texts/465'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'caption',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Wikimedia Foundation',\n",
+ " 'text': 'Wikimedia Foundation'},\n",
+ " {'self_ref': '#/texts/467',\n",
+ " 'parent': {'$ref': '#/groups/45'},\n",
+ " 'children': [{'$ref': '#/texts/468'}],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'list_item',\n",
+ " 'prov': [],\n",
+ " 'orig': '',\n",
+ " 'text': '',\n",
+ " 'enumerated': False,\n",
+ " 'marker': '-'},\n",
+ " {'self_ref': '#/texts/468',\n",
+ " 'parent': {'$ref': '#/texts/467'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'caption',\n",
+ " 'prov': [],\n",
+ " 'orig': 'Powered by MediaWiki',\n",
+ " 'text': 'Powered by MediaWiki'}],\n",
+ " 'pictures': [],\n",
+ " 'tables': [{'self_ref': '#/tables/0',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'table',\n",
+ " 'prov': [],\n",
+ " 'captions': [],\n",
+ " 'references': [],\n",
+ " 'footnotes': [],\n",
+ " 'data': {'table_cells': [{'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 0,\n",
+ " 'end_row_offset_idx': 1,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Duck',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 1,\n",
+ " 'end_row_offset_idx': 2,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': '',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 2,\n",
+ " 'end_row_offset_idx': 3,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Bufflehead(Bucephala albeola)',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 3,\n",
+ " 'end_row_offset_idx': 4,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Scientific classification',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 4,\n",
+ " 'end_row_offset_idx': 5,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Domain:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 4,\n",
+ " 'end_row_offset_idx': 5,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Eukaryota',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 5,\n",
+ " 'end_row_offset_idx': 6,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Kingdom:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 5,\n",
+ " 'end_row_offset_idx': 6,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Animalia',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 6,\n",
+ " 'end_row_offset_idx': 7,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Phylum:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 6,\n",
+ " 'end_row_offset_idx': 7,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Chordata',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 7,\n",
+ " 'end_row_offset_idx': 8,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Class:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 7,\n",
+ " 'end_row_offset_idx': 8,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Aves',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 8,\n",
+ " 'end_row_offset_idx': 9,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Order:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 8,\n",
+ " 'end_row_offset_idx': 9,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Anseriformes',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 9,\n",
+ " 'end_row_offset_idx': 10,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Superfamily:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 9,\n",
+ " 'end_row_offset_idx': 10,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Anatoidea',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 10,\n",
+ " 'end_row_offset_idx': 11,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Family:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 10,\n",
+ " 'end_row_offset_idx': 11,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Anatidae',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 11,\n",
+ " 'end_row_offset_idx': 12,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Subfamilies',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 12,\n",
+ " 'end_row_offset_idx': 13,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'See text',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " 'num_rows': 13,\n",
+ " 'num_cols': 2,\n",
+ " 'grid': [[{'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 0,\n",
+ " 'end_row_offset_idx': 1,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Duck',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 0,\n",
+ " 'end_row_offset_idx': 1,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Duck',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 1,\n",
+ " 'end_row_offset_idx': 2,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': '',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 1,\n",
+ " 'end_row_offset_idx': 2,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': '',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 2,\n",
+ " 'end_row_offset_idx': 3,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Bufflehead(Bucephala albeola)',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 2,\n",
+ " 'end_row_offset_idx': 3,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Bufflehead(Bucephala albeola)',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 3,\n",
+ " 'end_row_offset_idx': 4,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Scientific classification',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 3,\n",
+ " 'end_row_offset_idx': 4,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Scientific classification',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 4,\n",
+ " 'end_row_offset_idx': 5,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Domain:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 4,\n",
+ " 'end_row_offset_idx': 5,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Eukaryota',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 5,\n",
+ " 'end_row_offset_idx': 6,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Kingdom:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 5,\n",
+ " 'end_row_offset_idx': 6,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Animalia',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 6,\n",
+ " 'end_row_offset_idx': 7,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Phylum:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 6,\n",
+ " 'end_row_offset_idx': 7,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Chordata',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 7,\n",
+ " 'end_row_offset_idx': 8,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Class:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 7,\n",
+ " 'end_row_offset_idx': 8,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Aves',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 8,\n",
+ " 'end_row_offset_idx': 9,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Order:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 8,\n",
+ " 'end_row_offset_idx': 9,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Anseriformes',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 9,\n",
+ " 'end_row_offset_idx': 10,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Superfamily:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 9,\n",
+ " 'end_row_offset_idx': 10,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Anatoidea',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 10,\n",
+ " 'end_row_offset_idx': 11,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Family:',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 10,\n",
+ " 'end_row_offset_idx': 11,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Anatidae',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 11,\n",
+ " 'end_row_offset_idx': 12,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Subfamilies',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 11,\n",
+ " 'end_row_offset_idx': 12,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Subfamilies',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 12,\n",
+ " 'end_row_offset_idx': 13,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'See text',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 12,\n",
+ " 'end_row_offset_idx': 13,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'See text',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}]]}},\n",
+ " {'self_ref': '#/tables/1',\n",
+ " 'parent': {'$ref': '#/body'},\n",
+ " 'children': [],\n",
+ " 'content_layer': 'body',\n",
+ " 'label': 'table',\n",
+ " 'prov': [],\n",
+ " 'captions': [],\n",
+ " 'references': [],\n",
+ " 'footnotes': [],\n",
+ " 'data': {'table_cells': [{'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 0,\n",
+ " 'end_row_offset_idx': 1,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Authority control databases',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 1,\n",
+ " 'end_row_offset_idx': 2,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'National',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 1,\n",
+ " 'end_row_offset_idx': 2,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'United StatesFranceBnF dataJapanLatviaIsrael',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 2,\n",
+ " 'end_row_offset_idx': 3,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Other',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 2,\n",
+ " 'end_row_offset_idx': 3,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'IdRef',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " 'num_rows': 3,\n",
+ " 'num_cols': 2,\n",
+ " 'grid': [[{'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 0,\n",
+ " 'end_row_offset_idx': 1,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Authority control databases',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 2,\n",
+ " 'start_row_offset_idx': 0,\n",
+ " 'end_row_offset_idx': 1,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'Authority control databases',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 1,\n",
+ " 'end_row_offset_idx': 2,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'National',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 1,\n",
+ " 'end_row_offset_idx': 2,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'United StatesFranceBnF dataJapanLatviaIsrael',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}],\n",
+ " [{'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 2,\n",
+ " 'end_row_offset_idx': 3,\n",
+ " 'start_col_offset_idx': 0,\n",
+ " 'end_col_offset_idx': 1,\n",
+ " 'text': 'Other',\n",
+ " 'column_header': True,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False},\n",
+ " {'row_span': 1,\n",
+ " 'col_span': 1,\n",
+ " 'start_row_offset_idx': 2,\n",
+ " 'end_row_offset_idx': 3,\n",
+ " 'start_col_offset_idx': 1,\n",
+ " 'end_col_offset_idx': 2,\n",
+ " 'text': 'IdRef',\n",
+ " 'column_header': False,\n",
+ " 'row_header': False,\n",
+ " 'row_section': False}]]}}],\n",
+ " 'key_value_items': [],\n",
+ " 'form_items': [],\n",
+ " 'pages': {}}"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "execution_count": 10
}
],
"metadata": {