mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
Merge from main
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
commit
d788bf2a6e
@ -1,9 +1,9 @@
|
|||||||
import logging
|
import logging
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Set, Union
|
from typing import Optional, Union, cast
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, NavigableString, PageElement, Tag
|
||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
DocItemLabel,
|
DocItemLabel,
|
||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
@ -12,6 +12,7 @@ from docling_core.types.doc import (
|
|||||||
TableCell,
|
TableCell,
|
||||||
TableData,
|
TableData,
|
||||||
)
|
)
|
||||||
|
from typing_extensions import override
|
||||||
|
|
||||||
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
@ -21,6 +22,7 @@ _log = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
||||||
|
@override
|
||||||
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
|
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
|
||||||
super().__init__(in_doc, path_or_stream)
|
super().__init__(in_doc, path_or_stream)
|
||||||
_log.debug("About to init HTML backend...")
|
_log.debug("About to init HTML backend...")
|
||||||
@ -48,13 +50,16 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
f"Could not initialize HTML backend for file with hash {self.document_hash}."
|
f"Could not initialize HTML backend for file with hash {self.document_hash}."
|
||||||
) from e
|
) from e
|
||||||
|
|
||||||
|
@override
|
||||||
def is_valid(self) -> bool:
|
def is_valid(self) -> bool:
|
||||||
return self.soup is not None
|
return self.soup is not None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@override
|
||||||
def supports_pagination(cls) -> bool:
|
def supports_pagination(cls) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@override
|
||||||
def unload(self):
|
def unload(self):
|
||||||
if isinstance(self.path_or_stream, BytesIO):
|
if isinstance(self.path_or_stream, BytesIO):
|
||||||
self.path_or_stream.close()
|
self.path_or_stream.close()
|
||||||
@ -62,9 +67,11 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self.path_or_stream = None
|
self.path_or_stream = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def supported_formats(cls) -> Set[InputFormat]:
|
@override
|
||||||
|
def supported_formats(cls) -> set[InputFormat]:
|
||||||
return {InputFormat.HTML}
|
return {InputFormat.HTML}
|
||||||
|
|
||||||
|
@override
|
||||||
def convert(self) -> DoclingDocument:
|
def convert(self) -> DoclingDocument:
|
||||||
# access self.path_or_stream to load stuff
|
# access self.path_or_stream to load stuff
|
||||||
origin = DocumentOrigin(
|
origin = DocumentOrigin(
|
||||||
@ -80,98 +87,78 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
assert self.soup is not None
|
assert self.soup is not None
|
||||||
content = self.soup.body or self.soup
|
content = self.soup.body or self.soup
|
||||||
# Replace <br> tags with newline characters
|
# Replace <br> tags with newline characters
|
||||||
for br in content.find_all("br"):
|
for br in content("br"):
|
||||||
br.replace_with("\n")
|
br.replace_with(NavigableString("\n"))
|
||||||
doc = self.walk(content, doc)
|
self.walk(content, doc)
|
||||||
else:
|
else:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"Cannot convert doc with {self.document_hash} because the backend failed to init."
|
f"Cannot convert doc with {self.document_hash} because the backend failed to init."
|
||||||
)
|
)
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def walk(self, element: Tag, doc: DoclingDocument):
|
def walk(self, tag: Tag, doc: DoclingDocument) -> None:
|
||||||
try:
|
# Iterate over elements in the body of the document
|
||||||
# Iterate over elements in the body of the document
|
for element in tag.children:
|
||||||
for idx, element in enumerate(element.children):
|
if isinstance(element, Tag):
|
||||||
try:
|
try:
|
||||||
self.analyse_element(element, idx, doc)
|
self.analyze_tag(cast(Tag, element), doc)
|
||||||
except Exception as exc_child:
|
except Exception as exc_child:
|
||||||
|
_log.error(
|
||||||
_log.error(" -> error treating child: ", exc_child)
|
f"Error processing child from tag{tag.name}: {exc_child}"
|
||||||
_log.error(" => element: ", element, "\n")
|
)
|
||||||
raise exc_child
|
raise exc_child
|
||||||
|
|
||||||
except Exception as exc:
|
return
|
||||||
pass
|
|
||||||
|
|
||||||
return doc
|
def analyze_tag(self, tag: Tag, doc: DoclingDocument) -> None:
|
||||||
|
if tag.name in self.labels:
|
||||||
def analyse_element(self, element: Tag, idx: int, doc: DoclingDocument):
|
self.labels[tag.name] += 1
|
||||||
"""
|
|
||||||
if element.name!=None:
|
|
||||||
_log.debug("\t"*self.level, idx, "\t", f"{element.name} ({self.level})")
|
|
||||||
"""
|
|
||||||
|
|
||||||
if element.name in self.labels:
|
|
||||||
self.labels[element.name] += 1
|
|
||||||
else:
|
else:
|
||||||
self.labels[element.name] = 1
|
self.labels[tag.name] = 1
|
||||||
|
|
||||||
if element.name in ["h1", "h2", "h3", "h4", "h5", "h6"]:
|
if tag.name in ["h1", "h2", "h3", "h4", "h5", "h6"]:
|
||||||
self.handle_header(element, idx, doc)
|
self.handle_header(tag, doc)
|
||||||
elif element.name in ["p"]:
|
elif tag.name in ["p"]:
|
||||||
self.handle_paragraph(element, idx, doc)
|
self.handle_paragraph(tag, doc)
|
||||||
elif element.name in ["pre"]:
|
elif tag.name in ["pre"]:
|
||||||
self.handle_code(element, idx, doc)
|
self.handle_code(tag, doc)
|
||||||
elif element.name in ["ul", "ol"]:
|
elif tag.name in ["ul", "ol"]:
|
||||||
self.handle_list(element, idx, doc)
|
self.handle_list(tag, doc)
|
||||||
elif element.name in ["li"]:
|
elif tag.name in ["li"]:
|
||||||
self.handle_listitem(element, idx, doc)
|
self.handle_list_item(tag, doc)
|
||||||
elif element.name == "table":
|
elif tag.name == "table":
|
||||||
self.handle_table(element, idx, doc)
|
self.handle_table(tag, doc)
|
||||||
elif element.name == "figure":
|
elif tag.name == "figure":
|
||||||
self.handle_figure(element, idx, doc)
|
self.handle_figure(tag, doc)
|
||||||
elif element.name == "img":
|
elif tag.name == "img":
|
||||||
self.handle_image(element, idx, doc)
|
self.handle_image(doc)
|
||||||
else:
|
else:
|
||||||
self.walk(element, doc)
|
self.walk(tag, doc)
|
||||||
|
|
||||||
def get_direct_text(self, item: Tag):
|
def get_text(self, item: PageElement) -> str:
|
||||||
"""Get the direct text of the <li> element (ignoring nested lists)."""
|
"""Get the text content of a tag."""
|
||||||
text = item.find(string=True, recursive=False)
|
parts: list[str] = self.extract_text_recursively(item)
|
||||||
if isinstance(text, str):
|
|
||||||
return text.strip()
|
|
||||||
|
|
||||||
return ""
|
return "".join(parts) + " "
|
||||||
|
|
||||||
# Function to recursively extract text from all child nodes
|
# Function to recursively extract text from all child nodes
|
||||||
def extract_text_recursively(self, item: Tag):
|
def extract_text_recursively(self, item: PageElement) -> list[str]:
|
||||||
result = []
|
result: list[str] = []
|
||||||
|
|
||||||
if isinstance(item, str):
|
if isinstance(item, NavigableString):
|
||||||
return [item]
|
return [item]
|
||||||
|
|
||||||
if item.name not in ["ul", "ol"]:
|
tag = cast(Tag, item)
|
||||||
try:
|
if tag.name not in ["ul", "ol"]:
|
||||||
# Iterate over the children (and their text and tails)
|
for child in tag:
|
||||||
for child in item:
|
# Recursively get the child's text content
|
||||||
try:
|
result.extend(self.extract_text_recursively(child))
|
||||||
# Recursively get the child's text content
|
|
||||||
result.extend(self.extract_text_recursively(child))
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
except:
|
|
||||||
_log.warn("item has no children")
|
|
||||||
pass
|
|
||||||
|
|
||||||
return "".join(result) + " "
|
return ["".join(result) + " "]
|
||||||
|
|
||||||
def handle_header(self, element: Tag, idx: int, doc: DoclingDocument):
|
def handle_header(self, element: Tag, doc: DoclingDocument) -> None:
|
||||||
"""Handles header tags (h1, h2, etc.)."""
|
"""Handles header tags (h1, h2, etc.)."""
|
||||||
hlevel = int(element.name.replace("h", ""))
|
hlevel = int(element.name.replace("h", ""))
|
||||||
slevel = hlevel - 1
|
|
||||||
|
|
||||||
label = DocItemLabel.SECTION_HEADER
|
|
||||||
text = element.text.strip()
|
text = element.text.strip()
|
||||||
|
|
||||||
if hlevel == 1:
|
if hlevel == 1:
|
||||||
@ -197,7 +184,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
elif hlevel < self.level:
|
elif hlevel < self.level:
|
||||||
|
|
||||||
# remove the tail
|
# remove the tail
|
||||||
for key, val in self.parents.items():
|
for key in self.parents.keys():
|
||||||
if key > hlevel:
|
if key > hlevel:
|
||||||
self.parents[key] = None
|
self.parents[key] = None
|
||||||
self.level = hlevel
|
self.level = hlevel
|
||||||
@ -208,27 +195,24 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
level=hlevel,
|
level=hlevel,
|
||||||
)
|
)
|
||||||
|
|
||||||
def handle_code(self, element: Tag, idx: int, doc: DoclingDocument):
|
def handle_code(self, element: Tag, doc: DoclingDocument) -> None:
|
||||||
"""Handles monospace code snippets (pre)."""
|
"""Handles monospace code snippets (pre)."""
|
||||||
if element.text is None:
|
if element.text is None:
|
||||||
return
|
return
|
||||||
text = element.text.strip()
|
text = element.text.strip()
|
||||||
label = DocItemLabel.CODE
|
if text:
|
||||||
if len(text) == 0:
|
doc.add_code(parent=self.parents[self.level], text=text)
|
||||||
return
|
|
||||||
doc.add_code(parent=self.parents[self.level], text=text)
|
|
||||||
|
|
||||||
def handle_paragraph(self, element: Tag, idx: int, doc: DoclingDocument):
|
def handle_paragraph(self, element: Tag, doc: DoclingDocument) -> None:
|
||||||
"""Handles paragraph tags (p)."""
|
"""Handles paragraph tags (p)."""
|
||||||
if element.text is None:
|
if element.text is None:
|
||||||
return
|
return
|
||||||
text = element.text.strip()
|
text = element.text.strip()
|
||||||
label = DocItemLabel.PARAGRAPH
|
label = DocItemLabel.PARAGRAPH
|
||||||
if len(text) == 0:
|
if text:
|
||||||
return
|
doc.add_text(parent=self.parents[self.level], label=label, text=text)
|
||||||
doc.add_text(parent=self.parents[self.level], label=label, text=text)
|
|
||||||
|
|
||||||
def handle_list(self, element: Tag, idx: int, doc: DoclingDocument):
|
def handle_list(self, element: Tag, doc: DoclingDocument) -> None:
|
||||||
"""Handles list tags (ul, ol) and their list items."""
|
"""Handles list tags (ul, ol) and their list items."""
|
||||||
|
|
||||||
if element.name == "ul":
|
if element.name == "ul":
|
||||||
@ -250,18 +234,17 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self.parents[self.level + 1] = None
|
self.parents[self.level + 1] = None
|
||||||
self.level -= 1
|
self.level -= 1
|
||||||
|
|
||||||
def handle_listitem(self, element: Tag, idx: int, doc: DoclingDocument):
|
def handle_list_item(self, element: Tag, doc: DoclingDocument) -> None:
|
||||||
"""Handles listitem tags (li)."""
|
"""Handles listitem tags (li)."""
|
||||||
nested_lists = element.find(["ul", "ol"])
|
nested_list = element.find(["ul", "ol"])
|
||||||
|
|
||||||
parent_list_label = self.parents[self.level].label
|
parent_list_label = self.parents[self.level].label
|
||||||
index_in_list = len(self.parents[self.level].children) + 1
|
index_in_list = len(self.parents[self.level].children) + 1
|
||||||
|
|
||||||
if nested_lists:
|
if nested_list:
|
||||||
name = element.name
|
|
||||||
# Text in list item can be hidden within hierarchy, hence
|
# Text in list item can be hidden within hierarchy, hence
|
||||||
# we need to extract it recursively
|
# we need to extract it recursively
|
||||||
text = self.extract_text_recursively(element)
|
text: str = self.get_text(element)
|
||||||
# Flatten text, remove break lines:
|
# Flatten text, remove break lines:
|
||||||
text = text.replace("\n", "").replace("\r", "")
|
text = text.replace("\n", "").replace("\r", "")
|
||||||
text = " ".join(text.split()).strip()
|
text = " ".join(text.split()).strip()
|
||||||
@ -287,7 +270,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self.parents[self.level + 1] = None
|
self.parents[self.level + 1] = None
|
||||||
self.level -= 1
|
self.level -= 1
|
||||||
|
|
||||||
elif isinstance(element.text, str):
|
elif element.text.strip():
|
||||||
text = element.text.strip()
|
text = element.text.strip()
|
||||||
|
|
||||||
marker = ""
|
marker = ""
|
||||||
@ -302,59 +285,79 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
parent=self.parents[self.level],
|
parent=self.parents[self.level],
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
_log.warn("list-item has no text: ", element)
|
_log.warning(f"list-item has no text: {element}")
|
||||||
|
|
||||||
def handle_table(self, element: Tag, idx: int, doc: DoclingDocument):
|
|
||||||
"""Handles table tags."""
|
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def parse_table_data(element: Tag) -> Optional[TableData]:
|
||||||
nested_tables = element.find("table")
|
nested_tables = element.find("table")
|
||||||
if nested_tables is not None:
|
if nested_tables is not None:
|
||||||
_log.warn("detected nested tables: skipping for now")
|
_log.warning("Skipping nested table.")
|
||||||
return
|
return None
|
||||||
|
|
||||||
# Count the number of rows (number of <tr> elements)
|
# Count the number of rows (number of <tr> elements)
|
||||||
num_rows = len(element.find_all("tr"))
|
num_rows = len(element("tr"))
|
||||||
|
|
||||||
# Find the number of columns (taking into account colspan)
|
# Find the number of columns (taking into account colspan)
|
||||||
num_cols = 0
|
num_cols = 0
|
||||||
for row in element.find_all("tr"):
|
for row in element("tr"):
|
||||||
col_count = 0
|
col_count = 0
|
||||||
for cell in row.find_all(["td", "th"]):
|
if not isinstance(row, Tag):
|
||||||
colspan = int(cell.get("colspan", 1))
|
continue
|
||||||
|
for cell in row(["td", "th"]):
|
||||||
|
if not isinstance(row, Tag):
|
||||||
|
continue
|
||||||
|
val = cast(Tag, cell).get("colspan", "1")
|
||||||
|
colspan = int(val) if (isinstance(val, str) and val.isnumeric()) else 1
|
||||||
col_count += colspan
|
col_count += colspan
|
||||||
num_cols = max(num_cols, col_count)
|
num_cols = max(num_cols, col_count)
|
||||||
|
|
||||||
grid = [[None for _ in range(num_cols)] for _ in range(num_rows)]
|
grid: list = [[None for _ in range(num_cols)] for _ in range(num_rows)]
|
||||||
|
|
||||||
data = TableData(num_rows=num_rows, num_cols=num_cols, table_cells=[])
|
data = TableData(num_rows=num_rows, num_cols=num_cols, table_cells=[])
|
||||||
|
|
||||||
# Iterate over the rows in the table
|
# Iterate over the rows in the table
|
||||||
for row_idx, row in enumerate(element.find_all("tr")):
|
for row_idx, row in enumerate(element("tr")):
|
||||||
|
if not isinstance(row, Tag):
|
||||||
|
continue
|
||||||
|
|
||||||
# For each row, find all the column cells (both <td> and <th>)
|
# For each row, find all the column cells (both <td> and <th>)
|
||||||
cells = row.find_all(["td", "th"])
|
cells = row(["td", "th"])
|
||||||
|
|
||||||
# Check if each cell in the row is a header -> means it is a column header
|
# Check if each cell in the row is a header -> means it is a column header
|
||||||
col_header = True
|
col_header = True
|
||||||
for j, html_cell in enumerate(cells):
|
for html_cell in cells:
|
||||||
if html_cell.name == "td":
|
if isinstance(html_cell, Tag) and html_cell.name == "td":
|
||||||
col_header = False
|
col_header = False
|
||||||
|
|
||||||
|
# Extract the text content of each cell
|
||||||
col_idx = 0
|
col_idx = 0
|
||||||
# Extract and print the text content of each cell
|
for html_cell in cells:
|
||||||
for _, html_cell in enumerate(cells):
|
if not isinstance(html_cell, Tag):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# extract inline formulas
|
||||||
|
for formula in html_cell("inline-formula"):
|
||||||
|
math_parts = formula.text.split("$$")
|
||||||
|
if len(math_parts) == 3:
|
||||||
|
math_formula = f"$${math_parts[1]}$$"
|
||||||
|
formula.replace_with(NavigableString(math_formula))
|
||||||
|
|
||||||
|
# TODO: extract content correctly from table-cells with lists
|
||||||
text = html_cell.text
|
text = html_cell.text
|
||||||
try:
|
|
||||||
text = self.extract_table_cell_text(html_cell)
|
|
||||||
except Exception as exc:
|
|
||||||
_log.warn("exception: ", exc)
|
|
||||||
exit(-1)
|
|
||||||
|
|
||||||
# label = html_cell.name
|
# label = html_cell.name
|
||||||
|
col_val = html_cell.get("colspan", "1")
|
||||||
col_span = int(html_cell.get("colspan", 1))
|
col_span = (
|
||||||
row_span = int(html_cell.get("rowspan", 1))
|
int(col_val)
|
||||||
|
if isinstance(col_val, str) and col_val.isnumeric()
|
||||||
|
else 1
|
||||||
|
)
|
||||||
|
row_val = html_cell.get("rowspan", "1")
|
||||||
|
row_span = (
|
||||||
|
int(row_val)
|
||||||
|
if isinstance(row_val, str) and row_val.isnumeric()
|
||||||
|
else 1
|
||||||
|
)
|
||||||
|
|
||||||
while grid[row_idx][col_idx] is not None:
|
while grid[row_idx][col_idx] is not None:
|
||||||
col_idx += 1
|
col_idx += 1
|
||||||
@ -362,7 +365,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
for c in range(col_span):
|
for c in range(col_span):
|
||||||
grid[row_idx + r][col_idx + c] = text
|
grid[row_idx + r][col_idx + c] = text
|
||||||
|
|
||||||
cell = TableCell(
|
table_cell = TableCell(
|
||||||
text=text,
|
text=text,
|
||||||
row_span=row_span,
|
row_span=row_span,
|
||||||
col_span=col_span,
|
col_span=col_span,
|
||||||
@ -373,57 +376,57 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
col_header=col_header,
|
col_header=col_header,
|
||||||
row_header=((not col_header) and html_cell.name == "th"),
|
row_header=((not col_header) and html_cell.name == "th"),
|
||||||
)
|
)
|
||||||
data.table_cells.append(cell)
|
data.table_cells.append(table_cell)
|
||||||
|
|
||||||
doc.add_table(data=data, parent=self.parents[self.level])
|
return data
|
||||||
|
|
||||||
def get_list_text(self, list_element: Tag, level=0):
|
def handle_table(self, element: Tag, doc: DoclingDocument) -> None:
|
||||||
|
"""Handles table tags."""
|
||||||
|
|
||||||
|
table_data = HTMLDocumentBackend.parse_table_data(element)
|
||||||
|
|
||||||
|
if table_data is not None:
|
||||||
|
doc.add_table(data=table_data, parent=self.parents[self.level])
|
||||||
|
|
||||||
|
def get_list_text(self, list_element: Tag, level: int = 0) -> list[str]:
|
||||||
"""Recursively extract text from <ul> or <ol> with proper indentation."""
|
"""Recursively extract text from <ul> or <ol> with proper indentation."""
|
||||||
result = []
|
result = []
|
||||||
bullet_char = "*" # Default bullet character for unordered lists
|
bullet_char = "*" # Default bullet character for unordered lists
|
||||||
|
|
||||||
if list_element.name == "ol": # For ordered lists, use numbers
|
if list_element.name == "ol": # For ordered lists, use numbers
|
||||||
for i, li in enumerate(list_element.find_all("li", recursive=False), 1):
|
for i, li in enumerate(list_element("li", recursive=False), 1):
|
||||||
|
if not isinstance(li, Tag):
|
||||||
|
continue
|
||||||
# Add numbering for ordered lists
|
# Add numbering for ordered lists
|
||||||
result.append(f"{' ' * level}{i}. {li.get_text(strip=True)}")
|
result.append(f"{' ' * level}{i}. {li.get_text(strip=True)}")
|
||||||
# Handle nested lists
|
# Handle nested lists
|
||||||
nested_list = li.find(["ul", "ol"])
|
nested_list = li.find(["ul", "ol"])
|
||||||
if nested_list:
|
if isinstance(nested_list, Tag):
|
||||||
result.extend(self.get_list_text(nested_list, level + 1))
|
result.extend(self.get_list_text(nested_list, level + 1))
|
||||||
elif list_element.name == "ul": # For unordered lists, use bullet points
|
elif list_element.name == "ul": # For unordered lists, use bullet points
|
||||||
for li in list_element.find_all("li", recursive=False):
|
for li in list_element("li", recursive=False):
|
||||||
|
if not isinstance(li, Tag):
|
||||||
|
continue
|
||||||
# Add bullet points for unordered lists
|
# Add bullet points for unordered lists
|
||||||
result.append(
|
result.append(
|
||||||
f"{' ' * level}{bullet_char} {li.get_text(strip=True)}"
|
f"{' ' * level}{bullet_char} {li.get_text(strip=True)}"
|
||||||
)
|
)
|
||||||
# Handle nested lists
|
# Handle nested lists
|
||||||
nested_list = li.find(["ul", "ol"])
|
nested_list = li.find(["ul", "ol"])
|
||||||
if nested_list:
|
if isinstance(nested_list, Tag):
|
||||||
result.extend(self.get_list_text(nested_list, level + 1))
|
result.extend(self.get_list_text(nested_list, level + 1))
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def extract_table_cell_text(self, cell: Tag):
|
def handle_figure(self, element: Tag, doc: DoclingDocument) -> None:
|
||||||
"""Extract text from a table cell, including lists with indents."""
|
|
||||||
contains_lists = cell.find(["ul", "ol"])
|
|
||||||
if contains_lists is None:
|
|
||||||
return cell.text
|
|
||||||
else:
|
|
||||||
_log.debug(
|
|
||||||
"should extract the content correctly for table-cells with lists ..."
|
|
||||||
)
|
|
||||||
return cell.text
|
|
||||||
|
|
||||||
def handle_figure(self, element: Tag, idx: int, doc: DoclingDocument):
|
|
||||||
"""Handles image tags (img)."""
|
"""Handles image tags (img)."""
|
||||||
|
|
||||||
# Extract the image URI from the <img> tag
|
# Extract the image URI from the <img> tag
|
||||||
# image_uri = root.xpath('//figure//img/@src')[0]
|
# image_uri = root.xpath('//figure//img/@src')[0]
|
||||||
|
|
||||||
contains_captions = element.find(["figcaption"])
|
contains_captions = element.find(["figcaption"])
|
||||||
if contains_captions is None:
|
if not isinstance(contains_captions, Tag):
|
||||||
doc.add_picture(parent=self.parents[self.level], caption=None)
|
doc.add_picture(parent=self.parents[self.level], caption=None)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
texts = []
|
texts = []
|
||||||
for item in contains_captions:
|
for item in contains_captions:
|
||||||
@ -437,6 +440,6 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
caption=fig_caption,
|
caption=fig_caption,
|
||||||
)
|
)
|
||||||
|
|
||||||
def handle_image(self, element: Tag, idx, doc: DoclingDocument):
|
def handle_image(self, doc: DoclingDocument) -> None:
|
||||||
"""Handles image tags (img)."""
|
"""Handles image tags (img)."""
|
||||||
doc.add_picture(parent=self.parents[self.level], caption=None)
|
doc.add_picture(parent=self.parents[self.level], caption=None)
|
||||||
|
@ -4,7 +4,7 @@ from io import BytesIO
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Final, Optional, Union
|
from typing import Final, Optional, Union
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup, Tag
|
||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
DocItemLabel,
|
DocItemLabel,
|
||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
@ -12,14 +12,13 @@ from docling_core.types.doc import (
|
|||||||
GroupItem,
|
GroupItem,
|
||||||
GroupLabel,
|
GroupLabel,
|
||||||
NodeItem,
|
NodeItem,
|
||||||
TableCell,
|
|
||||||
TableData,
|
|
||||||
TextItem,
|
TextItem,
|
||||||
)
|
)
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from typing_extensions import TypedDict, override
|
from typing_extensions import TypedDict, override
|
||||||
|
|
||||||
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
||||||
|
from docling.backend.html_backend import HTMLDocumentBackend
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.document import InputDocument
|
from docling.datamodel.document import InputDocument
|
||||||
|
|
||||||
@ -540,71 +539,10 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
) -> None:
|
) -> None:
|
||||||
soup = BeautifulSoup(table_xml_component["content"], "html.parser")
|
soup = BeautifulSoup(table_xml_component["content"], "html.parser")
|
||||||
table_tag = soup.find("table")
|
table_tag = soup.find("table")
|
||||||
|
if not isinstance(table_tag, Tag):
|
||||||
nested_tables = table_tag.find("table")
|
|
||||||
if nested_tables:
|
|
||||||
_log.warning(f"Skipping nested table in {str(self.file)}")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
# Count the number of rows (number of <tr> elements)
|
data = HTMLDocumentBackend.parse_table_data(table_tag)
|
||||||
num_rows = len(table_tag.find_all("tr"))
|
|
||||||
|
|
||||||
# Find the number of columns (taking into account colspan)
|
|
||||||
num_cols = 0
|
|
||||||
for row in table_tag.find_all("tr"):
|
|
||||||
col_count = 0
|
|
||||||
for cell in row.find_all(["td", "th"]):
|
|
||||||
colspan = int(cell.get("colspan", 1))
|
|
||||||
col_count += colspan
|
|
||||||
num_cols = max(num_cols, col_count)
|
|
||||||
|
|
||||||
grid = [[None for _ in range(num_cols)] for _ in range(num_rows)]
|
|
||||||
|
|
||||||
data = TableData(num_rows=num_rows, num_cols=num_cols, table_cells=[])
|
|
||||||
|
|
||||||
# Iterate over the rows in the table
|
|
||||||
for row_idx, row in enumerate(table_tag.find_all("tr")):
|
|
||||||
# For each row, find all the column cells (both <td> and <th>)
|
|
||||||
cells = row.find_all(["td", "th"])
|
|
||||||
|
|
||||||
# Check if each cell in the row is a header -> means it is a column header
|
|
||||||
col_header = True
|
|
||||||
for j, html_cell in enumerate(cells):
|
|
||||||
if html_cell.name == "td":
|
|
||||||
col_header = False
|
|
||||||
|
|
||||||
# Extract and print the text content of each cell
|
|
||||||
col_idx = 0
|
|
||||||
for _, html_cell in enumerate(cells):
|
|
||||||
# extract inline formulas
|
|
||||||
for formula in html_cell.find_all("inline-formula"):
|
|
||||||
math_parts = formula.text.split("$$")
|
|
||||||
if len(math_parts) == 3:
|
|
||||||
math_formula = f"$${math_parts[1]}$$"
|
|
||||||
formula.replaceWith(math_formula)
|
|
||||||
text = html_cell.text
|
|
||||||
|
|
||||||
col_span = int(html_cell.get("colspan", 1))
|
|
||||||
row_span = int(html_cell.get("rowspan", 1))
|
|
||||||
|
|
||||||
while grid[row_idx][col_idx] is not None:
|
|
||||||
col_idx += 1
|
|
||||||
for r in range(row_span):
|
|
||||||
for c in range(col_span):
|
|
||||||
grid[row_idx + r][col_idx + c] = text
|
|
||||||
|
|
||||||
cell = TableCell(
|
|
||||||
text=text,
|
|
||||||
row_span=row_span,
|
|
||||||
col_span=col_span,
|
|
||||||
start_row_offset_idx=row_idx,
|
|
||||||
end_row_offset_idx=row_idx + row_span,
|
|
||||||
start_col_offset_idx=col_idx,
|
|
||||||
end_col_offset_idx=col_idx + col_span,
|
|
||||||
col_header=col_header,
|
|
||||||
row_header=((not col_header) and html_cell.name == "th"),
|
|
||||||
)
|
|
||||||
data.table_cells.append(cell)
|
|
||||||
|
|
||||||
# TODO: format label vs caption once styling is supported
|
# TODO: format label vs caption once styling is supported
|
||||||
label = table_xml_component["label"]
|
label = table_xml_component["label"]
|
||||||
@ -616,7 +554,8 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
|
||||||
doc.add_table(data=data, parent=parent, caption=table_caption)
|
if data is not None:
|
||||||
|
doc.add_table(data=data, parent=parent, caption=table_caption)
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -673,7 +612,6 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
def _walk_linear(
|
def _walk_linear(
|
||||||
self, doc: DoclingDocument, parent: NodeItem, node: etree._Element
|
self, doc: DoclingDocument, parent: NodeItem, node: etree._Element
|
||||||
) -> str:
|
) -> str:
|
||||||
# _log.debug(f"Walking on {node.tag} with {len(list(node))} children")
|
|
||||||
skip_tags = ["term"]
|
skip_tags = ["term"]
|
||||||
flush_tags = ["ack", "sec", "list", "boxed-text", "disp-formula", "fig"]
|
flush_tags = ["ack", "sec", "list", "boxed-text", "disp-formula", "fig"]
|
||||||
new_parent: NodeItem = parent
|
new_parent: NodeItem = parent
|
||||||
|
@ -14,7 +14,7 @@ from abc import ABC, abstractmethod
|
|||||||
from enum import Enum, unique
|
from enum import Enum, unique
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Final, Optional, Union
|
from typing import Final, Optional, Union
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, Tag
|
||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
@ -1406,6 +1406,10 @@ class XmlTable:
|
|||||||
http://oasis-open.org/specs/soextblx.dtd
|
http://oasis-open.org/specs/soextblx.dtd
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
class ColInfo(TypedDict):
|
||||||
|
ncols: int
|
||||||
|
colinfo: list[dict]
|
||||||
|
|
||||||
class MinColInfoType(TypedDict):
|
class MinColInfoType(TypedDict):
|
||||||
offset: list[int]
|
offset: list[int]
|
||||||
colwidth: list[int]
|
colwidth: list[int]
|
||||||
@ -1425,7 +1429,7 @@ class XmlTable:
|
|||||||
self.empty_text = ""
|
self.empty_text = ""
|
||||||
self._soup = BeautifulSoup(input, features="xml")
|
self._soup = BeautifulSoup(input, features="xml")
|
||||||
|
|
||||||
def _create_tg_range(self, tgs: list[dict[str, Any]]) -> dict[int, ColInfoType]:
|
def _create_tg_range(self, tgs: list[ColInfo]) -> dict[int, ColInfoType]:
|
||||||
"""Create a unified range along the table groups.
|
"""Create a unified range along the table groups.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -1532,19 +1536,26 @@ class XmlTable:
|
|||||||
Returns:
|
Returns:
|
||||||
A docling table object.
|
A docling table object.
|
||||||
"""
|
"""
|
||||||
tgs_align = []
|
tgs_align: list[XmlTable.ColInfo] = []
|
||||||
tg_secs = table.find_all("tgroup")
|
tg_secs = table("tgroup")
|
||||||
if tg_secs:
|
if tg_secs:
|
||||||
for tg_sec in tg_secs:
|
for tg_sec in tg_secs:
|
||||||
ncols = tg_sec.get("cols", None)
|
if not isinstance(tg_sec, Tag):
|
||||||
if ncols:
|
continue
|
||||||
ncols = int(ncols)
|
col_val = tg_sec.get("cols")
|
||||||
tg_align = {"ncols": ncols, "colinfo": []}
|
ncols = (
|
||||||
cs_secs = tg_sec.find_all("colspec")
|
int(col_val)
|
||||||
|
if isinstance(col_val, str) and col_val.isnumeric()
|
||||||
|
else 1
|
||||||
|
)
|
||||||
|
tg_align: XmlTable.ColInfo = {"ncols": ncols, "colinfo": []}
|
||||||
|
cs_secs = tg_sec("colspec")
|
||||||
if cs_secs:
|
if cs_secs:
|
||||||
for cs_sec in cs_secs:
|
for cs_sec in cs_secs:
|
||||||
colname = cs_sec.get("colname", None)
|
if not isinstance(cs_sec, Tag):
|
||||||
colwidth = cs_sec.get("colwidth", None)
|
continue
|
||||||
|
colname = cs_sec.get("colname")
|
||||||
|
colwidth = cs_sec.get("colwidth")
|
||||||
tg_align["colinfo"].append(
|
tg_align["colinfo"].append(
|
||||||
{"colname": colname, "colwidth": colwidth}
|
{"colname": colname, "colwidth": colwidth}
|
||||||
)
|
)
|
||||||
@ -1565,16 +1576,23 @@ class XmlTable:
|
|||||||
table_data: list[TableCell] = []
|
table_data: list[TableCell] = []
|
||||||
i_row_global = 0
|
i_row_global = 0
|
||||||
is_row_empty: bool = True
|
is_row_empty: bool = True
|
||||||
tg_secs = table.find_all("tgroup")
|
tg_secs = table("tgroup")
|
||||||
if tg_secs:
|
if tg_secs:
|
||||||
for itg, tg_sec in enumerate(tg_secs):
|
for itg, tg_sec in enumerate(tg_secs):
|
||||||
|
if not isinstance(tg_sec, Tag):
|
||||||
|
continue
|
||||||
tg_range = tgs_range[itg]
|
tg_range = tgs_range[itg]
|
||||||
row_secs = tg_sec.find_all(["row", "tr"])
|
row_secs = tg_sec(["row", "tr"])
|
||||||
|
|
||||||
if row_secs:
|
if row_secs:
|
||||||
for row_sec in row_secs:
|
for row_sec in row_secs:
|
||||||
entry_secs = row_sec.find_all(["entry", "td"])
|
if not isinstance(row_sec, Tag):
|
||||||
is_header: bool = row_sec.parent.name in ["thead"]
|
continue
|
||||||
|
entry_secs = row_sec(["entry", "td"])
|
||||||
|
is_header: bool = (
|
||||||
|
row_sec.parent is not None
|
||||||
|
and row_sec.parent.name == "thead"
|
||||||
|
)
|
||||||
|
|
||||||
ncols = 0
|
ncols = 0
|
||||||
local_row: list[TableCell] = []
|
local_row: list[TableCell] = []
|
||||||
@ -1582,23 +1600,26 @@ class XmlTable:
|
|||||||
if entry_secs:
|
if entry_secs:
|
||||||
wrong_nbr_cols = False
|
wrong_nbr_cols = False
|
||||||
for ientry, entry_sec in enumerate(entry_secs):
|
for ientry, entry_sec in enumerate(entry_secs):
|
||||||
|
if not isinstance(entry_sec, Tag):
|
||||||
|
continue
|
||||||
text = entry_sec.get_text().strip()
|
text = entry_sec.get_text().strip()
|
||||||
|
|
||||||
# start-end
|
# start-end
|
||||||
namest = entry_sec.attrs.get("namest", None)
|
namest = entry_sec.get("namest")
|
||||||
nameend = entry_sec.attrs.get("nameend", None)
|
nameend = entry_sec.get("nameend")
|
||||||
if isinstance(namest, str) and namest.isnumeric():
|
start = (
|
||||||
namest = int(namest)
|
int(namest)
|
||||||
else:
|
if isinstance(namest, str) and namest.isnumeric()
|
||||||
namest = ientry + 1
|
else ientry + 1
|
||||||
|
)
|
||||||
if isinstance(nameend, str) and nameend.isnumeric():
|
if isinstance(nameend, str) and nameend.isnumeric():
|
||||||
nameend = int(nameend)
|
end = int(nameend)
|
||||||
shift = 0
|
shift = 0
|
||||||
else:
|
else:
|
||||||
nameend = ientry + 2
|
end = ientry + 2
|
||||||
shift = 1
|
shift = 1
|
||||||
|
|
||||||
if nameend > len(tg_range["cell_offst"]):
|
if end > len(tg_range["cell_offst"]):
|
||||||
wrong_nbr_cols = True
|
wrong_nbr_cols = True
|
||||||
self.nbr_messages += 1
|
self.nbr_messages += 1
|
||||||
if self.nbr_messages <= self.max_nbr_messages:
|
if self.nbr_messages <= self.max_nbr_messages:
|
||||||
@ -1608,8 +1629,8 @@ class XmlTable:
|
|||||||
break
|
break
|
||||||
|
|
||||||
range_ = [
|
range_ = [
|
||||||
tg_range["cell_offst"][namest - 1],
|
tg_range["cell_offst"][start - 1],
|
||||||
tg_range["cell_offst"][nameend - 1] - shift,
|
tg_range["cell_offst"][end - 1] - shift,
|
||||||
]
|
]
|
||||||
|
|
||||||
# add row and replicate cell if needed
|
# add row and replicate cell if needed
|
||||||
@ -1668,7 +1689,7 @@ class XmlTable:
|
|||||||
A docling table data.
|
A docling table data.
|
||||||
"""
|
"""
|
||||||
section = self._soup.find("table")
|
section = self._soup.find("table")
|
||||||
if section is not None:
|
if isinstance(section, Tag):
|
||||||
table = self._parse_table(section)
|
table = self._parse_table(section)
|
||||||
if table.num_rows == 0 or table.num_cols == 0:
|
if table.num_rows == 0 or table.num_cols == 0:
|
||||||
_log.warning("The parsed USPTO table is empty")
|
_log.warning("The parsed USPTO table is empty")
|
||||||
|
39
poetry.lock
generated
39
poetry.lock
generated
@ -282,17 +282,18 @@ testing = ["jaraco.test", "pytest (!=8.0.*)", "pytest (>=6,!=8.1.*)", "pytest-ch
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "beautifulsoup4"
|
name = "beautifulsoup4"
|
||||||
version = "4.12.3"
|
version = "4.13.3"
|
||||||
description = "Screen-scraping library"
|
description = "Screen-scraping library"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.6.0"
|
python-versions = ">=3.7.0"
|
||||||
files = [
|
files = [
|
||||||
{file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
|
{file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"},
|
||||||
{file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
|
{file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
soupsieve = ">1.2"
|
soupsieve = ">1.2"
|
||||||
|
typing-extensions = ">=4.0.0"
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
cchardet = ["cchardet"]
|
cchardet = ["cchardet"]
|
||||||
@ -820,13 +821,13 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "docling-core"
|
name = "docling-core"
|
||||||
version = "2.19.0"
|
version = "2.19.1"
|
||||||
description = "A python library to define and validate data types in Docling."
|
description = "A python library to define and validate data types in Docling."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "<4.0,>=3.9"
|
python-versions = "<4.0,>=3.9"
|
||||||
files = [
|
files = [
|
||||||
{file = "docling_core-2.19.0-py3-none-any.whl", hash = "sha256:caa1e13d98fa9a00608091c386609c75b3560c7291e842c252f0b6f8d5812dbd"},
|
{file = "docling_core-2.19.1-py3-none-any.whl", hash = "sha256:ca7bd4dacd75611c5ea4f205192b71a8f22205e615eff1a16aac7082644d3b2e"},
|
||||||
{file = "docling_core-2.19.0.tar.gz", hash = "sha256:ebf3062e31155bb5f0e6132056a2d239a0e6e693a75c5758886909bb9fef461a"},
|
{file = "docling_core-2.19.1.tar.gz", hash = "sha256:e2769b816c669cdf27024dd3b219d3ecaf2161691dd5e8e5e8ce439557ea0928"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@ -1317,13 +1318,13 @@ colorama = ">=0.4"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "griffe-pydantic"
|
name = "griffe-pydantic"
|
||||||
version = "1.1.0"
|
version = "1.1.2"
|
||||||
description = "Griffe extension for Pydantic."
|
description = "Griffe extension for Pydantic."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
files = [
|
files = [
|
||||||
{file = "griffe_pydantic-1.1.0-py3-none-any.whl", hash = "sha256:ac9cc2d9b016cf302d8d9f577c9b3ca2793d88060f500d0b2a65f33a4a785cf1"},
|
{file = "griffe_pydantic-1.1.2-py3-none-any.whl", hash = "sha256:8ad53218ca6e9c24ccec83588eb435f562b30355f641fe336e81b1e00ea05f3c"},
|
||||||
{file = "griffe_pydantic-1.1.0.tar.gz", hash = "sha256:9c5a701cc485dab087857c1ac960b44671acee5008aaae0752f610b2aa82b068"},
|
{file = "griffe_pydantic-1.1.2.tar.gz", hash = "sha256:381eacd8854a85811522b4f6dc9a1ef0fb5931825081379d70ff3a425b0d4ea1"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@ -7021,18 +7022,18 @@ vision = ["Pillow (>=10.0.1,<=15.0)"]
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "transformers"
|
name = "transformers"
|
||||||
version = "4.48.3"
|
version = "4.49.0"
|
||||||
description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
|
description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9.0"
|
python-versions = ">=3.9.0"
|
||||||
files = [
|
files = [
|
||||||
{file = "transformers-4.48.3-py3-none-any.whl", hash = "sha256:78697f990f5ef350c23b46bf86d5081ce96b49479ab180b2de7687267de8fd36"},
|
{file = "transformers-4.49.0-py3-none-any.whl", hash = "sha256:6b4fded1c5fee04d384b1014495b4235a2b53c87503d7d592423c06128cbbe03"},
|
||||||
{file = "transformers-4.48.3.tar.gz", hash = "sha256:a5e8f1e9a6430aa78215836be70cecd3f872d99eeda300f41ad6cc841724afdb"},
|
{file = "transformers-4.49.0.tar.gz", hash = "sha256:7e40e640b5b8dc3f48743f5f5adbdce3660c82baafbd3afdfc04143cdbd2089e"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
filelock = "*"
|
filelock = "*"
|
||||||
huggingface-hub = ">=0.24.0,<1.0"
|
huggingface-hub = ">=0.26.0,<1.0"
|
||||||
numpy = ">=1.17"
|
numpy = ">=1.17"
|
||||||
packaging = ">=20.0"
|
packaging = ">=20.0"
|
||||||
pyyaml = ">=5.1"
|
pyyaml = ">=5.1"
|
||||||
@ -7045,13 +7046,13 @@ tqdm = ">=4.27"
|
|||||||
[package.extras]
|
[package.extras]
|
||||||
accelerate = ["accelerate (>=0.26.0)"]
|
accelerate = ["accelerate (>=0.26.0)"]
|
||||||
agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=2.0)"]
|
agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=2.0)"]
|
||||||
all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.0)", "torchaudio", "torchvision"]
|
all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.0)", "torchaudio", "torchvision"]
|
||||||
audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
|
audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
|
||||||
benchmark = ["optimum-benchmark (>=0.3.0)"]
|
benchmark = ["optimum-benchmark (>=0.3.0)"]
|
||||||
codecarbon = ["codecarbon (>=2.8.1)"]
|
codecarbon = ["codecarbon (>=2.8.1)"]
|
||||||
deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"]
|
deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"]
|
||||||
deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
|
deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
|
||||||
dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
|
dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
|
||||||
dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.21,<0.22)", "urllib3 (<2.0.0)"]
|
dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.21,<0.22)", "urllib3 (<2.0.0)"]
|
||||||
dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "libcst", "librosa", "nltk (<=3.8.1)", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
|
dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "libcst", "librosa", "nltk (<=3.8.1)", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
|
||||||
flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"]
|
flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"]
|
||||||
@ -7084,8 +7085,8 @@ tokenizers = ["tokenizers (>=0.21,<0.22)"]
|
|||||||
torch = ["accelerate (>=0.26.0)", "torch (>=2.0)"]
|
torch = ["accelerate (>=0.26.0)", "torch (>=2.0)"]
|
||||||
torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
|
torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
|
||||||
torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"]
|
torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"]
|
||||||
torchhub = ["filelock", "huggingface-hub (>=0.24.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.21,<0.22)", "torch (>=2.0)", "tqdm (>=4.27)"]
|
torchhub = ["filelock", "huggingface-hub (>=0.26.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.21,<0.22)", "torch (>=2.0)", "tqdm (>=4.27)"]
|
||||||
video = ["av (==9.2.0)"]
|
video = ["av"]
|
||||||
vision = ["Pillow (>=10.0.1,<=15.0)"]
|
vision = ["Pillow (>=10.0.1,<=15.0)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -7810,4 +7811,4 @@ vlm = ["transformers", "transformers"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.9"
|
python-versions = "^3.9"
|
||||||
content-hash = "b19c39233b5c7ca2a4feed4886542395492ed43f4957f9c6f097b03e8d5b6148"
|
content-hash = "3f657e7af78058e75dfb9f32e373f7f70e5e68a42a5b3603189e2251be90f349"
|
||||||
|
@ -45,7 +45,7 @@ scipy = [
|
|||||||
typer = "^0.12.5"
|
typer = "^0.12.5"
|
||||||
python-docx = "^1.1.2"
|
python-docx = "^1.1.2"
|
||||||
python-pptx = "^1.0.2"
|
python-pptx = "^1.0.2"
|
||||||
beautifulsoup4 = ">=4.12.3,<4.13.0"
|
beautifulsoup4 = "^4.12.3"
|
||||||
pandas = "^2.1.4"
|
pandas = "^2.1.4"
|
||||||
marko = "^2.1.2"
|
marko = "^2.1.2"
|
||||||
openpyxl = "^3.1.5"
|
openpyxl = "^3.1.5"
|
||||||
@ -164,7 +164,6 @@ module = [
|
|||||||
"easyocr.*",
|
"easyocr.*",
|
||||||
"ocrmac.*",
|
"ocrmac.*",
|
||||||
"lxml.*",
|
"lxml.*",
|
||||||
"bs4.*",
|
|
||||||
"huggingface_hub.*",
|
"huggingface_hub.*",
|
||||||
"transformers.*",
|
"transformers.*",
|
||||||
]
|
]
|
||||||
|
@ -410,68 +410,65 @@ item-0 at level 0: unspecified: group _root_
|
|||||||
item-396 at level 3: list: group list
|
item-396 at level 3: list: group list
|
||||||
item-397 at level 4: list_item: list of books (useful looking abstracts)
|
item-397 at level 4: list_item: list of books (useful looking abstracts)
|
||||||
item-398 at level 4: list_item: Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine
|
item-398 at level 4: list_item: Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine
|
||||||
item-399 at level 4: list_item:
|
item-399 at level 4: list_item: Ducks at a Distance, by Rob Hine ... uide to identification of US waterfowl
|
||||||
item-400 at level 4: list_item: Ducks at a Distance, by Rob Hine ... uide to identification of US waterfowl
|
item-400 at level 3: table with [3x2]
|
||||||
item-401 at level 3: table with [3x2]
|
item-401 at level 3: picture
|
||||||
item-402 at level 3: picture
|
item-402 at level 3: list: group list
|
||||||
item-403 at level 3: list: group list
|
item-403 at level 4: list_item: Ducks
|
||||||
item-404 at level 4: list_item: Ducks
|
item-404 at level 4: list_item: Game birds
|
||||||
item-405 at level 4: list_item: Game birds
|
item-405 at level 4: list_item: Bird common names
|
||||||
item-406 at level 4: list_item: Bird common names
|
item-406 at level 3: list: group list
|
||||||
item-407 at level 3: list: group list
|
item-407 at level 4: list_item: All accuracy disputes
|
||||||
item-408 at level 4: list_item: All accuracy disputes
|
item-408 at level 4: list_item: Accuracy disputes from February 2020
|
||||||
item-409 at level 4: list_item: Accuracy disputes from February 2020
|
item-409 at level 4: list_item: CS1 Finnish-language sources (fi)
|
||||||
item-410 at level 4: list_item: CS1 Finnish-language sources (fi)
|
item-410 at level 4: list_item: CS1 Latvian-language sources (lv)
|
||||||
item-411 at level 4: list_item: CS1 Latvian-language sources (lv)
|
item-411 at level 4: list_item: CS1 Swedish-language sources (sv)
|
||||||
item-412 at level 4: list_item: CS1 Swedish-language sources (sv)
|
item-412 at level 4: list_item: Articles with short description
|
||||||
item-413 at level 4: list_item: Articles with short description
|
item-413 at level 4: list_item: Short description is different from Wikidata
|
||||||
item-414 at level 4: list_item: Short description is different from Wikidata
|
item-414 at level 4: list_item: Wikipedia indefinitely move-protected pages
|
||||||
item-415 at level 4: list_item: Wikipedia indefinitely move-protected pages
|
item-415 at level 4: list_item: Wikipedia indefinitely semi-protected pages
|
||||||
item-416 at level 4: list_item: Wikipedia indefinitely semi-protected pages
|
item-416 at level 4: list_item: Articles with 'species' microformats
|
||||||
item-417 at level 4: list_item: Articles with 'species' microformats
|
item-417 at level 4: list_item: Articles containing Old English (ca. 450-1100)-language text
|
||||||
item-418 at level 4: list_item: Articles containing Old English (ca. 450-1100)-language text
|
item-418 at level 4: list_item: Articles containing Dutch-language text
|
||||||
item-419 at level 4: list_item: Articles containing Dutch-language text
|
item-419 at level 4: list_item: Articles containing German-language text
|
||||||
item-420 at level 4: list_item: Articles containing German-language text
|
item-420 at level 4: list_item: Articles containing Norwegian-language text
|
||||||
item-421 at level 4: list_item: Articles containing Norwegian-language text
|
item-421 at level 4: list_item: Articles containing Lithuanian-language text
|
||||||
item-422 at level 4: list_item: Articles containing Lithuanian-language text
|
item-422 at level 4: list_item: Articles containing Ancient Greek (to 1453)-language text
|
||||||
item-423 at level 4: list_item: Articles containing Ancient Greek (to 1453)-language text
|
item-423 at level 4: list_item: All articles with self-published sources
|
||||||
item-424 at level 4: list_item: All articles with self-published sources
|
item-424 at level 4: list_item: Articles with self-published sources from February 2020
|
||||||
item-425 at level 4: list_item: Articles with self-published sources from February 2020
|
item-425 at level 4: list_item: All articles with unsourced statements
|
||||||
item-426 at level 4: list_item: All articles with unsourced statements
|
item-426 at level 4: list_item: Articles with unsourced statements from January 2022
|
||||||
item-427 at level 4: list_item: Articles with unsourced statements from January 2022
|
item-427 at level 4: list_item: CS1: long volume value
|
||||||
item-428 at level 4: list_item: CS1: long volume value
|
item-428 at level 4: list_item: Pages using Sister project links with wikidata mismatch
|
||||||
item-429 at level 4: list_item: Pages using Sister project links with wikidata mismatch
|
item-429 at level 4: list_item: Pages using Sister project links with hidden wikidata
|
||||||
item-430 at level 4: list_item: Pages using Sister project links with hidden wikidata
|
item-430 at level 4: list_item: Webarchive template wayback links
|
||||||
item-431 at level 4: list_item: Webarchive template wayback links
|
item-431 at level 4: list_item: Articles with Project Gutenberg links
|
||||||
item-432 at level 4: list_item: Articles with Project Gutenberg links
|
item-432 at level 4: list_item: Articles containing video clips
|
||||||
item-433 at level 4: list_item: Articles containing video clips
|
item-433 at level 3: list: group list
|
||||||
item-434 at level 3: list: group list
|
item-434 at level 4: list_item: This page was last edited on 21 September 2024, at 12:11 (UTC).
|
||||||
item-435 at level 4: list_item: This page was last edited on 21 September 2024, at 12:11 (UTC).
|
item-435 at level 4: list_item: Text is available under the Crea ... tion, Inc., a non-profit organization.
|
||||||
item-436 at level 4: list_item: Text is available under the Crea ... tion, Inc., a non-profit organization.
|
item-436 at level 3: list: group list
|
||||||
item-437 at level 3: list: group list
|
item-437 at level 4: list_item: Privacy policy
|
||||||
item-438 at level 4: list_item: Privacy policy
|
item-438 at level 4: list_item: About Wikipedia
|
||||||
item-439 at level 4: list_item: About Wikipedia
|
item-439 at level 4: list_item: Disclaimers
|
||||||
item-440 at level 4: list_item: Disclaimers
|
item-440 at level 4: list_item: Contact Wikipedia
|
||||||
item-441 at level 4: list_item: Contact Wikipedia
|
item-441 at level 4: list_item: Code of Conduct
|
||||||
item-442 at level 4: list_item: Code of Conduct
|
item-442 at level 4: list_item: Developers
|
||||||
item-443 at level 4: list_item: Developers
|
item-443 at level 4: list_item: Statistics
|
||||||
item-444 at level 4: list_item: Statistics
|
item-444 at level 4: list_item: Cookie statement
|
||||||
item-445 at level 4: list_item: Cookie statement
|
item-445 at level 4: list_item: Mobile view
|
||||||
item-446 at level 4: list_item: Mobile view
|
item-446 at level 3: list: group list
|
||||||
item-447 at level 3: list: group list
|
item-447 at level 3: list: group list
|
||||||
item-448 at level 4: list_item:
|
item-448 at level 1: caption: Pacific black duck displaying the characteristic upending "duck"
|
||||||
item-449 at level 4: list_item:
|
item-449 at level 1: caption: Male mallard.
|
||||||
item-450 at level 3: list: group list
|
item-450 at level 1: caption: Wood ducks.
|
||||||
item-451 at level 1: caption: Pacific black duck displaying the characteristic upending "duck"
|
item-451 at level 1: caption: Mallard landing in approach
|
||||||
item-452 at level 1: caption: Male mallard.
|
item-452 at level 1: caption: Male Mandarin duck
|
||||||
item-453 at level 1: caption: Wood ducks.
|
item-453 at level 1: caption: Flying steamer ducks in Ushuaia, Argentina
|
||||||
item-454 at level 1: caption: Mallard landing in approach
|
item-454 at level 1: caption: Female mallard in Cornwall, England
|
||||||
item-455 at level 1: caption: Male Mandarin duck
|
item-455 at level 1: caption: Pecten along the bill
|
||||||
item-456 at level 1: caption: Flying steamer ducks in Ushuaia, Argentina
|
item-456 at level 1: caption: Mallard duckling preening
|
||||||
item-457 at level 1: caption: Female mallard in Cornwall, England
|
item-457 at level 1: caption: A Muscovy duckling
|
||||||
item-458 at level 1: caption: Pecten along the bill
|
item-458 at level 1: caption: Ringed teal
|
||||||
item-459 at level 1: caption: Mallard duckling preening
|
item-459 at level 1: caption: Indian Runner ducks, a common breed of domestic ducks
|
||||||
item-460 at level 1: caption: A Muscovy duckling
|
item-460 at level 1: caption: Three black-colored ducks in the coat of arms of Maaninka[49]
|
||||||
item-461 at level 1: caption: Ringed teal
|
|
||||||
item-462 at level 1: caption: Indian Runner ducks, a common breed of domestic ducks
|
|
||||||
item-463 at level 1: caption: Three black-colored ducks in the coat of arms of Maaninka[49]
|
|
@ -1413,9 +1413,6 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/350"
|
"$ref": "#/texts/350"
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/texts/351"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
@ -1428,14 +1425,14 @@
|
|||||||
"$ref": "#/texts/341"
|
"$ref": "#/texts/341"
|
||||||
},
|
},
|
||||||
"children": [
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/351"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/352"
|
"$ref": "#/texts/352"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/353"
|
"$ref": "#/texts/353"
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/texts/354"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
@ -1448,6 +1445,9 @@
|
|||||||
"$ref": "#/texts/341"
|
"$ref": "#/texts/341"
|
||||||
},
|
},
|
||||||
"children": [
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/354"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/355"
|
"$ref": "#/texts/355"
|
||||||
},
|
},
|
||||||
@ -1522,9 +1522,6 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/379"
|
"$ref": "#/texts/379"
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/texts/380"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
@ -1538,10 +1535,10 @@
|
|||||||
},
|
},
|
||||||
"children": [
|
"children": [
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/381"
|
"$ref": "#/texts/380"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/382"
|
"$ref": "#/texts/381"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
@ -1554,6 +1551,9 @@
|
|||||||
"$ref": "#/texts/341"
|
"$ref": "#/texts/341"
|
||||||
},
|
},
|
||||||
"children": [
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/382"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/383"
|
"$ref": "#/texts/383"
|
||||||
},
|
},
|
||||||
@ -1577,9 +1577,6 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/390"
|
"$ref": "#/texts/390"
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/texts/391"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
@ -1591,14 +1588,7 @@
|
|||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/texts/341"
|
"$ref": "#/texts/341"
|
||||||
},
|
},
|
||||||
"children": [
|
"children": [],
|
||||||
{
|
|
||||||
"$ref": "#/texts/392"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/texts/393"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"name": "list",
|
"name": "list",
|
||||||
"label": "list"
|
"label": "list"
|
||||||
@ -6774,27 +6764,13 @@
|
|||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "list_item",
|
"label": "list_item",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
|
||||||
"text": "",
|
|
||||||
"enumerated": false,
|
|
||||||
"marker": "-"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"self_ref": "#/texts/351",
|
|
||||||
"parent": {
|
|
||||||
"$ref": "#/groups/42"
|
|
||||||
},
|
|
||||||
"children": [],
|
|
||||||
"content_layer": "body",
|
|
||||||
"label": "list_item",
|
|
||||||
"prov": [],
|
|
||||||
"orig": "Ducks at a Distance, by Rob Hines at Project Gutenberg - A modern illustrated guide to identification of US waterfowl",
|
"orig": "Ducks at a Distance, by Rob Hines at Project Gutenberg - A modern illustrated guide to identification of US waterfowl",
|
||||||
"text": "Ducks at a Distance, by Rob Hines at Project Gutenberg - A modern illustrated guide to identification of US waterfowl",
|
"text": "Ducks at a Distance, by Rob Hines at Project Gutenberg - A modern illustrated guide to identification of US waterfowl",
|
||||||
"enumerated": false,
|
"enumerated": false,
|
||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/352",
|
"self_ref": "#/texts/351",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/43"
|
"$ref": "#/groups/43"
|
||||||
},
|
},
|
||||||
@ -6808,7 +6784,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/353",
|
"self_ref": "#/texts/352",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/43"
|
"$ref": "#/groups/43"
|
||||||
},
|
},
|
||||||
@ -6822,7 +6798,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/354",
|
"self_ref": "#/texts/353",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/43"
|
"$ref": "#/groups/43"
|
||||||
},
|
},
|
||||||
@ -6836,7 +6812,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/355",
|
"self_ref": "#/texts/354",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -6850,7 +6826,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/356",
|
"self_ref": "#/texts/355",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -6864,7 +6840,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/357",
|
"self_ref": "#/texts/356",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -6878,7 +6854,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/358",
|
"self_ref": "#/texts/357",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -6892,7 +6868,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/359",
|
"self_ref": "#/texts/358",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -6906,7 +6882,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/360",
|
"self_ref": "#/texts/359",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -6920,7 +6896,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/361",
|
"self_ref": "#/texts/360",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -6934,7 +6910,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/362",
|
"self_ref": "#/texts/361",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -6948,7 +6924,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/363",
|
"self_ref": "#/texts/362",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -6962,7 +6938,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/364",
|
"self_ref": "#/texts/363",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -6976,7 +6952,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/365",
|
"self_ref": "#/texts/364",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -6990,7 +6966,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/366",
|
"self_ref": "#/texts/365",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7004,7 +6980,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/367",
|
"self_ref": "#/texts/366",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7018,7 +6994,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/368",
|
"self_ref": "#/texts/367",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7032,7 +7008,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/369",
|
"self_ref": "#/texts/368",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7046,7 +7022,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/370",
|
"self_ref": "#/texts/369",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7060,7 +7036,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/371",
|
"self_ref": "#/texts/370",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7074,7 +7050,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/372",
|
"self_ref": "#/texts/371",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7088,7 +7064,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/373",
|
"self_ref": "#/texts/372",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7102,7 +7078,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/374",
|
"self_ref": "#/texts/373",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7116,7 +7092,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/375",
|
"self_ref": "#/texts/374",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7130,7 +7106,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/376",
|
"self_ref": "#/texts/375",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7144,7 +7120,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/377",
|
"self_ref": "#/texts/376",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7158,7 +7134,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/378",
|
"self_ref": "#/texts/377",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7172,7 +7148,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/379",
|
"self_ref": "#/texts/378",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7186,7 +7162,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/380",
|
"self_ref": "#/texts/379",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/44"
|
"$ref": "#/groups/44"
|
||||||
},
|
},
|
||||||
@ -7200,7 +7176,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/381",
|
"self_ref": "#/texts/380",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/45"
|
"$ref": "#/groups/45"
|
||||||
},
|
},
|
||||||
@ -7214,7 +7190,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/382",
|
"self_ref": "#/texts/381",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/45"
|
"$ref": "#/groups/45"
|
||||||
},
|
},
|
||||||
@ -7228,7 +7204,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/383",
|
"self_ref": "#/texts/382",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/46"
|
"$ref": "#/groups/46"
|
||||||
},
|
},
|
||||||
@ -7242,7 +7218,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/384",
|
"self_ref": "#/texts/383",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/46"
|
"$ref": "#/groups/46"
|
||||||
},
|
},
|
||||||
@ -7256,7 +7232,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/385",
|
"self_ref": "#/texts/384",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/46"
|
"$ref": "#/groups/46"
|
||||||
},
|
},
|
||||||
@ -7270,7 +7246,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/386",
|
"self_ref": "#/texts/385",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/46"
|
"$ref": "#/groups/46"
|
||||||
},
|
},
|
||||||
@ -7284,7 +7260,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/387",
|
"self_ref": "#/texts/386",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/46"
|
"$ref": "#/groups/46"
|
||||||
},
|
},
|
||||||
@ -7298,7 +7274,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/388",
|
"self_ref": "#/texts/387",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/46"
|
"$ref": "#/groups/46"
|
||||||
},
|
},
|
||||||
@ -7312,7 +7288,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/389",
|
"self_ref": "#/texts/388",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/46"
|
"$ref": "#/groups/46"
|
||||||
},
|
},
|
||||||
@ -7326,7 +7302,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/390",
|
"self_ref": "#/texts/389",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/46"
|
"$ref": "#/groups/46"
|
||||||
},
|
},
|
||||||
@ -7340,7 +7316,7 @@
|
|||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"self_ref": "#/texts/391",
|
"self_ref": "#/texts/390",
|
||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/groups/46"
|
"$ref": "#/groups/46"
|
||||||
},
|
},
|
||||||
@ -7352,34 +7328,6 @@
|
|||||||
"text": "Mobile view",
|
"text": "Mobile view",
|
||||||
"enumerated": false,
|
"enumerated": false,
|
||||||
"marker": "-"
|
"marker": "-"
|
||||||
},
|
|
||||||
{
|
|
||||||
"self_ref": "#/texts/392",
|
|
||||||
"parent": {
|
|
||||||
"$ref": "#/groups/47"
|
|
||||||
},
|
|
||||||
"children": [],
|
|
||||||
"content_layer": "body",
|
|
||||||
"label": "list_item",
|
|
||||||
"prov": [],
|
|
||||||
"orig": "",
|
|
||||||
"text": "",
|
|
||||||
"enumerated": false,
|
|
||||||
"marker": "-"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"self_ref": "#/texts/393",
|
|
||||||
"parent": {
|
|
||||||
"$ref": "#/groups/47"
|
|
||||||
},
|
|
||||||
"children": [],
|
|
||||||
"content_layer": "body",
|
|
||||||
"label": "list_item",
|
|
||||||
"prov": [],
|
|
||||||
"orig": "",
|
|
||||||
"text": "",
|
|
||||||
"enumerated": false,
|
|
||||||
"marker": "-"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"pictures": [
|
"pictures": [
|
||||||
|
@ -473,7 +473,6 @@ The 1992 Disney film The Mighty Ducks, starring Emilio Estevez, chose the duck a
|
|||||||
|
|
||||||
- list of books (useful looking abstracts)
|
- list of books (useful looking abstracts)
|
||||||
- Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine
|
- Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine
|
||||||
-
|
|
||||||
- Ducks at a Distance, by Rob Hines at Project Gutenberg - A modern illustrated guide to identification of US waterfowl
|
- Ducks at a Distance, by Rob Hines at Project Gutenberg - A modern illustrated guide to identification of US waterfowl
|
||||||
|
|
||||||
| Authority control databases | Authority control databases |
|
| Authority control databases | Authority control databases |
|
||||||
@ -526,7 +525,4 @@ additional terms may apply. By using this site, you agree to the Terms of Use an
|
|||||||
- Developers
|
- Developers
|
||||||
- Statistics
|
- Statistics
|
||||||
- Cookie statement
|
- Cookie statement
|
||||||
- Mobile view
|
- Mobile view
|
||||||
|
|
||||||
-
|
|
||||||
-
|
|
Loading…
Reference in New Issue
Block a user