fix(HTML): ensure correct concatenation of child strings in table cells and list items

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
Cesar Berrospi Ramis 2025-07-23 17:18:38 +02:00
parent 98e2fcff63
commit 53e68d3dc6
5 changed files with 162 additions and 134 deletions

View File

@ -5,7 +5,7 @@ from io import BytesIO
from pathlib import Path
from typing import Final, Optional, Union, cast
from bs4 import BeautifulSoup, NavigableString, Tag
from bs4 import BeautifulSoup, NavigableString, PageElement, Tag
from bs4.element import PreformattedString
from docling_core.types.doc import (
DocItem,
@ -297,7 +297,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
):
parts.append(child)
elif isinstance(child, Tag) and child.name not in ("ul", "ol"):
text_part = child.get_text()
text_part = HTMLDocumentBackend.get_text(child)
if text_part:
parts.append(text_part)
li_text = re.sub(r"\s+|\n+", " ", "".join(parts)).strip()
@ -417,6 +417,36 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
content_layer=self.content_layer,
)
@staticmethod
def get_text(item: PageElement) -> str:
    """Concatenate the visible child strings of a PageElement.

    This method is similar to `PageElement.get_text()` but also considers
    certain tags: the text of every <p> or <li> tag (including `item` itself)
    is followed by a single trailing space, so that adjacent paragraphs and
    list items do not run together. All other strings are concatenated
    without separators.

    Markup-only strings such as HTML comments and doctype declarations
    (`PreformattedString` nodes) are skipped, so they never end up in the
    extracted text.

    Args:
        item: The node (a string or a tag) to extract the text from.

    Returns:
        The concatenated text. The result is not stripped; callers that need
        clean cell or item text must strip or normalize whitespace themselves.
    """

    def _extract_text_recursively(node: PageElement) -> list[str]:
        """Recursively collect the text fragments found under `node`."""
        # Check PreformattedString first: it is a NavigableString subclass,
        # so the generic string branch below would otherwise accept it.
        if isinstance(node, PreformattedString):
            return []
        if isinstance(node, NavigableString):
            return [node]
        if isinstance(node, Tag):
            fragments: list[str] = []
            for child in node:
                fragments.extend(_extract_text_recursively(child))
            text = "".join(fragments)
            # The trailing space keeps consecutive <p>/<li> blocks apart.
            return [text + " " if node.name in {"p", "li"} else text]
        # Any other kind of page element carries no text.
        return []

    return "".join(_extract_text_recursively(item))
@staticmethod
def _get_cell_spans(cell: Tag) -> tuple[int, int]:
"""Extract colspan and rowspan values from a table cell tag.
@ -510,9 +540,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
formula.replace_with(NavigableString(math_formula))
# TODO: extract content correctly from table-cells with lists
text = html_cell.text
# label = html_cell.name
text = HTMLDocumentBackend.get_text(html_cell).strip()
col_span, row_span = HTMLDocumentBackend._get_cell_spans(html_cell)
if row_header:
row_span -= 1

View File

@ -5839,7 +5839,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": " Number of total districts",
"text": "Number of total districts",
"column_header": false,
"row_header": false,
"row_section": false
@ -6642,7 +6642,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": " Number of total districts",
"text": "Number of total districts",
"column_header": false,
"row_header": false,
"row_section": false

View File

@ -4166,7 +4166,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ground corn",
"text": "Ground corn",
"column_header": false,
"row_header": false,
"row_section": false
@ -4298,7 +4298,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Soybean meal",
"text": "Soybean meal",
"column_header": false,
"row_header": false,
"row_section": false
@ -4430,7 +4430,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Corn silage",
"text": "Corn silage",
"column_header": false,
"row_header": false,
"row_section": false
@ -4562,7 +4562,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ann temperate pasture",
"text": "Ann temperate pasture",
"column_header": false,
"row_header": false,
"row_section": false
@ -4694,7 +4694,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ann tropical pasture",
"text": "Ann tropical pasture",
"column_header": false,
"row_header": false,
"row_section": false
@ -4826,7 +4826,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Perenn tropical pasture",
"text": "Perenn tropical pasture",
"column_header": false,
"row_header": false,
"row_section": false
@ -4970,7 +4970,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Organic matter",
"text": "Organic matter",
"column_header": false,
"row_header": false,
"row_section": false
@ -5102,7 +5102,7 @@
"end_row_offset_idx": 14,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Crude protein",
"text": "Crude protein",
"column_header": false,
"row_header": false,
"row_section": false
@ -5234,7 +5234,7 @@
"end_row_offset_idx": 15,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Neutral detergent fibre",
"text": "Neutral detergent fibre",
"column_header": false,
"row_header": false,
"row_section": false
@ -5366,7 +5366,7 @@
"end_row_offset_idx": 16,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Acid detergent fibre",
"text": "Acid detergent fibre",
"column_header": false,
"row_header": false,
"row_section": false
@ -5498,7 +5498,7 @@
"end_row_offset_idx": 17,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ether extract",
"text": "Ether extract",
"column_header": false,
"row_header": false,
"row_section": false
@ -5642,7 +5642,7 @@
"end_row_offset_idx": 19,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    OM digestibility, %",
"text": "OM digestibility, %",
"column_header": false,
"row_header": false,
"row_section": false
@ -5774,7 +5774,7 @@
"end_row_offset_idx": 20,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    NEL, Mcal (kg DM)-1",
"text": "NEL, Mcal (kg DM)-1",
"column_header": false,
"row_header": false,
"row_section": false
@ -5906,7 +5906,7 @@
"end_row_offset_idx": 21,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    MP, g (kg DM)-1",
"text": "MP, g (kg DM)-1",
"column_header": false,
"row_header": false,
"row_section": false
@ -6713,7 +6713,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ground corn",
"text": "Ground corn",
"column_header": false,
"row_header": false,
"row_section": false
@ -6847,7 +6847,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Soybean meal",
"text": "Soybean meal",
"column_header": false,
"row_header": false,
"row_section": false
@ -6981,7 +6981,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Corn silage",
"text": "Corn silage",
"column_header": false,
"row_header": false,
"row_section": false
@ -7115,7 +7115,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ann temperate pasture",
"text": "Ann temperate pasture",
"column_header": false,
"row_header": false,
"row_section": false
@ -7249,7 +7249,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ann tropical pasture",
"text": "Ann tropical pasture",
"column_header": false,
"row_header": false,
"row_section": false
@ -7383,7 +7383,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Perenn tropical pasture",
"text": "Perenn tropical pasture",
"column_header": false,
"row_header": false,
"row_section": false
@ -7651,7 +7651,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Organic matter",
"text": "Organic matter",
"column_header": false,
"row_header": false,
"row_section": false
@ -7785,7 +7785,7 @@
"end_row_offset_idx": 14,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Crude protein",
"text": "Crude protein",
"column_header": false,
"row_header": false,
"row_section": false
@ -7919,7 +7919,7 @@
"end_row_offset_idx": 15,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Neutral detergent fibre",
"text": "Neutral detergent fibre",
"column_header": false,
"row_header": false,
"row_section": false
@ -8053,7 +8053,7 @@
"end_row_offset_idx": 16,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Acid detergent fibre",
"text": "Acid detergent fibre",
"column_header": false,
"row_header": false,
"row_section": false
@ -8187,7 +8187,7 @@
"end_row_offset_idx": 17,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ether extract",
"text": "Ether extract",
"column_header": false,
"row_header": false,
"row_section": false
@ -8455,7 +8455,7 @@
"end_row_offset_idx": 19,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    OM digestibility, %",
"text": "OM digestibility, %",
"column_header": false,
"row_header": false,
"row_section": false
@ -8589,7 +8589,7 @@
"end_row_offset_idx": 20,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    NEL, Mcal (kg DM)-1",
"text": "NEL, Mcal (kg DM)-1",
"column_header": false,
"row_header": false,
"row_section": false
@ -8723,7 +8723,7 @@
"end_row_offset_idx": 21,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    MP, g (kg DM)-1",
"text": "MP, g (kg DM)-1",
"column_header": false,
"row_header": false,
"row_section": false
@ -8998,7 +8998,7 @@
"end_row_offset_idx": 3,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Corn grain",
"text": "Corn grain",
"column_header": false,
"row_header": false,
"row_section": false
@ -9058,7 +9058,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Soybean",
"text": "Soybean",
"column_header": false,
"row_header": false,
"row_section": false
@ -9178,7 +9178,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Corn silageb",
"text": "Corn silageb",
"column_header": false,
"row_header": false,
"row_section": false
@ -9238,7 +9238,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Annual ryegrassc",
"text": "Annual ryegrassc",
"column_header": false,
"row_header": false,
"row_section": false
@ -9298,7 +9298,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Pearl milletd",
"text": "Pearl milletd",
"column_header": false,
"row_header": false,
"row_section": false
@ -9358,7 +9358,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Kikuyu grasse",
"text": "Kikuyu grasse",
"column_header": false,
"row_header": false,
"row_section": false
@ -9547,7 +9547,7 @@
"end_row_offset_idx": 3,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Corn grain",
"text": "Corn grain",
"column_header": false,
"row_header": false,
"row_section": false
@ -9609,7 +9609,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Soybean",
"text": "Soybean",
"column_header": false,
"row_header": false,
"row_section": false
@ -9733,7 +9733,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Corn silageb",
"text": "Corn silageb",
"column_header": false,
"row_header": false,
"row_section": false
@ -9795,7 +9795,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Annual ryegrassc",
"text": "Annual ryegrassc",
"column_header": false,
"row_header": false,
"row_section": false
@ -9857,7 +9857,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Pearl milletd",
"text": "Pearl milletd",
"column_header": false,
"row_header": false,
"row_section": false
@ -9919,7 +9919,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Kikuyu grasse",
"text": "Kikuyu grasse",
"column_header": false,
"row_header": false,
"row_section": false
@ -10182,7 +10182,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    N organic fertilizer, kg ha-1a",
"text": "N organic fertilizer, kg ha-1a",
"column_header": false,
"row_header": false,
"row_section": false
@ -10242,7 +10242,7 @@
"end_row_offset_idx": 5,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    N synthetic fertilizer",
"text": "N synthetic fertilizer",
"column_header": false,
"row_header": false,
"row_section": false
@ -10302,7 +10302,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    N from residual DM, kg ha-1b",
"text": "N from residual DM, kg ha-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -10362,7 +10362,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Emission fator, kg N2O-N (kg N)-1c",
"text": "Emission fator, kg N2O-N (kg N)-1c",
"column_header": false,
"row_header": false,
"row_section": false
@ -10422,7 +10422,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O ha-1 from direct emissions",
"text": "kg N2O ha-1 from direct emissions",
"column_header": false,
"row_header": false,
"row_section": false
@ -10542,7 +10542,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg NH3-N+NOx-N (kg organic N)-1b",
"text": "kg NH3-N+NOx-N (kg organic N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -10602,7 +10602,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg NH3-N+NOx-N (kg synthetic N)-1b",
"text": "kg NH3-N+NOx-N (kg synthetic N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -10662,7 +10662,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O-N (kg NH3-N+NOx-N)-1b",
"text": "kg N2O-N (kg NH3-N+NOx-N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -10722,7 +10722,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O ha-1 from NH3+NOx volatilized",
"text": "kg N2O ha-1 from NH3+NOx volatilized",
"column_header": false,
"row_header": false,
"row_section": false
@ -10842,7 +10842,7 @@
"end_row_offset_idx": 15,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N losses by leaching (kg N)-1b",
"text": "kg N losses by leaching (kg N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -10902,7 +10902,7 @@
"end_row_offset_idx": 16,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O-N (kg N leaching)-1",
"text": "kg N2O-N (kg N leaching)-1",
"column_header": false,
"row_header": false,
"row_section": false
@ -10962,7 +10962,7 @@
"end_row_offset_idx": 17,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O ha-1 from N losses by leaching",
"text": "kg N2O ha-1 from N losses by leaching",
"column_header": false,
"row_header": false,
"row_section": false
@ -11873,7 +11873,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    N organic fertilizer, kg ha-1a",
"text": "N organic fertilizer, kg ha-1a",
"column_header": false,
"row_header": false,
"row_section": false
@ -11935,7 +11935,7 @@
"end_row_offset_idx": 5,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    N synthetic fertilizer",
"text": "N synthetic fertilizer",
"column_header": false,
"row_header": false,
"row_section": false
@ -11997,7 +11997,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    N from residual DM, kg ha-1b",
"text": "N from residual DM, kg ha-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -12059,7 +12059,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Emission fator, kg N2O-N (kg N)-1c",
"text": "Emission fator, kg N2O-N (kg N)-1c",
"column_header": false,
"row_header": false,
"row_section": false
@ -12121,7 +12121,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O ha-1 from direct emissions",
"text": "kg N2O ha-1 from direct emissions",
"column_header": false,
"row_header": false,
"row_section": false
@ -12245,7 +12245,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg NH3-N+NOx-N (kg organic N)-1b",
"text": "kg NH3-N+NOx-N (kg organic N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -12307,7 +12307,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg NH3-N+NOx-N (kg synthetic N)-1b",
"text": "kg NH3-N+NOx-N (kg synthetic N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -12369,7 +12369,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O-N (kg NH3-N+NOx-N)-1b",
"text": "kg N2O-N (kg NH3-N+NOx-N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -12431,7 +12431,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O ha-1 from NH3+NOx volatilized",
"text": "kg N2O ha-1 from NH3+NOx volatilized",
"column_header": false,
"row_header": false,
"row_section": false
@ -12555,7 +12555,7 @@
"end_row_offset_idx": 15,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N losses by leaching (kg N)-1b",
"text": "kg N losses by leaching (kg N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -12617,7 +12617,7 @@
"end_row_offset_idx": 16,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O-N (kg N leaching)-1",
"text": "kg N2O-N (kg N leaching)-1",
"column_header": false,
"row_header": false,
"row_section": false
@ -12679,7 +12679,7 @@
"end_row_offset_idx": 17,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O ha-1 from N losses by leaching",
"text": "kg N2O ha-1 from N losses by leaching",
"column_header": false,
"row_header": false,
"row_section": false
@ -13780,7 +13780,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Fuel for manure handling",
"text": "Fuel for manure handling",
"column_header": false,
"row_header": false,
"row_section": false
@ -13828,7 +13828,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Machinery for manure handling",
"text": "Machinery for manure handling",
"column_header": false,
"row_header": false,
"row_section": false
@ -13924,7 +13924,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Electricity for milking",
"text": "Electricity for milking",
"column_header": false,
"row_header": false,
"row_section": false
@ -13972,7 +13972,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Electricity for lightingd",
"text": "Electricity for lightingd",
"column_header": false,
"row_header": false,
"row_section": false
@ -14375,7 +14375,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Fuel for manure handling",
"text": "Fuel for manure handling",
"column_header": false,
"row_header": false,
"row_section": false
@ -14425,7 +14425,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Machinery for manure handling",
"text": "Machinery for manure handling",
"column_header": false,
"row_header": false,
"row_section": false
@ -14525,7 +14525,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Electricity for milking",
"text": "Electricity for milking",
"column_header": false,
"row_header": false,
"row_section": false
@ -14575,7 +14575,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Electricity for lightingd",
"text": "Electricity for lightingd",
"column_header": false,
"row_header": false,
"row_section": false

View File

@ -8410,7 +8410,7 @@
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Duck\n",
"text": "Duck",
"column_header": true,
"row_header": false,
"row_section": false
@ -8422,7 +8422,7 @@
"end_row_offset_idx": 2,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "\n",
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
@ -8434,7 +8434,7 @@
"end_row_offset_idx": 3,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Bufflehead\n(Bucephala albeola)\n",
"text": "Bufflehead\n(Bucephala albeola)",
"column_header": false,
"row_header": false,
"row_section": false
@ -8446,7 +8446,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Scientific classification \n",
"text": "Scientific classification",
"column_header": true,
"row_header": false,
"row_section": false
@ -8458,7 +8458,7 @@
"end_row_offset_idx": 5,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Domain:\n",
"text": "Domain:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8470,7 +8470,7 @@
"end_row_offset_idx": 5,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Eukaryota\n",
"text": "Eukaryota",
"column_header": false,
"row_header": false,
"row_section": false
@ -8482,7 +8482,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Kingdom:\n",
"text": "Kingdom:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8494,7 +8494,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Animalia\n",
"text": "Animalia",
"column_header": false,
"row_header": false,
"row_section": false
@ -8506,7 +8506,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Phylum:\n",
"text": "Phylum:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8518,7 +8518,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Chordata\n",
"text": "Chordata",
"column_header": false,
"row_header": false,
"row_section": false
@ -8530,7 +8530,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Class:\n",
"text": "Class:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8542,7 +8542,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Aves\n",
"text": "Aves",
"column_header": false,
"row_header": false,
"row_section": false
@ -8554,7 +8554,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Order:\n",
"text": "Order:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8566,7 +8566,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Anseriformes\n",
"text": "Anseriformes",
"column_header": false,
"row_header": false,
"row_section": false
@ -8578,7 +8578,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Superfamily:\n",
"text": "Superfamily:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8590,7 +8590,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Anatoidea\n",
"text": "Anatoidea",
"column_header": false,
"row_header": false,
"row_section": false
@ -8602,7 +8602,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Family:\n",
"text": "Family:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8614,7 +8614,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Anatidae\n",
"text": "Anatidae",
"column_header": false,
"row_header": false,
"row_section": false
@ -8626,7 +8626,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Subfamilies\n",
"text": "Subfamilies",
"column_header": true,
"row_header": false,
"row_section": false
@ -8638,7 +8638,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "\nSee text\n\n",
"text": "See text",
"column_header": false,
"row_header": false,
"row_section": false
@ -8655,7 +8655,7 @@
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Duck\n",
"text": "Duck",
"column_header": true,
"row_header": false,
"row_section": false
@ -8667,7 +8667,7 @@
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Duck\n",
"text": "Duck",
"column_header": true,
"row_header": false,
"row_section": false
@ -8681,7 +8681,7 @@
"end_row_offset_idx": 2,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "\n",
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
@ -8693,7 +8693,7 @@
"end_row_offset_idx": 2,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "\n",
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
@ -8707,7 +8707,7 @@
"end_row_offset_idx": 3,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Bufflehead\n(Bucephala albeola)\n",
"text": "Bufflehead\n(Bucephala albeola)",
"column_header": false,
"row_header": false,
"row_section": false
@ -8719,7 +8719,7 @@
"end_row_offset_idx": 3,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Bufflehead\n(Bucephala albeola)\n",
"text": "Bufflehead\n(Bucephala albeola)",
"column_header": false,
"row_header": false,
"row_section": false
@ -8733,7 +8733,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Scientific classification \n",
"text": "Scientific classification",
"column_header": true,
"row_header": false,
"row_section": false
@ -8745,7 +8745,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Scientific classification \n",
"text": "Scientific classification",
"column_header": true,
"row_header": false,
"row_section": false
@ -8759,7 +8759,7 @@
"end_row_offset_idx": 5,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Domain:\n",
"text": "Domain:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8771,7 +8771,7 @@
"end_row_offset_idx": 5,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Eukaryota\n",
"text": "Eukaryota",
"column_header": false,
"row_header": false,
"row_section": false
@ -8785,7 +8785,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Kingdom:\n",
"text": "Kingdom:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8797,7 +8797,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Animalia\n",
"text": "Animalia",
"column_header": false,
"row_header": false,
"row_section": false
@ -8811,7 +8811,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Phylum:\n",
"text": "Phylum:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8823,7 +8823,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Chordata\n",
"text": "Chordata",
"column_header": false,
"row_header": false,
"row_section": false
@ -8837,7 +8837,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Class:\n",
"text": "Class:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8849,7 +8849,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Aves\n",
"text": "Aves",
"column_header": false,
"row_header": false,
"row_section": false
@ -8863,7 +8863,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Order:\n",
"text": "Order:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8875,7 +8875,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Anseriformes\n",
"text": "Anseriformes",
"column_header": false,
"row_header": false,
"row_section": false
@ -8889,7 +8889,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Superfamily:\n",
"text": "Superfamily:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8901,7 +8901,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Anatoidea\n",
"text": "Anatoidea",
"column_header": false,
"row_header": false,
"row_section": false
@ -8915,7 +8915,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Family:\n",
"text": "Family:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8927,7 +8927,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Anatidae\n",
"text": "Anatidae",
"column_header": false,
"row_header": false,
"row_section": false
@ -8941,7 +8941,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Subfamilies\n",
"text": "Subfamilies",
"column_header": true,
"row_header": false,
"row_section": false
@ -8953,7 +8953,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Subfamilies\n",
"text": "Subfamilies",
"column_header": true,
"row_header": false,
"row_section": false
@ -8967,7 +8967,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "\nSee text\n\n",
"text": "See text",
"column_header": false,
"row_header": false,
"row_section": false
@ -8979,7 +8979,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "\nSee text\n\n",
"text": "See text",
"column_header": false,
"row_header": false,
"row_section": false
@ -9010,7 +9010,7 @@
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Authority control databases ",
"text": "Authority control databases",
"column_header": true,
"row_header": false,
"row_section": false
@ -9034,7 +9034,7 @@
"end_row_offset_idx": 2,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "United StatesFranceBnF dataJapanLatviaIsrael",
"text": "United States France BnF data Japan Latvia Israel",
"column_header": false,
"row_header": false,
"row_section": false
@ -9075,7 +9075,7 @@
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Authority control databases ",
"text": "Authority control databases",
"column_header": true,
"row_header": false,
"row_section": false
@ -9087,7 +9087,7 @@
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Authority control databases ",
"text": "Authority control databases",
"column_header": true,
"row_header": false,
"row_section": false
@ -9113,7 +9113,7 @@
"end_row_offset_idx": 2,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "United StatesFranceBnF dataJapanLatviaIsrael",
"text": "United States France BnF data Japan Latvia Israel",
"column_header": false,
"row_header": false,
"row_section": false

View File

@ -511,10 +511,10 @@ Duck at Wikipedia's sister projects
<!-- image -->
| Authority control databases | Authority control databases |
|--------------------------------|----------------------------------------------|
| National | United StatesFranceBnF dataJapanLatviaIsrael |
| Other | IdRef |
| Authority control databases | Authority control databases |
|-------------------------------|---------------------------------------------------|
| National | United States France BnF data Japan Latvia Israel |
| Other | IdRef |
Retrieved from "https://en.wikipedia.org/w/index.php?title=Duck&amp;oldid=1246843351"