diff --git a/tests/data/groundtruth/docling_v2/elife-56337.xml.json b/tests/data/groundtruth/docling_v2/elife-56337.xml.json index 63b89d7a..a9a02154 100644 --- a/tests/data/groundtruth/docling_v2/elife-56337.xml.json +++ b/tests/data/groundtruth/docling_v2/elife-56337.xml.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -59,6 +60,7 @@ "$ref": "#/texts/63" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -238,6 +240,7 @@ "$ref": "#/texts/120" } ], + "content_layer": "body", "name": "list", "label": "list" } @@ -277,6 +280,7 @@ "$ref": "#/texts/64" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "KRAB-zinc finger protein gene expansion in response to active retrotransposons in the murine lineage", @@ -288,6 +292,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Wolf Gernot; 1: The Eunice Kennedy Shriver National Institute of Child Health and Human Development, The National Institutes of Health: Bethesda: United States; de Iaco Alberto; 2: School of Life Sciences, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL): Lausanne: Switzerland; Sun Ming-An; 1: The Eunice Kennedy Shriver National Institute of Child Health and Human Development, The National Institutes of Health: Bethesda: United States; Bruno Melania; 1: The Eunice Kennedy Shriver National Institute of Child Health and Human Development, The National Institutes of Health: Bethesda: United States; Tinkham Matthew; 1: The Eunice Kennedy Shriver National Institute of Child Health and Human Development, The National Institutes of Health: Bethesda: United States; Hoang Don; 1: The Eunice Kennedy Shriver National Institute of Child Health and Human Development, The National Institutes of Health: Bethesda: United States; Mitra Apratim; 1: The Eunice Kennedy Shriver National Institute of Child Health and Human Development, The National Institutes of Health: Bethesda: United States; Ralls Sherry; 1: The Eunice Kennedy Shriver National Institute of Child Health and Human Development, The National Institutes of Health: Bethesda: United States; Trono Didier; 2: School of Life Sciences, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL): Lausanne: Switzerland; Macfarlan Todd S; 1: The Eunice Kennedy Shriver National Institute of Child Health and Human Development, The National Institutes of Health: Bethesda: United States", @@ -303,6 +308,7 @@ "$ref": "#/texts/3" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Abstract", @@ -315,6 +321,7 @@ "$ref": "#/texts/2" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The Kr\u00fcppel-associated box zinc finger protein (KRAB-ZFP) family diversified in mammals. The majority of human KRAB-ZFPs bind transposable elements (TEs), however, since most TEs are inactive in humans it is unclear whether KRAB-ZFPs emerged to suppress TEs. We demonstrate that many recently emerged murine KRAB-ZFPs also bind to TEs, including the active ETn, IAP, and L1 families. Using a CRISPR/Cas9-based engineering approach, we genetically deleted five large clusters of KRAB-ZFPs and demonstrate that target TEs are de-repressed, unleashing TE-encoded enhancers. Homozygous knockout mice lacking one of two KRAB-ZFP gene clusters on chromosome 2 and chromosome 4 were nonetheless viable. In pedigrees of chromosome 4 cluster KRAB-ZFP mutants, we identified numerous novel ETn insertions with a modest increase in mutants. Our data strongly support the current model that recent waves of retrotransposon activity drove the expansion of KRAB-ZFP genes in mice and that many KRAB-ZFPs play a redundant role restricting TE activity.", @@ -333,6 +340,7 @@ "$ref": "#/texts/6" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Introduction", @@ -345,6 +353,7 @@ "$ref": "#/texts/4" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Nearly half of the human and mouse genomes consist of transposable elements (TEs). TEs shape the evolution of species, serving as a source for genetic innovation (Chuong et al., 2016; Frank and Feschotte, 2017). However, TEs also potentially harm their hosts by insertional mutagenesis, gene deregulation and activation of innate immunity (Maksakova et al., 2006; Kano et al., 2007; Brodziak et al., 2012; Hancks and Kazazian, 2016). To protect themselves from TE activity, host organisms have developed a wide range of defense mechanisms targeting virtually all steps of the TE life cycle (Dewannieux and Heidmann, 2013). In tetrapods, KRAB zinc finger protein (KRAB-ZFP) genes have amplified and diversified, likely in response to TE colonization (Thomas and Schneider, 2011; Najafabadi et al., 2015; Wolf et al., 2015a; Wolf et al., 2015b; Imbeault et al., 2017). Conventional ZFPs bind DNA using tandem arrays of C2H2 zinc finger domains, each capable of specifically interacting with three nucleotides, whereas some zinc fingers can bind two or four nucleotides and include DNA backbone interactions depending on target DNA structure (Patel et al., 2018). This allows KRAB-ZFPs to flexibly bind to large stretches of DNA with high affinity. The KRAB domain binds the corepressor KAP1, which in turn recruits histone modifying enzymes including the NuRD histone deacetylase complex and the H3K9-specific methylase SETDB1 (Schultz et al., 2002; Sripathy et al., 2006), which induces persistent and heritable gene silencing (Groner et al., 2010). Deletion of KAP1 (Rowe et al., 2010) or SETDB1 (Matsui et al., 2010) in mouse embryonic stem (ES) cells induces TE reactivation and cell death, but only minor phenotypes in differentiated cells, suggesting KRAB-ZFPs are most important during early embryogenesis where they mark TEs for stable epigenetic silencing that persists through development. However, SETDB1-containing complexes are also required to repress TEs in primordial germ cells (Liu et al., 2014) and adult tissues (Ecco et al., 2016), indicating KRAB-ZFPs are active beyond early development.", @@ -356,6 +365,7 @@ "$ref": "#/texts/4" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "TEs, especially long terminal repeat (LTR) retrotransposons, also known as endogenous retroviruses (ERVs), can affect expression of neighboring genes through their promoter and enhancer functions (Macfarlan et al., 2012; Wang et al., 2014; Thompson et al., 2016). KAP1 deletion in mouse ES cells causes rapid gene deregulation (Rowe et al., 2013), indicating that KRAB-ZFPs may regulate gene expression by recruiting KAP1 to TEs. Indeed, Zfp809 knock-out (KO) in mice resulted in transcriptional activation of a handful of genes in various tissues adjacent to ZFP809-targeted VL30-Pro elements (Wolf et al., 2015b). It has therefore been speculated that KRAB-ZFPs bind to TE sequences to domesticate them for gene regulatory innovation (Ecco et al., 2017). This idea is supported by the observation that many human KRAB-ZFPs target TE groups that have lost their coding potential millions of years ago and that KRAB-ZFP target sequences within TEs are in some cases under purifying selection (Imbeault et al., 2017). However, there are also clear signs of an evolutionary arms-race between human TEs and KRAB-ZFPs (Jacobs et al., 2014), indicating that some KRAB-ZFPs may limit TE mobility for stretches of evolutionary time, prior to their ultimate loss from the genome or adaptation for other regulatory functions. Here we use the laboratory mouse, which has undergone a recent expansion of the KRAB-ZFP family, to determine the in vivo requirement of the majority of evolutionarily young KRAB-ZFP genes.", @@ -380,6 +390,7 @@ "$ref": "#/texts/19" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Results", @@ -408,6 +419,7 @@ "$ref": "#/texts/13" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Mouse KRAB-ZFPs target retrotransposons", @@ -420,6 +432,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "We analyzed the RNA expression profiles of mouse KRAB-ZFPs across a wide range of tissues to identify candidates active in early embryos/ES cells. While the majority of KRAB-ZFPs are expressed at low levels and uniformly across tissues, a group of KRAB-ZFPs are highly and almost exclusively expressed in ES cells (Figure 1\u2014figure supplement 1A). About two thirds of these KRAB-ZFPs are physically linked in two clusters on chromosome 2 (Chr2-cl) and 4 (Chr4-cl) (Figure 1\u2014figure supplement 1B). These two clusters encode 40 and 21 KRAB-ZFP annotated genes, respectively, which, with one exception on Chr4-cl, do not have orthologues in rat or any other sequenced mammals (Supplementary file 1). The KRAB-ZFPs within these two genomic clusters also group together phylogenetically (Figure 1\u2014figure supplement 1C), indicating these gene clusters arose by a series of recent segmental gene duplications (Kauzlaric et al., 2017).", @@ -431,6 +444,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "To determine the binding sites of the KRAB-ZFPs within these and other gene clusters, we expressed epitope-tagged KRAB-ZFPs using stably integrating vectors in mouse embryonic carcinoma (EC) or ES cells (Table 1, Supplementary file 1) and performed chromatin immunoprecipitation followed by deep sequencing (ChIP-seq). We then determined whether the identified binding sites are significantly enriched over annotated TEs and used the non-repetitive peak fraction to identify binding motifs. We discarded 7 of 68 ChIP-seq datasets because we could not obtain a binding motif or a target TE and manual inspection confirmed low signal to noise ratio. Of the remaining 61 KRAB-ZFPs, 51 significantly overlapped at least one TE subfamily (adjusted p-value<1e-5). Altogether, 81 LTR retrotransposon, 18 LINE, 10 SINE and one DNA transposon subfamilies were targeted by at least one of the 51 KRAB-ZFPs (Figure 1A and Supplementary file 1). Chr2-cl KRAB-ZFPs preferably bound IAPEz retrotransposons and L1-type LINEs, while Chr4-cl KRAB-ZFPs targeted various retrotransposons, including the closely related MMETn (hereafter referred to as ETn) and ETnERV (also known as MusD) elements (Figure 1A). ETn elements are non-autonomous LTR retrotransposons that require trans-complementation by the fully coding ETnERV elements that contain Gag, Pro and Pol genes (Ribet et al., 2004). These elements have accumulated to\u00a0~240 and~100 copies in the reference C57BL/6 genome, respectively, with\u00a0~550 solitary LTRs (Baust et al., 2003). Both ETn and ETnERVs are still active, generating polymorphisms and mutations in several mouse strains (Gagnier et al., 2019). The validity of our ChIP-seq screen was confirmed by the identification of binding motifs - which often resembled the computationally predicted motifs (Figure 1\u2014figure supplement 2A) - for the majority of screened KRAB-ZFPs (Supplementary file 1). Moreover, predicted and experimentally determined motifs were found in targeted TEs in most cases (Supplementary file 1), and reporter repression assays confirmed KRAB-ZFP induced silencing for all the tested sequences (Figure 1\u2014figure supplement 2B). Finally, we observed KAP1 and H3K9me3 enrichment at most of the targeted TEs in wild type ES cells, indicating that most of these KRAB-ZFPs are functionally active in the early embryo (Figure 1A).", @@ -442,6 +456,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "We generally observed that KRAB-ZFPs present exclusively in mouse target TEs that are restricted to the mouse genome, indicating KRAB-ZFPs and their targets emerged together. For example, several mouse-specific KRAB-ZFPs in Chr2-cl and Chr4-cl target IAP and ETn elements which are only found in the mouse genome and are highly active. This is the strongest data to date supporting that recent KRAB-ZFP expansions in these young clusters is a response to recent TE activity. Likewise, ZFP599 and ZFP617, both conserved in Muroidea, bind to various ORR1-type LTRs which are present in the rat genome (Supplementary file 1). However, ZFP961, a KRAB-ZFP encoded on a small gene cluster on chromosome 8 that is conserved in Muroidea targets TEs that are only found in the mouse genome (e.g. ETn), a paradox we have previously observed with ZFP809, which also targets TEs that are evolutionarily younger than itself (Wolf et al., 2015b). The ZFP961 binding site is located at the 5\u2019 end of the internal region of ETn and ETnERV elements, a sequence that usually contains the primer binding site (PBS), which is required to prime retroviral reverse transcription. Indeed, the ZFP961 motif closely resembles the PBSLys1,2 (Figure 1\u2014figure supplement 3A), which had been previously identified as a KAP1-dependent target of retroviral repression (Yamauchi et al., 1995; Wolf et al., 2008). Repression of the PBSLys1,2 by ZFP961 was also confirmed in reporter assays (Figure 1\u2014figure supplement 2B), indicating that ZFP961 is likely responsible for this silencing effect.", @@ -453,6 +468,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "To further test the hypothesis that KRAB-ZFPs target sites necessary for retrotransposition, we utilized previously generated ETn and ETnERV retrotransposition reporters in which we mutated KRAB-ZFP binding sites (Ribet et al., 2004). Whereas the ETnERV reporters are sufficient for retrotransposition, the ETn reporter requires ETnERV genes supplied in trans. We tested and confirmed that the REX2/ZFP600 and GM13051 binding sites within these TEs are required for efficient retrotransposition (Figure 1\u2014figure supplement 3B). REX2 and ZFP600 both bind a target about 200 bp from the start of the internal region (Figure 1B), a region that often encodes the packaging signal. GM13051 binds a target coding for part of a highly structured mRNA export signal (Legiewicz et al., 2010) near the 3\u2019 end of the internal region of ETn (Figure 1\u2014figure supplement 3C). Both signals are characterized by stem-loop intramolecular base-pairing in which a single mutation can disrupt loop formation. This indicates that at least some KRAB-ZFPs evolved to bind functionally essential target sequences which cannot easily evade repression by mutation.", @@ -464,6 +480,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Our KRAB-ZFP ChIP-seq dataset also provided unique insights into the emergence of new KRAB-ZFPs and binding patterns. The Chr4-cl KRAB-ZFPs REX2 and ZFP600 bind to the same target within ETn but with varying affinity (Figure 1C). Comparison of the amino acids responsible for DNA contact revealed a high similarity between REX2 and ZFP600, with the main differences at the most C-terminal zinc fingers. Additionally, we found that GM30910, another KRAB-ZFP encoded in the Chr4-cl, also shows a strong similarity to both KRAB-ZFPs yet targets entirely different groups of TEs (Figure 1C and Supplementary file 1). Together with previously shown data (Ecco et al., 2016), this example highlights how addition of a few new zinc fingers to an existing array can entirely shift the mode of DNA binding.", @@ -479,6 +496,7 @@ "$ref": "#/texts/15" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Genetic deletion of KRAB-ZFP gene clusters leads to retrotransposon reactivation", @@ -491,6 +509,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The majority of KRAB-ZFP genes are harbored in large, highly repetitive clusters that have formed by successive complex segmental duplications (Kauzlaric et al., 2017), rendering them inaccessible to conventional gene targeting. We therefore developed a strategy to delete entire KRAB-ZFP gene clusters in ES cells (including the Chr2-cl and Chr4-cl as well as two clusters on chromosome 13 and a cluster on chromosome 10) using two CRISPR/Cas9 gRNAs targeting unique regions flanking each cluster, and short single-stranded repair oligos with homologies to both sides of the projected cut sites. Using this approach, we generated five cluster KO ES cell lines in at least two biological replicates and performed RNA sequencing (RNA-seq) to determine TE expression levels. Strikingly, four of the five cluster KO ES cells exhibited distinct TE reactivation phenotypes (Figure 2A). Chr2-cl KO resulted in reactivation of several L1 subfamilies as well as RLTR10 (up to more than 100-fold as compared to WT) and IAPEz ERVs. In contrast, the most strongly upregulated TEs in Chr4-cl KO cells were ETn/ETnERV (up to 10-fold as compared to WT), with several other ERV groups modestly reactivated. ETn/ETnERV elements were also upregulated in Chr13.2-cl KO ES cells while the only upregulated ERVs in Chr13.1-cl KO ES cells were MMERVK10C elements (Figure 2A). Most reactivated retrotransposons were targeted by at least one KRAB-ZFP that was encoded in the deleted cluster (Figure 2A and Supplementary file 1), indicating a direct effect of these KRAB-ZFPs on TE expression levels. Furthermore, we observed a loss of KAP1 binding and H3K9me3 at several TE subfamilies that are targeted by at least one KRAB-ZFP within the deleted Chr2-cl and Chr4-cl (Figure 2B, Figure 2\u2014figure supplement 1A), including L1, ETn and IAPEz elements. Using reduced representation bisulfite sequencing (RRBS-seq), we found that a subset of KRAB-ZFP bound TEs were partially hypomethylated in Chr4-cl KO ES cells, but only when grown in genome-wide hypomethylation-inducing conditions (Blaschke et al., 2013; Figure 2C and Supplementary file 2). These data are consistent with the hypothesis that KRAB-ZFPs/KAP1 are not required to establish DNA methylation, but under certain conditions they protect specific TEs and imprint control regions from genome-wide demethylation (Leung et al., 2014; Deniz et al., 2018).", @@ -509,6 +528,7 @@ "$ref": "#/texts/18" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "KRAB-ZFP cluster deletions license TE-borne enhancers", @@ -521,6 +541,7 @@ "$ref": "#/texts/16" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "We next used our RNA-seq datasets to determine the effect of KRAB-ZFP cluster deletions on gene expression. We identified 195 significantly upregulated and 130 downregulated genes in Chr4-cl KO ES cells, and 108 upregulated and 59 downregulated genes in Chr2-cl KO ES cells (excluding genes on the deleted cluster) (Figure 3A). To address whether gene deregulation in Chr2-cl and Chr4-cl KO ES cells is caused by nearby TE reactivation, we determined whether genes near certain TE subfamilies are more frequently deregulated than random genes. We found a strong correlation of gene upregulation and TE proximity for several TE subfamilies, of which many became transcriptionally activated themselves (Figure 3B). For example, nearly 10% of genes that are located within 100 kb (up- or downstream of the TSS) of an ETn element are upregulated in Chr4-cl KO ES cells, as compared to 0.8% of all genes. In Chr2-cl KO ES cells, upregulated genes were significantly enriched near various LINE groups but also IAPEz-int and RLTR10-int elements, indicating that TE-binding KRAB-ZFPs in these clusters limit the potential activating effects of TEs on nearby genes.", @@ -532,6 +553,7 @@ "$ref": "#/texts/16" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "While we generally observed that TE-associated gene reactivation is not caused by elongated or spliced transcription starting at the retrotransposons, we did observe that the strength of the effect of ETn elements on gene expression is stronger on genes in closer proximity. About 25% of genes located within 20 kb of an ETn element, but only 5% of genes located at a distance between 50 and 100 kb from the nearest ETn insertion, become upregulated in Chr4-cl KO ES cells. Importantly however, the correlation is still significant for genes that are located at distances between 50 and 100 kb from the nearest ETn insertion, indicating that ETn elements can act as long-range enhancers of gene expression in the absence of KRAB-ZFPs that target them. To confirm that Chr4-cl KRAB-ZFPs such as GM13051 block ETn-borne enhancers, we tested the ability of a putative ETn enhancer to activate transcription in a reporter assay. For this purpose, we cloned a 5 kb fragment spanning from the GM13051 binding site within the internal region of a truncated ETn insertion to the first exon of the Cd59a gene, which is strongly activated in Chr4-cl KO ES cells (Figure 2\u2014figure supplement 1B). We observed strong transcriptional activity of this fragment which was significantly higher in Chr4-cl KO ES cells. Surprisingly, this activity was reduced to background when the internal segment of the ETn element was not included in the fragment, suggesting the internal segment of the ETn element, but not its LTR, contains a Chr4-cl KRAB-ZFP sensitive enhancer. To further corroborate these findings, we genetically deleted an ETn element that is located about 60 kb from the TSS of Chst1, one of the top-upregulated genes in Chr4-cl KO ES cells (Figure 3C). RT-qPCR analysis revealed that the Chst1 upregulation phenotype in Chr4-cl KO ES cells diminishes when the ETn insertion is absent, providing direct evidence that a KRAB-ZFP controlled ETn-borne enhancer regulates Chst1 expression (Figure 3D). Furthermore, ChIP-seq confirmed a general increase of H3K4me3, H3K4me1 and H3K27ac marks at ETn elements in Chr4-cl KO ES cells (Figure 3E). Notably, enhancer marks were most pronounced around the GM13051 binding site near the 3\u2019 end of the internal region, confirming that the enhancer activity of ETn is located on the internal region and not on the LTR.", @@ -562,6 +584,7 @@ "$ref": "#/texts/25" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "ETn retrotransposition in Chr4-cl KO and WT mice", @@ -574,6 +597,7 @@ "$ref": "#/texts/19" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "IAP, ETn/ETnERV and MuLV/RLTR4 retrotransposons are highly polymorphic in inbred mouse strains (Nell\u00e5ker et al., 2012), indicating that these elements are able to mobilize in the germ line. Since these retrotransposons are upregulated in Chr2-cl and Chr4-cl KO ES cells, we speculated that these KRAB-ZFP clusters evolved to minimize the risks of insertional mutagenesis by retrotransposition. To test this, we generated Chr2-cl and Chr4-cl KO mice via ES cell injection into blastocysts, and after germ line transmission we genotyped the offspring of heterozygous breeding pairs. While the offspring of Chr4-cl KO/WT parents were born close to Mendelian ratios in pure C57BL/6 and mixed C57BL/6 129Sv matings, one Chr4-cl KO/WT breeding pair gave birth to significantly fewer KO mice than expected (p-value=0.022) (Figure 4\u2014figure supplement 1A). Likewise, two out of four Chr2-cl KO breeding pairs on mixed C57BL/6 129Sv matings failed to give birth to a single KO offspring (p-value<0.01) while the two other mating pairs produced KO offspring at near Mendelian ratios (Figure 4\u2014figure supplement 1A). Altogether, these data indicate that KRAB-ZFP clusters are not absolutely essential in mice, but that genetic and/or epigenetic factors may contribute to reduced viability.", @@ -585,6 +609,7 @@ "$ref": "#/texts/19" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "We reasoned that retrotransposon activation could account for the reduced viability of Chr2-cl and Chr4-cl KO mice in some matings. However, since only rare matings produced non-viable KO embryos, we instead turned to the viable KO mice to assay for increased transposon activity. RNA-seq in blood, brain and testis revealed that, with a few exceptions, retrotransposons upregulated in Chr2 and Chr4 KRAB-ZFP cluster KO ES cells are not expressed at higher levels in adult tissues (Figure 4\u2014figure supplement 1B). Likewise, no strong transcriptional TE reactivation phenotype was observed in liver and kidney of Chr4-cl KO mice (data not shown) and ChIP-seq with antibodies against H3K4me1, H3K4me3 and H3K27ac in testis of Chr4-cl WT and KO mice revealed no increase of active histone marks at ETn elements or other TEs (data not shown). This indicates that Chr2-cl and Chr4-cl KRAB-ZFPs are primarily required for TE repression during early development. This is consistent with the high expression of these KRAB-ZFPs uniquely in ES cells (Figure 1\u2014figure supplement 1A). To determine whether retrotransposition occurs at a higher frequency in Chr4-cl KO mice during development, we screened for novel ETn (ETn/ETnERV) and MuLV (MuLV/RLTR4_MM) insertions in viable Chr4-cl KO mice. For this purpose, we developed a capture-sequencing approach to enrich for ETn/MuLV DNA and flanking sequences from genomic DNA using probes that hybridize with the 5\u2019 and 3\u2019 ends of ETn and MuLV LTRs prior to deep sequencing. We screened genomic DNA samples from a total of 76 mice, including 54 mice from ancestry-controlled Chr4-cl KO matings in various strain backgrounds, the two ES cell lines the Chr4-cl KO mice were generated from, and eight mice from a Chr2-cl KO mating which served as a control (since ETn and MuLVs are not activated in Chr2-cl KO ES cells) (Supplementary file 4). Using this approach, we were able to enrich reads mapping to ETn/MuLV LTRs about 2,000-fold compared to genome sequencing without capture. ETn/MuLV insertions were determined by counting uniquely mapped reads that were paired with reads mapping to ETn/MuLV elements (see materials and methods for details). To assess the efficiency of the capture approach, we determined what proportion of a set of 309 largely intact (two LTRs flanking an internal sequence) reference ETn elements could be identified using our sequencing data. 95% of these insertions were called with high confidence in the majority of our samples (data not shown), indicating that we are able to identify ETn insertions at a high recovery rate.", @@ -596,6 +621,7 @@ "$ref": "#/texts/19" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Using this dataset, we first confirmed the polymorphic nature of both ETn and MuLV retrotransposons in laboratory mouse strains (Figure 4\u2014figure supplement 2A), highlighting the potential of these elements to retrotranspose. To identify novel insertions, we filtered out insertions that were supported by ETn/MuLV-paired reads in more than one animal. While none of the 54 ancestry-controlled mice showed a single novel MuLV insertion, we observed greatly varying numbers of up to 80 novel ETn insertions in our pedigree (Figure 4A).", @@ -607,6 +633,7 @@ "$ref": "#/texts/19" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "To validate some of the novel ETn insertions, we designed specific PCR primers for five of the insertions and screened genomic DNA of the mice in which they were identified as well as their parents. For all tested insertions, we were able to amplify their flanking sequence and show that these insertions are absent in their parents (Figure 4\u2014figure supplement 3A). To confirm their identity, we amplified and sequenced three of the novel full-length ETn insertions. Two of these elements (Genbank accession: MH449667-68) resembled typical ETnII elements with identical 5\u2019 and 3\u2019 LTRs and target site duplications (TSD) of 4 or 6 bp, respectively. The third sequenced element (MH449669) represented a hybrid element that contains both ETnI and MusD (ETnERV) sequences. Similar insertions can be found in the B6 reference genome; however, the identified novel insertion has a 2.5 kb deletion of the 5\u2019 end of the internal region. Additionally, the 5\u2019 and 3\u2019 LTR of this element differ in one nucleotide near the start site and contain an unusually large 248 bp TSD (containing a SINE repeat) indicating that an improper integration process might have truncated this element.", @@ -618,6 +645,7 @@ "$ref": "#/texts/19" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Besides novel ETn insertions that were only identified in one specific animal, we also observed three ETn insertions that could be detected in several siblings but not in their parents or any of the other screened mice. This strongly indicates that these retrotransposition events occurred in the germ line of the parents from which they were passed on to some of their offspring. One of these germ line insertions was evidently passed on from the offspring to the next generation (Figure 4A). As expected, the read numbers supporting these novel germ line insertions were comparable to the read numbers that were found in the flanking regions of annotated B6 ETn insertions (Figure 4\u2014figure supplement 3B). In contrast, virtually all novel insertions that were only found in one animal were supported by significantly fewer reads (Figure 4\u2014figure supplement 3B). This indicates that these elements resulted from retrotransposition events in the developing embryo and not in the zygote or parental germ cells. Indeed, we detected different sets of insertions in various tissues from the same animal (Figure 4\u2014figure supplement 3C). Even between tail samples that were collected from the same animal at different ages, only a fraction of the new insertions were present in both samples, while technical replicates from the same genomic DNA samples showed a nearly complete overlap in insertions (Figure 4\u2014figure supplement 3D).", @@ -629,6 +657,7 @@ "$ref": "#/texts/19" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Finally, we asked whether there were more novel ETn insertions in mice lacking the Chr4-cl relative to their wild type and heterozygous littermates in our pedigree. Interestingly, only one out of the eight Chr4-cl KO mice in a pure C57BL/6 strain background and none of the eight offspring from a Chr2-cl mating carried a single novel ETn insertion (Figure 4A). When crossing into a 129Sv background for a single generation before intercrossing heterozygous mice (F1), we observed 4 out of 8 Chr4-cl KO mice that contained at least one new ETn insertion, whereas none of 3 heterozygous mice contained any insertions. After crossing to the 129Sv background for a second generation (F2), we determined the number of novel ETn insertions in the offspring of one KO/WT x KO and two KO/WT x KO/WT matings, excluding all samples that were not derived from juvenile tail tissue. Only in the offspring of the KO/WT x KO mating, we observed a statistically significant higher average number of ETn insertions in KO vs. KO/WT animals (7.3 vs. 29.6, p=0.045, Figure 4B). Other than that, only a non-significant trend towards greater average numbers of ETn insertions in KO (11 vs. 27.8, p=0.192, Figure 4B) was apparent in one of the WT/KO x KO/WT matings whereas no difference in ETn insertion numbers between WT and KO mice could be observed in the second mating WT/KO x KO/WT (26 vs. 31, p=0.668, Figure 4B). When comparing all KO with all WT and WT/KO mice from these three matings, a trend towards more ETn insertions in KO remained but was not supported by strong significance (26 vs. 13, p=0.057, Figure 4B). Altogether, we observed a high variability in the number of new ETn insertions in both KO and WT but our data suggest that the Chr4-cl KRAB-ZFPs may have a modest effect on ETn retrotransposition rates in some mouse strains but other genetic and epigenetic effects clearly also play an important role.", @@ -647,6 +676,7 @@ "$ref": "#/texts/28" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Discussion", @@ -659,6 +689,7 @@ "$ref": "#/texts/26" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "C2H2 zinc finger proteins, about half of which contain a KRAB repressor domain, represent the largest DNA-binding protein family in mammals. Nevertheless, most of these factors have not been investigated using loss-of-function studies. The most comprehensive characterization of human KRAB-ZFPs revealed a strong preference to bind TEs (Imbeault et al., 2017;\u00a0Najafabadi et al., 2015) yet their function remains unknown. In humans, very few TEs are capable of retrotransposition yet many of them, often tens of million years old, are bound by KRAB-ZFPs. While this suggests that human KRAB-ZFPs mainly serve to control TE-borne enhancers and may have potentially transcription-independent functions, we were interested in the biological significance of KRAB-ZFPs in restricting potentially active TEs. The mouse is an ideal model for such studies since the mouse genome contains several active TE families, including IAP, ETn and L1 elements. We found that many of the young KRAB-ZFPs present in the genomic clusters of KRAB-ZFPs on chromosomes 2 and 4, which are highly expressed in a restricted pattern in ES cells, bound redundantly to these three active TE families. In several cases, KRAB-ZFPs bound to functionally constrained sequence elements we and others have demonstrated to be necessary for retrotransposition, including PBS and viral packaging signals. Targeting such sequences may help the host defense system keep pace with rapidly evolving mouse transposons. This provides strong evidence that many young KRAB-ZFPs are indeed expanding in response to TE activity. But do these young KRAB-ZFP genes limit the mobilization of TEs? Despite the large number of polymorphic ETn elements in mouse strains (Nell\u00e5ker et al., 2012) and several reports of phenotype-causing novel ETn germ line insertions, no new ETn insertions were reported in recent screens of C57BL/6 mouse genomes (Richardson et al., 2017; Gagnier et al., 2019), indicating that the overall rate of ETn germ line mobilization in inbred mice is rather low. We have demonstrated that Chr4-cl KRAB-ZFPs control ETn/ETnERV expression in ES cells, but this does not lead to widespread ETn mobility in viable C57BL/6 mice. In contrast, we found numerous novel, including several germ line, ETn insertions in both WT and Chr4-cl KO mice in a C57BL/6 129Sv mixed genetic background, with generally more insertions in KO mice and in mice with more 129Sv DNA. This is consistent with a report detecting ETn insertions in FVB.129 mice (Schauer et al., 2018). Notably, there was a large variation in the number of new insertions in these mice, possibly caused by hyperactive polymorphic ETn insertions that varied from individual to individual, epigenetic variation at ETn insertions between individuals and/or the general stochastic nature of ETn mobilization. Furthermore, recent reports have suggested that KRAB-ZFP gene content is distinct in different strains of laboratory mice (Lilue et al., 2018; Treger et al., 2019), and reduced KRAB-ZFP gene content could contribute to increased activity in individual mice. Although we have yet to find obvious phenotypes in the mice carrying new insertions, novel ETn germ line insertions have been shown to cause phenotypes from short tails (Lugani et al., 2013; Semba et al., 2013; Vlangos et al., 2013) to limb malformation (Kano et al., 2007) and severe morphogenetic defects including polypodia (Lehoczky et al., 2013) depending upon their insertion site.", @@ -670,6 +701,7 @@ "$ref": "#/texts/26" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Despite a lack of widespread ETn activation in Chr4-cl KO mice, it still remains to be determined whether other TEs, like L1, IAP or other LTR retrotransposons are activated in any of the KRAB-ZFP cluster KO mice, which will require the development of additional capture-seq based assays. Notably, two of the heterozygous matings from Chr2-cl KO mice failed to produce viable knockout offspring, which could indicate a TE-reactivation phenotype. It may also be necessary to generate compound homozygous mutants of distinct KRAB-ZFP clusters to eliminate redundancy before TEs become unleashed. The KRAB-ZFP cluster knockouts produced here will be useful reagents to test such hypotheses. In sum, our data supports that a major driver of KRAB-ZFP gene expansion in mice is recent retrotransposon insertions, and that redundancy within the KRAB-ZFP gene family and with other TE restriction pathways provides protection against widespread TE mobility, explaining the non-essential function of the majority of KRAB-ZFP genes.", @@ -709,6 +741,7 @@ "$ref": "#/texts/47" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Materials and methods", @@ -725,6 +758,7 @@ "$ref": "#/texts/31" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Cell lines and transgenic mice", @@ -737,6 +771,7 @@ "$ref": "#/texts/30" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Mouse ES cells and F9 EC cells were cultivated as described previously (Wolf et al., 2015b) unless stated otherwise. Chr4-cl KO ES cells originate from B6;129\u2010 Gt(ROSA)26Sortm1(cre/ERT)Nat/J mice (Jackson lab), all other KRAB-ZFP cluster KO ES cell lines originate from JM8A3.N1 C57BL/6N-Atm1Brd ES cells (KOMP Repository). Chr2-cl KO and WT ES cells were initially grown in serum-containing media (Wolf et al., 2015b) but changed to 2i media (De Iaco et al., 2017) for several weeks before analysis. To generate Chr4-cl and Chr2-cl KO mice, the cluster deletions were repeated in B6 ES (KOMP repository) or R1 (Nagy lab) ES cells, respectively, and heterozygous clones were injected into B6 albino blastocysts. Chr2-cl KO mice were therefore kept on a mixed B6/Svx129/Sv-CP strain background while Chr4-cl KO mice were initially derived on a pure C57BL/6 background. For capture-seq screens, Chr4-cl KO mice were crossed with 129 \u00d7 1/SvJ mice (Jackson lab) to produce the founder mice for Chr4-cl KO and WT (B6/129 F1) offspring. Chr4-cl KO/WT (B6/129 F1) were also crossed with 129 \u00d7 1/SvJ mice to get Chr4-cl KO/WT (B6/129 F1) mice, which were intercrossed to give rise to the parents of Chr4-cl KO/KO and KO/WT (B6/129 F2) offspring.", @@ -752,6 +787,7 @@ "$ref": "#/texts/33" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Generation of KRAB-ZFP expressing cell lines", @@ -764,6 +800,7 @@ "$ref": "#/texts/32" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "KRAB-ZFP ORFs were PCR-amplified from cDNA or synthesized with codon-optimization (Supplementary file 1), and stably expressed with 3XFLAG or 3XHA tags in F9 EC or ES cells using Sleeping beauty transposon-based (Wolf et al., 2015b) or lentiviral expression vectors (Imbeault et al., 2017; Supplementary file 1). Cells were selected with puromycin (1 \u00b5g/ml) and resistant clones were pooled and further expanded for ChIP-seq.", @@ -779,6 +816,7 @@ "$ref": "#/texts/35" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "CRISPR/Cas9 mediated deletion of KRAB-ZFP clusters and an MMETn insertion", @@ -791,6 +829,7 @@ "$ref": "#/texts/34" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "All gRNAs were expressed from the pX330-U6-Chimeric_BB-CBh-hSpCas9 vector (RRID:Addgene_42230) and nucleofected into 106 ES cells using Amaxa nucleofection in the following amounts: 5 \u00b5g of each pX330-gRNA plasmid, 1 \u00b5g pPGK-puro and 500 pmoles single-stranded repair oligos (Supplementary file 3). One day after nucleofection, cells were kept under puromycin selection (1 \u00b5g/ml) for 24 hr. Individual KO and WT clones were picked 7\u20138 days after nucleofection and expanded for PCR genotyping (Supplementary file 3).", @@ -809,6 +848,7 @@ "$ref": "#/texts/38" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "ChIP-seq analysis", @@ -821,6 +861,7 @@ "$ref": "#/texts/36" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "For ChIP-seq analysis of KRAB-ZFP expressing cells, 5\u201310 \u00d7 107 cells were crosslinked and immunoprecipitated with anti-FLAG (Sigma-Aldrich Cat# F1804, RRID:AB_262044) or anti-HA (Abcam Cat# ab9110, RRID:AB_307019 or Covance Cat# MMS-101P-200, RRID:AB_10064068) antibody using one of two previously described protocols (O'Geen et al., 2010; Imbeault et al., 2017) as indicated in Supplementary file 1. H3K9me3 distribution in Chr4-cl, Chr10-cl, Chr13.1-cl and Chr13.2-cl KO ES cells was determined by native ChIP-seq with anti-H3K9me3 serum (Active Motif Cat# 39161, RRID:AB_2532132) as described previously (Karimi et al., 2011). In Chr2-cl KO ES cells, H3K9me3 and KAP1 ChIP-seq was performed as previously described (Ecco et al., 2016). In Chr4-cl KO and WT ES cells KAP1 binding was determined by endogenous tagging of KAP1 with C-terminal GFP (Supplementary file 3), followed by FACS to enrich for GFP-positive cells and ChIP with anti-GFP (Thermo Fisher Scientific Cat# A-11122, RRID:AB_221569) using a previously described protocol (O'Geen et al., 2010). For ChIP-seq analysis of active histone marks, cross-linked chromatin from ES cells or testis (from two-week old mice) was immunoprecipitated with antibodies against H3K4me3 (Abcam Cat# ab8580, RRID:AB_306649), H3K4me1 (Abcam Cat# ab8895, RRID:AB_306847) and H3K27ac (Abcam Cat# ab4729, RRID:AB_2118291) following the protocol developed by O'Geen et al., 2010 or Khil et al., 2012 respectively.", @@ -832,6 +873,7 @@ "$ref": "#/texts/36" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "ChIP-seq libraries were constructed and sequenced as indicated in Supplementary file 4. Reads were mapped to the mm9 genome using Bowtie (RRID:SCR_005476; settings: --best) or Bowtie2 (Langmead and Salzberg, 2012) as indicated in Supplementary file 4. Under these settings, reads that map to multiple genomic regions are assigned to the top-scored match and, if a set of equally good choices is encountered, a pseudo-random number is used to choose one location. Peaks were called using MACS14 (RRID:SCR_013291) under high stringency settings (p<1e-10, peak enrichment\u00a0>20) (Zhang et al., 2008). Peaks were called both over the Input control and a FLAG or HA control ChIP (unless otherwise stated in Supplementary file 4) and only peaks that were called in both settings were kept for further analysis. In cases when the stringency settings did not result in at least 50 peaks, the settings were changed to medium (p<1e-10, peak enrichment\u00a0>10) or low (p<1e-5, peak enrichment\u00a0>10) stringency (Supplementary file 4). For further analysis, all peaks were scaled to 200 bp regions centered around the peak summits. The overlap of the scaled peaks to each repeat element in UCSC Genome Browser (RRID:SCR_005780) were calculated by using the bedfisher function (settings: -f 0.25) from BEDTools (RRID:SCR_006646). The right-tailed p-values between pair-wise comparison of each ChIP-seq peak and repeat element were extracted, and then adjusted using the Benjamini-Hochberg approach implemented in the R function p.adjust(). Binding motifs were determined using only nonrepetitive (<10% repeat content) peaks with MEME (Bailey et al., 2009). MEME motifs were compared with in silico predicted motifs (Najafabadi et al., 2015) using Tomtom (Bailey et al., 2009) and considered as significantly overlapping with a False Discovery Rate (FDR) below 0.1. To find MEME and predicted motifs in repetitive peaks, we used FIMO (Bailey et al., 2009). Differential H3K9me3 and KAP1 distribution in WT and Chr2-cl or Chr4-cl KO ES cells at TEs was determined by counting ChIP-seq reads overlapping annotated insertions of each TE group using BEDTools (MultiCovBed). Additionally, ChIP-seq reads were counted at the TE fraction that was bound by Chr2-cl or Chr4-cl KRAB-ZFPs (overlapping with 200 bp peaks). Count tables were concatenated and analyzed using DESeq2 (Love et al., 2014). The previously published ChIP-seq datasets for KAP1 (Castro-Diaz et al., 2014) and H3K9me3 (Dan et al., 2014) were re-mapped using Bowtie (--best).", @@ -847,6 +889,7 @@ "$ref": "#/texts/40" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Luciferase reporter assays", @@ -859,6 +902,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "For KRAB-ZFP repression assays, double-stranded DNA oligos containing KRAB-ZFP target sequences (Supplementary file 3) were cloned upstream of the SV40 promoter of the pGL3-Promoter vector (Promega) between the restriction sites for NheI and XhoI. 33 ng of reporter vectors were co-transfected (Lipofectamine 2000, Thermofisher) with 33 ng pRL-SV40 (Promega) for normalization and 33 ng of transient KRAB-ZFP expression vectors (in pcDNA3.1) or empty pcDNA3.1 into 293 T cells seeded one day earlier in 96-well plates. Cells were lysed 48 hr after transfection and luciferase/Renilla luciferase activity was measured using the Dual-Luciferase Reporter Assay System (Promega). To measure the transcriptional activity of the MMETn element upstream of the Cd59a gene, fragments of varying sizes (Supplementary file 3) were cloned into the promoter-less pGL3-basic vector (Promega) using NheI and NcoI sites. 70 ng of reporter vectors were cotransfected with 30 ng pRL-SV40 into feeder-depleted Chr4-cl WT and KO ES cells, seeded into a gelatinized 96-well plate 2 hr before transfection. Luciferase activity was measured 48 hr after transfection as described above.", @@ -874,6 +918,7 @@ "$ref": "#/texts/42" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "RNA-seq analysis", @@ -886,6 +931,7 @@ "$ref": "#/texts/41" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Whole RNA was purified using RNeasy columns (Qiagen) with on column DNase treatment or the High Pure RNA Isolation Kit (Roche) (Supplementary file 4). Tissues were first lysed in TRIzol reagent (ThermoFisher) and RNA was purified after the isopropanol precipitation step using RNeasy columns (Qiagen) with on column DNase treatment. Libraries were generated using the SureSelect Strand-Specific RNA Library Prep kit (Agilent) or Illumina\u2019s TruSeq RNA Library Prep Kit (with polyA selection) and sequenced as 50 or 100 bp paired-end reads on an Illumina HiSeq2500 (RRID:SCR_016383) or HiSeq3000 (RRID:SCR_016386) machine (Supplementary file 4). RNA-seq reads were mapped to the mouse genome (mm9) using Tophat (RRID:SCR_013035; settings: --I 200000 g 1) unless otherwise stated. These settings allow each mappable read to be reported once, in case the read maps to multiple locations equally well, one match is randomly chosen. For differential transposon expression, mapped reads that overlap with TEs annotated in Repeatmasker (RRID:SCR_012954) were counted using BEDTools MultiCovBed (setting: -split). Reads mapping to multiple fragments that belong to the same TE insertion (as indicated by the repeat ID) were summed up. Only transposons with a total of at least 20 (for two biological replicates) or 30 (for three biological replicates) mapped reads across WT and KO samples were considered for differential expression analysis. Transposons within the deleted KRAB-ZFP cluster were excluded from the analysis. Read count tables were used for differential expression analysis with DESeq2 (RRID:SCR_015687). For differential gene expression analysis, reads overlapping with gene exons were counted using HTSeq-count and analyzed using DESeq2. To test if KRAB-ZFP peaks are significantly enriched near up- or down-regulated genes, a binomial test was performed. Briefly, the proportion of the peaks that are located within a certain distance up- or downstream to the TSS of genes was determined using the windowBed function of BED tools. The probability p in the binomial distribution was estimated as the fraction of all genes overlapped with KRAB-ZFP peaks. Then, given n which is the number of specific groups of genes, and x which is the number of this group of genes overlapped with peaks, the R function binom.test() was used to estimate the p-value based on right-tailed Binomial test. Finally, the adjusted p-values were determined separately for LTR and LINE retrotransposon groups using the Benjamini-Hochberg approach implemented in the R function p.adjust().", @@ -901,6 +947,7 @@ "$ref": "#/texts/44" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Reduced representation bisulfite sequencing (RRBS-seq)", @@ -913,6 +960,7 @@ "$ref": "#/texts/43" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "For RRBS-seq analysis, Chr4-cl WT and KO ES cells were grown in either standard ES cell media containing FCS or for one week in 2i media containing vitamin C as described previously (Blaschke et al., 2013). Genomic DNA was purified from WT and Chr4-cl KO ES cells using the Quick-gDNA purification kit (Zymo Research) and bisulfite-converted with the NEXTflex Bisulfite-Seq Kit (Bio Scientific) using Msp1 digestion to fragment DNA. Libraries were sequenced as 50 bp paired-end reads on an Illumina HiSeq. The reads were processed using Trim Galore (--illumina --paired \u2013rrbs) to trim poor quality bases and adaptors. Additionally, the first 5 nt of R2 and the last 3 nt of R1 and R2 were trimmed. Reads were then mapped to the reference genome (mm9) using Bismark (Krueger and Andrews, 2011) to extract methylation calling results. The CpG methylation pattern for each covered CpG dyads (two complementary CG dinucleotides) was calculated using a custom script (Source code 1: get_CpG_ML.pl). For comparison of CpG methylation between WT and Chr4-cl KO ES cells (in serum or 2i + Vitamin C conditions) only CpG sites with at least 10-fold coverage in each sample were considered for analysis.", @@ -928,6 +976,7 @@ "$ref": "#/texts/46" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Retrotransposition assay", @@ -940,6 +989,7 @@ "$ref": "#/texts/45" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The retrotransposition vectors pCMV-MusD2, pCMV-MusD2-neoTNF and pCMV-ETnI1-neoTNF (Ribet et al., 2004) were a kind gift from Dixie Mager. To partially delete the Gm13051 binding site within pCMV-MusD2-neoTNF, the vector was cut with KpnI and re-ligated using a repair oligo, leaving a 24 bp deletion within the Gm13051 binding site. The Rex2 binding site in pCMV-ETnI1-neoTNF was deleted by cutting the vector with EcoRI and XbaI followed by re-ligation using two overlapping PCR products, leaving a 45 bp deletion while maintaining the rest of the vector unchanged (see Supplementary file 3 for primer sequences). For MusD retrotransposition assays, 5 \u00d7 104 HeLa cells (ATCC CCL-2) were transfected in a 24-well dish with 100 ng pCMV-MusD2-neoTNF or pCMV-MusD2-neoTNF (\u0394Gm13051-m) using Lipofectamine 2000. For ETn retrotransposition assays, 50 ng of pCMV-ETnI1-neoTNF or pCMV-ETnI1-neoTNF (\u0394Rex2) vectors were cotransfected with 50 ng pCMV-MusD2 to provide gag and pol proteins in trans. G418 (0.6 mg/ml) was added five days after transfection and cells were grown under selection until colonies were readily visible by eye. G418-resistant colonies were stained with Amido Black (Sigma).", @@ -955,6 +1005,7 @@ "$ref": "#/texts/48" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Capture-seq screen", @@ -967,6 +1018,7 @@ "$ref": "#/texts/47" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "To identify novel retrotransposon insertions, genomic DNA from various tissues (Supplementary file 4) was purified and used for library construction with target enrichment using the SureSelectQXT Target Enrichment kit (Agilent). Custom RNA capture probes were designed to hybridize with the 120 bp 5\u2019 ends of the 5\u2019 LTRs and the 120 bp 3\u2019 ends of the 3\u2019 LTR of about 600 intact (internal region flanked by two LTRs) MMETn/RLTRETN retrotransposons or of 140 RLTR4_MM/RLTR4 retrotransposons that were upregulated in Chr4-cl KO ES cells (Figure 4\u2014source data 2). Enriched libraries were sequenced on an Illumina HiSeq as paired-end 50 bp reads. R1 and R2 reads were mapped to the mm9 genome separately, using settings that only allow non-duplicated, uniquely mappable reads (Bowtie -m 1 --best --strata; samtools rmdup -s) and under settings that allow multimapping and duplicated reads (Bowtie --best). Of the latter, only reads that overlap (min. 50% of read) with RLTRETN, MMETn-int, ETnERV-int, ETnERV2-int or ETnERV3-int repeats (ETn) or RLTR4, RLTR4_MM-int or MuLV-int repeats (RLTR4) were kept. Only uniquely mappable reads whose paired reads were overlapping with the repeats mentioned above were used for further analysis. All ETn- and RLTR4-paired reads were then clustered (as bed files) using BEDTools (bedtools merge -i -n -d 1000) to receive a list of all potential annotated and non-annotated new ETn or RLTR4 insertion sites and all overlapping ETn- or RLTR4-paired reads were counted for each sample at each locus. Finally, all regions that were located within 1 kb of an annotated RLTRETN, MMETn-int, ETnERV-int, ETnERV2-int or ETnERV3-int repeat as well as regions overlapping with previously identified polymorphic ETn elements (Nell\u00e5ker et al., 2012) were removed. Genomic loci with at least 10 reads per million unique ETn- or RLTR4-paired reads were considered as insertion sites. To qualify for a de-novo insertion, we allowed no called insertions in any of the other screened mice at the locus and not a single read at the locus in the ancestors of the mouse. Insertions at the same locus in at least two siblings from the same offspring were considered as germ line insertions, if the insertion was absent in the parents and mice who were not direct descendants from these siblings. Full-length sequencing of new ETn insertions was done by Sanger sequencing of short PCR products in combination with Illumina sequencing of a large PCR product (Supplementary file 3), followed by de-novo assembly using the Unicycler software.", @@ -985,6 +1037,7 @@ "$ref": "#/tables/1" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Tables", @@ -997,6 +1050,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Table 1.: * Number of protein-coding KRAB-ZFP genes identified in a previously published screen (Imbeault et al., 2017) and the ChIP-seq data column indicates the number of KRAB-ZFPs for which ChIP-seq was performed in this study.", @@ -1008,6 +1062,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Key resources table: ", @@ -1053,6 +1108,7 @@ "$ref": "#/pictures/10" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Figures", @@ -1065,6 +1121,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Figure 1.: Genome-wide binding patterns of mouse KRAB-ZFPs.\n(A) Probability heatmap of KRAB-ZFP binding to TEs. Blue color intensity (main field) corresponds to -log10 (adjusted p-value) enrichment of ChIP-seq peak overlap with TE groups (Fisher\u2019s exact test). The green/red color intensity (top panel) represents mean KAP1 (GEO accession: GSM1406445) and H3K9me3 (GEO accession: GSM1327148) enrichment (respectively) at peaks overlapping significantly targeted TEs (adjusted p-value<1e-5) in WT ES cells. (B) Summarized ChIP-seq signal for indicated KRAB-ZFPs and previously published KAP1 and H3K9me3 in WT ES cells across 127 intact ETn elements. (C) Heatmaps of KRAB-ZFP ChIP-seq signal at ChIP-seq peaks. For better comparison, peaks for all three KRAB-ZFPs were called with the same parameters (p<1e-10, peak enrichment\u00a0>20). The top panel shows a schematic of the arrangement of the contact amino acid composition of each zinc finger. Zinc fingers are grouped and colored according to similarity, with amino acid differences relative to the five consensus fingers highlighted in white.\nFigure 1\u2014source data 1.KRAB-ZFP expression in 40 mouse tissues and cell lines (ENCODE).Mean values of replicates are shown as log2 transcripts per million.\nFigure 1\u2014source data 2.Probability heatmap of KRAB-ZFP binding to TEs.Values corresponds to -log10 (adjusted p-value) enrichment of ChIP-seq peak overlap with TE groups (Fisher\u2019s exact test).", @@ -1076,6 +1133,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Figure 1\u2014figure supplement 1.: ES cell-specific expression of KRAB-ZFP gene clusters.\n(A) Heatmap showing expression patterns of mouse KRAB-ZFPs in 40 mouse tissues and cell lines (ENCODE). Heatmap colors indicate gene expression levels in log2 transcripts per million (TPM). The asterisk indicates a group of 30 KRAB-ZFPs that are exclusively expressed in ES cells. (B) Physical location of the genes encoding for the 30 KRAB-ZFPs that are exclusively expressed in ES cells. (C) Phylogenetic (Maximum likelihood) tree of the KRAB domains of mouse KRAB-ZFPs. KRAB-ZFPs encoded on the gene clusters on chromosome 2 and 4 are highlighted. The scale bar at the bottom indicates amino acid substitutions per site.", @@ -1087,6 +1145,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Figure 1\u2014figure supplement 2.: KRAB-ZFP binding motifs and their repression activity.\n(A) Comparison of computationally predicted (bottom) and experimentally determined (top) KRAB-ZFP binding motifs. Only significant pairs are shown (FDR\u00a0<\u00a00.1). (B) Luciferase reporter assays to confirm KRAB-ZFP repression of the identified target sites. Bars show the luciferase activity (normalized to Renilla luciferase) of reporter plasmids containing the indicated target sites cloned upstream of the SV40 promoter. Reporter plasmids were co-transfected into 293 T cells with a Renilla luciferase plasmid for normalization and plasmids expressing the targeting KRAB-ZFP. Normalized mean luciferase activity (from three replicates) is shown relative to luciferase activity of the reporter plasmid co-transfected with an empty pcDNA3.1 vector.", @@ -1098,6 +1157,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Figure 1\u2014figure supplement 3.: KRAB-ZFP binding to ETn retrotransposons.\n(A) Comparison of the PBSLys1,2 sequence with Zfp961 binding motifs in nonrepetitive peaks (Nonrep) and peaks at ETn elements. (B) Retrotransposition assays of original (ETnI1-neoTNF and MusD2-neoTNF Ribet et al., 2004) and modified reporter vectors where the Rex2 or Gm13051 binding motifs where removed. Schematic of reporter vectors are displayed at the top. HeLa cells were transfected as described in the Materials and Methods section and neo-resistant colonies, indicating retrotransposition events, were selected and stained. (C) Stem-loop structure of the ETn RNA export signal, the Gm13051 motif on the corresponding DNA is marked with red circles, the part of the motif that was deleted is indicated with grey crosses (adapted from Legiewicz et al., 2010).", @@ -1109,6 +1169,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Figure 2.: Retrotransposon reactivation in KRAB-ZFP cluster KO ES cells.\n(A) RNA-seq analysis of TE expression in five KRAB-ZFP cluster KO ES cells. Green and grey squares on top of the panel represent KRAB-ZFPs with or without ChIP-seq data, respectively, within each deleted gene cluster. Reactivated TEs that are bound by one or several KRAB-ZFPs are indicated by green squares in the panel. Significantly up- and downregulated elements (adjusted p-value<0.05) are highlighted in red and green, respectively. (B) Differential KAP1 binding and H3K9me3 enrichment at TE groups (summarized across all insertions) in Chr2-cl and Chr4-cl KO ES cells. TE groups targeted by one or several KRAB-ZFPs encoded within the deleted clusters are highlighted in blue (differential enrichment over the entire TE sequences) and red (differential enrichment at TE regions that overlap with KRAB-ZFP ChIP-seq peaks). (C) DNA methylation status of CpG sites at indicated TE groups in WT and Chr4-cl KO ES cells grown in serum containing media or in hypomethylation-inducing media (2i + Vitamin C). P-values were calculated using paired t-test.\nFigure 2\u2014source data 1.Differential H3K9me3 and KAP1 distribution in WT and KRAB-ZFP cluster KO ES cells at TE families and KRAB-ZFP bound TE insertions.Differential read counts and statistical testing were determined by DESeq2.", @@ -1120,6 +1181,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Figure 2\u2014figure supplement 1.: Epigenetic changes at TEs and TE-borne enhancers in KRAB-ZFP cluster KO ES cells.\n(A) Differential analysis of summative (all individual insertions combined) H3K9me3 enrichment at TE groups in Chr10-cl, Chr13.1-cl and Chr13.2-cl KO ES cells.\u00a0TE groups targeted by one or several KRAB-ZFPs encoded within the deleted clusters are highlighted in orange (differential enrichment over the entire TE sequences) and red (differential enrichment at TE regions that overlap with KRAB-ZFP ChIP-seq peaks). (B) Top: Schematic view of the Cd59a/Cd59b locus with a 5\u2019 truncated ETn insertion. ChIP-seq (Input subtracted from ChIP) data for overexpressed epitope-tagged Gm13051 (a Chr4-cl KRAB-ZFP) in F9 EC cells, and re-mapped KAP1 (GEO accession: GSM1406445) and H3K9me3 (GEO accession: GSM1327148) in WT ES cells are shown together with RNA-seq data from Chr4-cl WT and KO ES cells (mapped using Bowtie (-a -m 1 --strata -v 2) to exclude reads that cannot be uniquely mapped). Bottom: Transcriptional activity of a 5 kb fragment with or without fragments of the ETn insertion was tested by luciferase reporter assay in Chr4-cl WT and KO ES cells.", @@ -1131,6 +1193,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Figure 3.: TE-dependent gene activation in KRAB-ZFP cluster KO ES cells.\n(A) Differential gene expression in Chr2-cl and Chr4-cl KO ES cells. Significantly up- and downregulated genes (adjusted p-value<0.05) are highlighted in red and green, respectively, KRAB-ZFP genes within the deleted clusters are shown in blue. (B) Correlation of TEs and gene deregulation. Plots show enrichment of TE groups within 100 kb of up- and downregulated genes relative to all genes. Significantly overrepresented LTR and LINE groups (adjusted p-value<0.1) are highlighted in blue and red, respectively. (C) Schematic view of the downstream region of Chst1 where a 5\u2019 truncated ETn insertion is located. ChIP-seq (Input subtracted from ChIP) data for overexpressed epitope-tagged Gm13051 (a Chr4-cl KRAB-ZFP) in F9 EC cells, and re-mapped KAP1 (GEO accession: GSM1406445) and H3K9me3 (GEO accession: GSM1327148) in WT ES cells are shown together with RNA-seq data from Chr4-cl WT and KO ES cells (mapped using Bowtie (-a -m 1 --strata -v 2) to exclude reads that cannot be uniquely mapped). (D) RT-qPCR analysis of Chst1 mRNA expression in Chr4-cl WT and KO ES cells with or without the CRISPR/Cas9 deleted ETn insertion near Chst1. Values represent mean expression (normalized to Gapdh) from three biological replicates per sample (each performed in three technical replicates) in arbitrary units. Error bars represent standard deviation and asterisks indicate significance (p<0.01, Student\u2019s t-test). n.s.: not significant. (E) Mean coverage of ChIP-seq data (Input subtracted from ChIP) in Chr4-cl WT and KO ES cells over 127 full-length ETn insertions. The binding sites of the Chr4-cl KRAB-ZFPs Rex2 and Gm13051 are indicated by dashed lines.", @@ -1142,6 +1205,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Figure 4.: ETn retrotransposition in Chr4-cl KO mice.\n(A) Pedigree of mice used for transposon insertion screening by capture-seq in mice of different strain backgrounds. The number of novel ETn insertions (only present in one animal) are indicated. For animals whose direct ancestors have not been screened, the ETn insertions are shown in parentheses since parental inheritance cannot be excluded in that case. Germ line insertions are indicated by asterisks. All DNA samples were prepared from tail tissues unless noted (-S: spleen, -E: ear, -B:Blood) (B) Statistical analysis of ETn insertion frequency in tail tissue from 30 Chr4-cl KO, KO/WT and WT mice that were derived from one Chr4-c KO x KO/WT and two Chr4-cl KO/WT x KO/WT matings. Only DNA samples that were collected from juvenile tails were considered for this analysis. P-values were calculated using one-sided Wilcoxon Rank Sum Test. In the last panel, KO, WT and KO/WT mice derived from all matings were combined for the statistical analysis.\nFigure 4\u2014source data 1.Coordinates of identified novel ETn insertions and supporting capture-seq read counts.Genomic regions indicate cluster of supporting reads.\nFigure 4\u2014source data 2.Sequences of capture-seq probes used to enrich genomic DNA for ETn and MuLV (RLTR4) insertions.", @@ -1153,6 +1217,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Figure 4\u2014figure supplement 1.: Birth statistics of KRAB-ZFP cluster KO mice and TE reactivation in adult tissues.\n(A) Birth statistics of Chr4- and Chr2-cl mice derived from KO/WT x KO/WT matings in different strain backgrounds.\u00a0(B) RNA-seq analysis of TE expression in Chr2- (left) and Chr4-cl (right) KO tissues. TE groups with the highest reactivation phenotype in ES cells are shown separately. Significantly up- and downregulated elements (adjusted p-value<0.05) are highlighted in red and green, respectively. Experiments were performed in at least two biological replicates.", @@ -1164,6 +1229,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Figure 4\u2014figure supplement 2.: Identification of polymorphic ETn and MuLV retrotransposon insertions in Chr4-cl KO and WT mice.\nHeatmaps show normalized capture-seq read counts in RPM (Read Per Million) for identified polymorphic ETn (A) and MuLV (B) loci in different mouse strains. Only loci with strong support for germ line ETn or MuLV insertions (at least 100 or 3000 ETn or MuLV RPM, respectively) in at least two animals are shown. Non-polymorphic insertion loci with high read counts in all screened mice were excluded for better visibility. The sample information (sample name and cell type/tissue) is annotated at the bottom, with the strain information indicated by color at the top. The color gradient indicates log10(RPM+1).", @@ -1175,6 +1241,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Figure 4\u2014figure supplement 3.: Confirmation of novel ETn insertions identified by capture-seq.\n(A) PCR validation of novel ETn insertions in genomic DNA of three littermates (IDs: T09673, T09674 and T00436) and their parents (T3913 and T3921). Primer sequences are shown in Supplementary file 3. (B) ETn capture-seq read counts (RPM) at putative novel somatic (loci identified exclusively in one single animal), novel germ line (loci identified in several littermates) insertions, and at B6 reference ETn elements. (C) Heatmap shows capture-seq read counts (RPM) of a Chr4-cl KO mouse (ID: C6733) as determined in different tissues. Each row represents a novel ETn locus that was identified in at least one tissue. The color gradient indicates log10(RPM+1). (D) Heatmap shows the capture-seq RPM in technical replicates using the same Chr4-cl KO DNA sample (rep1/rep2) or replicates with DNA samples prepared from different sections of the tail from the same mouse at different ages (tail1/tail2). Each row represents a novel ETn locus that was identified in at least one of the displayed samples. The color gradient indicates log10(RPM+1).", @@ -1190,6 +1257,7 @@ "$ref": "#/groups/0" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "References", @@ -1202,6 +1270,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "TL Bailey; M Boden; FA Buske; M Frith; CE Grant; L Clementi; J Ren; WW Li; WS Noble. MEME SUITE: tools for motif discovery and searching. Nucleic Acids Research (2009)", @@ -1215,6 +1284,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "C Baust; L Gagnier; GJ Baillie; MJ Harris; DM Juriloff; DL Mager. Structure and expression of mobile ETnII retroelements and their coding-competent MusD relatives in the mouse. Journal of Virology (2003)", @@ -1228,6 +1298,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "K Blaschke; KT Ebata; MM Karimi; JA Zepeda-Mart\u00ednez; P Goyal; S Mahapatra; A Tam; DJ Laird; M Hirst; A Rao; MC Lorincz; M Ramalho-Santos. Vitamin C induces Tet-dependent DNA demethylation and a blastocyst-like state in ES cells. Nature (2013)", @@ -1241,6 +1312,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "A Brodziak; E Zi\u00f3\u0142ko; M Muc-Wierzgo\u0144; E Nowakowska-Zajdel; T Kokot; K Klakla. The role of human endogenous retroviruses in the pathogenesis of autoimmune diseases. Medical Science Monitor : International Medical Journal of Experimental and Clinical Research (2012)", @@ -1254,6 +1326,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "N Castro-Diaz; G Ecco; A Coluccio; A Kapopoulou; B Yazdanpanah; M Friedli; J Duc; SM Jang; P Turelli; D Trono. Evolutionally dynamic L1 regulation in embryonic stem cells. Genes & Development (2014)", @@ -1267,6 +1340,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "EB Chuong; NC Elde; C Feschotte. Regulatory evolution of innate immunity through co-option of endogenous retroviruses. Science (2016)", @@ -1280,6 +1354,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "J Dan; Y Liu; N Liu; M Chiourea; M Okuka; T Wu; X Ye; C Mou; L Wang; L Wang; Y Yin; J Yuan; B Zuo; F Wang; Z Li; X Pan; Z Yin; L Chen; DL Keefe; S Gagos; A Xiao; L Liu. Rif1 maintains telomere length homeostasis of ESCs by mediating heterochromatin silencing. Developmental Cell (2014)", @@ -1293,6 +1368,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "A De Iaco; E Planet; A Coluccio; S Verp; J Duc; D Trono. DUX-family transcription factors regulate zygotic genome activation in placental mammals. Nature Genetics (2017)", @@ -1306,6 +1382,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u00d6 Deniz; L de la Rica; KCL Cheng; D Spensberger; MR Branco. SETDB1 prevents TET2-dependent activation of IAP retroelements in na\u00efve embryonic stem cells. Genome Biology (2018)", @@ -1319,6 +1396,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "M Dewannieux; T Heidmann. Endogenous retroviruses: acquisition, amplification and taming of genome invaders. Current Opinion in Virology (2013)", @@ -1332,6 +1410,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "G Ecco; M Cassano; A Kauzlaric; J Duc; A Coluccio; S Offner; M Imbeault; HM Rowe; P Turelli; D Trono. Transposable elements and their KRAB-ZFP controllers regulate gene expression in adult tissues. Developmental Cell (2016)", @@ -1345,6 +1424,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "G Ecco; M Imbeault; D Trono. KRAB zinc finger proteins. Development (2017)", @@ -1358,6 +1438,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "JA Frank; C Feschotte. Co-option of endogenous viral sequences for host cell function. Current Opinion in Virology (2017)", @@ -1371,6 +1452,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "L Gagnier; VP Belancio; DL Mager. Mouse germ line mutations due to retrotransposon insertions. Mobile DNA (2019)", @@ -1384,6 +1466,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "AC Groner; S Meylan; A Ciuffi; N Zangger; G Ambrosini; N D\u00e9nervaud; P Bucher; D Trono. KRAB-zinc finger proteins and KAP1 can mediate long-range transcriptional repression through heterochromatin spreading. PLOS Genetics (2010)", @@ -1397,6 +1480,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "DC Hancks; HH Kazazian. Roles for retrotransposon insertions in human disease. Mobile DNA (2016)", @@ -1410,6 +1494,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "M Imbeault; PY Helleboid; D Trono. KRAB zinc-finger proteins contribute to the evolution of gene regulatory networks. Nature (2017)", @@ -1423,6 +1508,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "FM Jacobs; D Greenberg; N Nguyen; M Haeussler; AD Ewing; S Katzman; B Paten; SR Salama; D Haussler. An evolutionary arms race between KRAB zinc-finger genes ZNF91/93 and SVA/L1 retrotransposons. Nature (2014)", @@ -1436,6 +1522,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "H Kano; H Kurahashi; T Toda. Genetically regulated epigenetic transcriptional activation of retrotransposon insertion confers mouse dactylaplasia phenotype. PNAS (2007)", @@ -1449,6 +1536,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "MM Karimi; P Goyal; IA Maksakova; M Bilenky; D Leung; JX Tang; Y Shinkai; DL Mager; S Jones; M Hirst; MC Lorincz. DNA methylation and SETDB1/H3K9me3 regulate predominantly distinct sets of genes, retroelements, and chimeric transcripts in mESCs. Cell Stem Cell (2011)", @@ -1462,6 +1550,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "A Kauzlaric; G Ecco; M Cassano; J Duc; M Imbeault; D Trono. The mouse genome displays highly dynamic populations of KRAB-zinc finger protein genes and related genetic units. PLOS ONE (2017)", @@ -1475,6 +1564,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "PP Khil; F Smagulova; KM Brick; RD Camerini-Otero; GV Petukhova. Sensitive mapping of recombination hotspots using sequencing-based detection of ssDNA. Genome Research (2012)", @@ -1488,6 +1578,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "F Krueger; SR Andrews. Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. Bioinformatics (2011)", @@ -1501,6 +1592,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "B Langmead; SL Salzberg. Fast gapped-read alignment with bowtie 2. Nature Methods (2012)", @@ -1514,6 +1606,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "M Legiewicz; AS Zolotukhin; GR Pilkington; KJ Purzycka; M Mitchell; H Uranishi; J Bear; GN Pavlakis; SF Le Grice; BK Felber. The RNA transport element of the murine musD retrotransposon requires long-range intramolecular interactions for function. Journal of Biological Chemistry (2010)", @@ -1527,6 +1620,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "JA Lehoczky; PE Thomas; KM Patrie; KM Owens; LM Villarreal; K Galbraith; J Washburn; CN Johnson; B Gavino; AD Borowsky; KJ Millen; P Wakenight; W Law; ML Van Keuren; G Gavrilina; ED Hughes; TL Saunders; L Brihn; JH Nadeau; JW Innis. A novel intergenic ETnII-\u03b2 insertion mutation causes multiple malformations in Polypodia mice. PLOS Genetics (2013)", @@ -1540,6 +1634,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "D Leung; T Du; U Wagner; W Xie; AY Lee; P Goyal; Y Li; KE Szulwach; P Jin; MC Lorincz; B Ren. Regulation of DNA methylation turnover at LTR retrotransposons and imprinted loci by the histone methyltransferase Setdb1. PNAS (2014)", @@ -1553,6 +1648,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "J Lilue; AG Doran; IT Fiddes; M Abrudan; J Armstrong; R Bennett; W Chow; J Collins; S Collins; A Czechanski; P Danecek; M Diekhans; DD Dolle; M Dunn; R Durbin; D Earl; A Ferguson-Smith; P Flicek; J Flint; A Frankish; B Fu; M Gerstein; J Gilbert; L Goodstadt; J Harrow; K Howe; X Ibarra-Soria; M Kolmogorov; CJ Lelliott; DW Logan; J Loveland; CE Mathews; R Mott; P Muir; S Nachtweide; FCP Navarro; DT Odom; N Park; S Pelan; SK Pham; M Quail; L Reinholdt; L Romoth; L Shirley; C Sisu; M Sjoberg-Herrera; M Stanke; C Steward; M Thomas; G Threadgold; D Thybert; J Torrance; K Wong; J Wood; B Yalcin; F Yang; DJ Adams; B Paten; TM Keane. Sixteen diverse laboratory mouse reference genomes define strain-specific haplotypes and novel functional loci. Nature Genetics (2018)", @@ -1566,6 +1662,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "S Liu; J Brind'Amour; MM Karimi; K Shirane; A Bogutz; L Lefebvre; H Sasaki; Y Shinkai; MC Lorincz. Setdb1 is required for germline development and silencing of H3K9me3-marked endogenous retroviruses in primordial germ cells. Genes & Development (2014)", @@ -1579,6 +1676,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "MI Love; W Huber; S Anders. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology (2014)", @@ -1592,6 +1690,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "F Lugani; R Arora; N Papeta; A Patel; Z Zheng; R Sterken; RA Singer; G Caridi; C Mendelsohn; L Sussel; VE Papaioannou; AG Gharavi. A retrotransposon insertion in the 5' regulatory domain of Ptf1a results in ectopic gene expression and multiple congenital defects in Danforth's short tail mouse. PLOS Genetics (2013)", @@ -1605,6 +1704,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "TS Macfarlan; WD Gifford; S Driscoll; K Lettieri; HM Rowe; D Bonanomi; A Firth; O Singer; D Trono; SL Pfaff. Embryonic stem cell potency fluctuates with endogenous retrovirus activity. Nature (2012)", @@ -1618,6 +1718,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "IA Maksakova; MT Romanish; L Gagnier; CA Dunn; LN van de Lagemaat; DL Mager. Retroviral elements and their hosts: insertional mutagenesis in the mouse germ line. PLOS Genetics (2006)", @@ -1631,6 +1732,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "T Matsui; D Leung; H Miyashita; IA Maksakova; H Miyachi; H Kimura; M Tachibana; MC Lorincz; Y Shinkai. Proviral silencing in embryonic stem cells requires the histone methyltransferase ESET. Nature (2010)", @@ -1644,6 +1746,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "HS Najafabadi; S Mnaimneh; FW Schmitges; M Garton; KN Lam; A Yang; M Albu; MT Weirauch; E Radovani; PM Kim; J Greenblatt; BJ Frey; TR Hughes. C2H2 zinc finger proteins greatly expand the human regulatory lexicon. Nature Biotechnology (2015)", @@ -1657,6 +1760,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "C Nell\u00e5ker; TM Keane; B Yalcin; K Wong; A Agam; TG Belgard; J Flint; DJ Adams; WN Frankel; CP Ponting. The genomic landscape shaped by selection on transposable elements across 18 mouse strains. Genome Biology (2012)", @@ -1670,6 +1774,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "H O'Geen; S Frietze; PJ Farnham. Using ChIP-seq technology to identify targets of zinc finger transcription factors. Methods in Molecular Biology (2010)", @@ -1683,6 +1788,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "A Patel; P Yang; M Tinkham; M Pradhan; M-A Sun; Y Wang; D Hoang; G Wolf; JR Horton; X Zhang; T Macfarlan; X Cheng. DNA conformation induces adaptable binding by tandem zinc finger proteins. Cell (2018)", @@ -1696,6 +1802,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "D Ribet; M Dewannieux; T Heidmann. An active murine transposon family pair: retrotransposition of \"master\" MusD copies and ETn trans-mobilization. Genome Research (2004)", @@ -1709,6 +1816,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "SR Richardson; P Gerdes; DJ Gerhardt; FJ Sanchez-Luque; GO Bodea; M Mu\u00f1oz-Lopez; JS Jesuadian; MHC Kempen; PE Carreira; JA Jeddeloh; JL Garcia-Perez; HH Kazazian; AD Ewing; GJ Faulkner. Heritable L1 retrotransposition in the mouse primordial germline and early embryo. Genome Research (2017)", @@ -1722,6 +1830,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "HM Rowe; J Jakobsson; D Mesnard; J Rougemont; S Reynard; T Aktas; PV Maillard; H Layard-Liesching; S Verp; J Marquis; F Spitz; DB Constam; D Trono. KAP1 controls endogenous retroviruses in embryonic stem cells. Nature (2010)", @@ -1735,6 +1844,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "HM Rowe; A Kapopoulou; A Corsinotti; L Fasching; TS Macfarlan; Y Tarabay; S Viville; J Jakobsson; SL Pfaff; D Trono. TRIM28 repression of retrotransposon-based enhancers is necessary to preserve transcriptional dynamics in embryonic stem cells. Genome Research (2013)", @@ -1748,6 +1858,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "SN Schauer; PE Carreira; R Shukla; DJ Gerhardt; P Gerdes; FJ Sanchez-Luque; P Nicoli; M Kindlova; S Ghisletti; AD Santos; D Rapoud; D Samuel; J Faivre; AD Ewing; SR Richardson; GJ Faulkner. L1 retrotransposition is a common feature of mammalian hepatocarcinogenesis. Genome Research (2018)", @@ -1761,6 +1872,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "DC Schultz; K Ayyanathan; D Negorev; GG Maul; FJ Rauscher. SETDB1: a novel KAP-1-associated histone H3, lysine 9-specific methyltransferase that contributes to HP1-mediated silencing of euchromatic genes by KRAB zinc-finger proteins. Genes & Development (2002)", @@ -1774,6 +1886,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "K Semba; K Araki; K Matsumoto; H Suda; T Ando; A Sei; H Mizuta; K Takagi; M Nakahara; M Muta; G Yamada; N Nakagata; A Iida; S Ikegawa; Y Nakamura; M Araki; K Abe; K Yamamura. Ectopic expression of Ptf1a induces spinal defects, urogenital defects, and anorectal malformations in Danforth's short tail mice. PLOS Genetics (2013)", @@ -1787,6 +1900,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "SP Sripathy; J Stevens; DC Schultz. The KAP1 corepressor functions to coordinate the assembly of de novo HP1-demarcated microenvironments of heterochromatin required for KRAB zinc finger protein-mediated transcriptional repression. Molecular and Cellular Biology (2006)", @@ -1800,6 +1914,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "JH Thomas; S Schneider. Coevolution of retroelements and tandem zinc finger genes. Genome Research (2011)", @@ -1813,6 +1928,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "PJ Thompson; TS Macfarlan; MC Lorincz. Long terminal repeats: from parasitic elements to building blocks of the transcriptional regulatory repertoire. Molecular Cell (2016)", @@ -1826,6 +1942,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "RS Treger; SD Pope; Y Kong; M Tokuyama; M Taura; A Iwasaki. The lupus susceptibility locus Sgp3 encodes the suppressor of endogenous retrovirus expression SNERV. Immunity (2019)", @@ -1839,6 +1956,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "CN Vlangos; AN Siuniak; D Robinson; AM Chinnaiyan; RH Lyons; JD Cavalcoli; CE Keegan. Next-generation sequencing identifies the Danforth's short tail mouse mutation as a retrotransposon insertion affecting Ptf1a expression. PLOS Genetics (2013)", @@ -1852,6 +1970,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "J Wang; G Xie; M Singh; AT Ghanbarian; T Rask\u00f3; A Szvetnik; H Cai; D Besser; A Prigione; NV Fuchs; GG Schumann; W Chen; MC Lorincz; Z Ivics; LD Hurst; Z Izsv\u00e1k. Primate-specific endogenous retrovirus-driven transcription defines naive-like stem cells. Nature (2014)", @@ -1865,6 +1984,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "D Wolf; K Hug; SP Goff. TRIM28 mediates primer binding site-targeted silencing of Lys1,2 tRNA-utilizing retroviruses in embryonic cells. PNAS (2008)", @@ -1878,6 +1998,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "G Wolf; D Greenberg; TS Macfarlan. Spotting the enemy within: targeted silencing of foreign DNA in mammalian genomes by the Kr\u00fcppel-associated box zinc finger protein family. Mobile DNA (2015a)", @@ -1891,6 +2012,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "G Wolf; P Yang; AC F\u00fcchtbauer; EM F\u00fcchtbauer; AM Silva; C Park; W Wu; AL Nielsen; FS Pedersen; TS Macfarlan. The KRAB zinc finger protein ZFP809 is required to initiate epigenetic silencing of endogenous retroviruses. Genes & Development (2015b)", @@ -1904,6 +2026,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "M Yamauchi; B Freitag; C Khan; B Berwin; E Barklis. Stem cell factor binding to retrovirus primer binding site silencers. Journal of Virology (1995)", @@ -1917,6 +2040,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Y Zhang; T Liu; CA Meyer; J Eeckhoute; DS Johnson; BE Bernstein; C Nusbaum; RM Myers; M Brown; W Li; XS Liu. Model-based analysis of ChIP-Seq (MACS). Genome Biology (2008)", @@ -1932,6 +2056,7 @@ "$ref": "#/texts/52" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -1949,6 +2074,7 @@ "$ref": "#/texts/52" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -1966,6 +2092,7 @@ "$ref": "#/texts/52" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -1983,6 +2110,7 @@ "$ref": "#/texts/52" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -2000,6 +2128,7 @@ "$ref": "#/texts/52" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -2017,6 +2146,7 @@ "$ref": "#/texts/52" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -2034,6 +2164,7 @@ "$ref": "#/texts/52" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -2051,6 +2182,7 @@ "$ref": "#/texts/52" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -2068,6 +2200,7 @@ "$ref": "#/texts/52" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -2085,6 +2218,7 @@ "$ref": "#/texts/52" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -2102,6 +2236,7 @@ "$ref": "#/texts/52" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -2121,6 +2256,7 @@ "$ref": "#/texts/49" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [ @@ -3243,6 +3379,7 @@ "$ref": "#/texts/49" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [ diff --git a/tests/data/groundtruth/docling_v2/example_01.html.json b/tests/data/groundtruth/docling_v2/example_01.html.json index 044287c1..56e8b3aa 100644 --- a/tests/data/groundtruth/docling_v2/example_01.html.json +++ b/tests/data/groundtruth/docling_v2/example_01.html.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -20,6 +21,7 @@ "$ref": "#/texts/0" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -37,6 +39,7 @@ "$ref": "#/texts/5" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -53,6 +56,7 @@ "$ref": "#/texts/7" } ], + "content_layer": "body", "name": "ordered list", "label": "ordered_list" } @@ -71,6 +75,7 @@ "$ref": "#/texts/2" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Introduction", @@ -82,6 +87,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "This is the first paragraph of the introduction.", @@ -106,6 +112,7 @@ "$ref": "#/groups/1" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Background", @@ -118,6 +125,7 @@ "$ref": "#/texts/2" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Some background information here.", @@ -129,6 +137,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "First item in unordered list", @@ -142,6 +151,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Second item in unordered list", @@ -155,6 +165,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "First item in ordered list", @@ -168,6 +179,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Second item in ordered list", @@ -183,6 +195,7 @@ "$ref": "#/texts/2" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [], diff --git a/tests/data/groundtruth/docling_v2/example_02.html.json b/tests/data/groundtruth/docling_v2/example_02.html.json index 0dbff9f3..179d043a 100644 --- a/tests/data/groundtruth/docling_v2/example_02.html.json +++ b/tests/data/groundtruth/docling_v2/example_02.html.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -20,6 +21,7 @@ "$ref": "#/texts/0" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -37,6 +39,7 @@ "$ref": "#/texts/5" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -53,6 +56,7 @@ "$ref": "#/texts/7" } ], + "content_layer": "body", "name": "ordered list", "label": "ordered_list" } @@ -71,6 +75,7 @@ "$ref": "#/texts/2" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Introduction", @@ -82,6 +87,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "This is the first paragraph of the introduction.", @@ -103,6 +109,7 @@ "$ref": "#/groups/1" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Background", @@ -115,6 +122,7 @@ "$ref": "#/texts/2" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Some background information here.", @@ -126,6 +134,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "First item in unordered list", @@ -139,6 +148,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Second item in unordered list", @@ -152,6 +162,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "First item in ordered list", @@ -165,6 +176,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Second item in ordered list", diff --git a/tests/data/groundtruth/docling_v2/example_03.html.json b/tests/data/groundtruth/docling_v2/example_03.html.json index 206048da..08d89b08 100644 --- a/tests/data/groundtruth/docling_v2/example_03.html.json +++ b/tests/data/groundtruth/docling_v2/example_03.html.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -20,6 +21,7 @@ "$ref": "#/texts/0" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -37,6 +39,7 @@ "$ref": "#/texts/8" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -53,6 +56,7 @@ "$ref": "#/texts/7" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -69,6 +73,7 @@ "$ref": "#/texts/12" } ], + "content_layer": "body", "name": "ordered list", "label": "ordered_list" }, @@ -85,6 +90,7 @@ "$ref": "#/texts/11" } ], + "content_layer": "body", "name": "ordered list", "label": "ordered_list" } @@ -106,6 +112,7 @@ "$ref": "#/texts/13" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Example Document", @@ -121,6 +128,7 @@ "$ref": "#/texts/2" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Introduction", @@ -133,6 +141,7 @@ "$ref": "#/texts/1" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "This is the first paragraph of the introduction.", @@ -154,6 +163,7 @@ "$ref": "#/groups/2" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Background", @@ -166,6 +176,7 @@ "$ref": "#/texts/3" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Some background information here.", @@ -181,6 +192,7 @@ "$ref": "#/groups/1" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "First item in unordered list", @@ -194,6 +206,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Nested item 1", @@ -207,6 +220,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Nested item 2", @@ -220,6 +234,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Second item in unordered list", @@ -237,6 +252,7 @@ "$ref": "#/groups/3" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "First item in ordered list", @@ -250,6 +266,7 @@ "$ref": "#/groups/3" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Nested ordered item 1", @@ -263,6 +280,7 @@ "$ref": "#/groups/3" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Nested ordered item 2", @@ -276,6 +294,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Second item in ordered list", @@ -293,6 +312,7 @@ "$ref": "#/tables/0" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Data Table", @@ -308,6 +328,7 @@ "$ref": "#/texts/13" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], diff --git a/tests/data/groundtruth/docling_v2/example_04.html.json b/tests/data/groundtruth/docling_v2/example_04.html.json index c7d6af05..31ce2cb0 100644 --- a/tests/data/groundtruth/docling_v2/example_04.html.json +++ b/tests/data/groundtruth/docling_v2/example_04.html.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -20,6 +21,7 @@ "$ref": "#/texts/0" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -35,6 +37,7 @@ "$ref": "#/tables/0" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Data Table with Rowspan and Colspan", @@ -49,6 +52,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], diff --git a/tests/data/groundtruth/docling_v2/example_05.html.json b/tests/data/groundtruth/docling_v2/example_05.html.json index ae311397..8d80476a 100644 --- a/tests/data/groundtruth/docling_v2/example_05.html.json +++ b/tests/data/groundtruth/docling_v2/example_05.html.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -20,6 +21,7 @@ "$ref": "#/texts/0" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -35,6 +37,7 @@ "$ref": "#/tables/0" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Omitted html and body tags", @@ -49,6 +52,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], diff --git a/tests/data/groundtruth/docling_v2/ipa20180000016.json b/tests/data/groundtruth/docling_v2/ipa20180000016.json index d5d0d3ec..8f9df01b 100644 --- a/tests/data/groundtruth/docling_v2/ipa20180000016.json +++ b/tests/data/groundtruth/docling_v2/ipa20180000016.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -20,6 +21,7 @@ "$ref": "#/texts/0" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -68,6 +70,7 @@ "$ref": "#/texts/171" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "LIGHT EMITTING DEVICE AND PLANT CULTIVATION METHOD", @@ -83,6 +86,7 @@ "$ref": "#/texts/2" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "ABSTRACT", @@ -95,6 +99,7 @@ "$ref": "#/texts/1" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Provided is a light emitting device that includes a light emitting element having a light emission peak wavelength ranging from 380 nm to 490 nm, and a fluorescent material excited by light from the light emitting element and emitting light having at a light emission peak wavelength ranging from 580 nm or more to less than 680 nm. The light emitting device emits light having a ratio R/B of a photon flux density R to a photon flux density B ranging from 2.0 to 4.0 and a ratio R/FR of the photon flux density R to a photon flux density FR ranging from 0.7 to 13.0, the photon flux density R being in a wavelength range of 620 nm or more and less than 700 nm, the photon flux density B being in a wavelength range of 380 nm or more and 490 nm or less, and the photon flux density FR being in a wavelength range of 700 nm or more and 780 nm or less.", @@ -110,6 +115,7 @@ "$ref": "#/texts/4" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "CROSS-REFERENCE TO RELATED APPLICATION", @@ -122,6 +128,7 @@ "$ref": "#/texts/3" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The application claims benefit of Japanese Patent Application No. 2016-128835 filed on Jun. 29, 2016, the entire disclosure of which is hereby incorporated by reference in its entirety.", @@ -133,6 +140,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "BACKGROUND", @@ -149,6 +157,7 @@ "$ref": "#/texts/7" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Technical Field", @@ -161,6 +170,7 @@ "$ref": "#/texts/6" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The present disclosure relates to a light emitting device and a plant cultivation method.", @@ -188,6 +198,7 @@ "$ref": "#/texts/13" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Description of Related Art", @@ -200,6 +211,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "With environmental changes due to climate change and other artificial disruptions, plant factories are expected to increase production efficiency of vegetables and be capable of adjusting production in order to make it possible to stably supply vegetables. Plant factories that are capable of artificial management can stably supply clean and safe vegetables to markets, and therefore are expected to be the next-generation industries.", @@ -211,6 +223,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Plant factories that are completely isolated from external environment make it possible to artificially control and collect various data such as growth method, growth rate data, yield data, depending on classification of plants. Based on those data, plant factories are able to plan production according to the balance between supply and demand in markets, and supply plants such as vegetables without depending on surrounding conditions such as climatic environment. Particularly, an increase in food production is indispensable with world population growth. If plants can be systematically produced without the influence by surrounding conditions such as climatic environment, vegetables produced in plant factories can be stably supplied within a country, and additionally can be exported abroad as viable products.", @@ -222,6 +235,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "In general, vegetables that are grown outdoors get sunlight, grow while conducting photosynthesis, and are gathered. On the other hand, vegetables that are grown in plant factories are required to be harvested in a short period of time, or are required to grow in larger than normal sizes even in an ordinary growth period.", @@ -233,6 +247,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "In plant factories, the light source used in place of sunlight affect a growth period, growth of plants. LED lighting is being used in place of conventional fluorescent lamps, from a standpoint of power consumption reduction.", @@ -244,6 +259,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "For example, Japanese Unexamined Patent Publication No. 2009-125007 discloses a plant growth method. In this method, the plants is irradiated with light emitted from a first LED light emitting element and/or a second LED light emitting element at predetermined timings using a lighting apparatus including the first LED light emitting element emitting light having a wavelength region of 625 to 690 nm and the second LED light emitting element emitting light having a wavelength region of 420 to 490 nm in order to emit lights having sufficient intensities and different wavelengths from each other.", @@ -274,6 +290,7 @@ "$ref": "#/texts/20" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "SUMMARY", @@ -286,6 +303,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "However, even though plants are merely irradiated with lights having different wavelengths as in the plant growth method disclosed in Japanese Unexamined Patent Publication No. 2009-125007, the effect of promoting plant growth is not sufficient. Further improvement is required in promotion of plant growth.", @@ -297,6 +315,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Accordingly, an object of the present disclosure is to provide a light emitting device capable of promoting growth of plants and a plant cultivation method.", @@ -308,6 +327,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Means for solving the above problems are as follows, and the present disclosure includes the following embodiments.", @@ -319,6 +339,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A first embodiment of the present disclosure is a light emitting device including a light emitting element having a light emission peak wavelength in a range of 380 nm or more and 490 nm or less, and a fluorescent material that is excited by light from the light emitting element and emits light having at least one light emission peak wavelength in a range of 580 nm or more and less than 680 nm. The light emitting device emits light having a ratio R/B of a photon flux density R to a photon flux density B within a range of 2.0 or more and 4.0 or less, and a ratio R/FR of a photon flux density R to a photon flux density FR within a range of 0.7 or more and 13.0 or less, where the photon flux density R is the number of light quanta (\u03bcmol\u00b7m\u207b\u00b2\u00b7g\u207b\u00b9) incident per unit time and unit area in a wavelength range of 620 nm or more and less than 700 nm, the photon flux density B is the number of light quanta (\u03bcmol\u00b7m\u207b\u00b2\u00b7g\u207b\u00b9) incident per unit time and unit area in a wavelength range of 380 nm or more and 490 nm or less, and the photon flux density FR is the number of light quanta (\u03bcmol\u00b7m\u207b\u00b2\u00b7g\u207b\u00b9) incident per unit time and unit area in a wavelength range of 700 nm or more and 780 nm or less.", @@ -330,6 +351,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A second embodiment of the present disclosure is a plant cultivation method including irradiating plants with light from the light emitting device.", @@ -341,6 +363,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "According to embodiments of the present disclosure, a light emitting device capable of promoting growth of plants and a plant cultivation method can be provided.", @@ -365,6 +388,7 @@ "$ref": "#/texts/25" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "BRIEF DESCRIPTION OF THE DRAWINGS", @@ -377,6 +401,7 @@ "$ref": "#/texts/21" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "FIG. 1 is a schematic cross sectional view of a light emitting device according to an embodiment of the present disclosure.", @@ -388,6 +413,7 @@ "$ref": "#/texts/21" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "FIG. 2 is a diagram showing spectra of wavelengths and relative photon flux densities of exemplary light emitting devices according to embodiments of the present disclosure and a comparative light emitting devices.", @@ -399,6 +425,7 @@ "$ref": "#/texts/21" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "FIG. 3 is a graph showing fresh weight (edible part) at the harvest time of each plant grown by irradiating the plant with light from exemplary light emitting devices according to embodiments of the present disclosure and a comparative light emitting device.", @@ -410,6 +437,7 @@ "$ref": "#/texts/21" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "FIG. 4 is a graph showing nitrate nitrogen content in each plant grown by irradiating the plant with light from exemplary light emitting devices according to embodiments of the present disclosure and a comparative light emitting device.", @@ -458,6 +486,7 @@ "$ref": "#/texts/126" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "DETAILED DESCRIPTION", @@ -470,6 +499,7 @@ "$ref": "#/texts/26" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A light emitting device and a plant cultivation method according to the present invention will be described below based on an embodiment. However, the embodiment described below only exemplifies the technical concept of the present invention, and the present invention is not limited to the light emitting device and plant cultivation method described below. In the present specification, the relationship between the color name and the chromaticity coordinate, the relationship between the wavelength range of light and the color name of monochromatic light follows JIS Z8110.", @@ -524,6 +554,7 @@ "$ref": "#/texts/42" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Light Emitting Device", @@ -536,6 +567,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "An embodiment of the present disclosure is a light emitting device including a light emitting element having a light emission peak wavelength in a range of 380 nm or more and 490 nm or less (hereinafter sometimes referred to as a \u201cregion of from near ultraviolet to blue color\u201d), and a first fluorescent material emitting light having at least one light emission peak wavelength in a range of 580 nm or more and less than 680 nm by being excited by light from the light emitting element. The light emitting device emits light having a ratio R/B of a photon flux density R to a photon flux density B within a range of 2.0 or more and 4.0 or less, and a ratio R/FR of the photon flux density R to a photon flux density FR within a range of 0.7 or more and 13.0 or less, where the photon flux density R is the number of light quanta (\u03bcmol\u00b7m\u207b\u00b2\u00b7g\u207b\u00b9) incident per unit time and unit area in a wavelength range of 620 nm or more and less than 700 nm, the photon flux density B is the number of light quanta (\u03bcmol\u00b7m\u207b\u00b2\u00b7g\u207b\u00b9) incident per unit time and unit area in a wavelength range of 380 nm or more and 490 nm or less, and the photon flux density FR is the number of light quanta (\u03bcmol\u00b7m\u207b\u00b2\u00b7g\u207b\u00b9) incident per unit time and unit area in a wavelength range of 700 nm or more and 780 nm or less.", @@ -547,6 +579,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "An example of the light emitting device according to one embodiment of the present disclosure is described below based on the drawings. FIG. 1 is a schematic cross sectional view showing a light emitting device 100 according to an embodiment of the present disclosure.", @@ -558,6 +591,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The light emitting device 100 includes a molded article 40, a light emitting element 10 and a fluorescent member 50, as shown in FIG. 1. The molded article 40 includes a first lead 20 and a second lead 30 that are integrally molded with a resin portion 42 containing a thermoplastic resin or a thermosetting resin. The molded article 40 forms a depression having a bottom and sides, and the light emitting element 10 is placed on the bottom of the depression. The light emitting element 10 has a pair of an anode and a cathode, and the anode and the cathode are electrically connected to the first lead 20 and the second lead 30 respectively through the respective wires 60. The light emitting element 10 is covered with the fluorescent member 50. The fluorescent member 50 includes, for example, a fluorescent material 70 performing wavelength conversion of light from the light emitting element 10, and a resin. The fluorescent material 70 includes a first fluorescent material 71 and a second fluorescent material 72. A part of the first lead 20 and the second lead 30 that are connected to a pair of the anode and the cathode of the light emitting element 10 is exposed toward outside a package constituting the light emitting element 100. The light emitting device 100 can emit light by receiving electric power supply from the outside through the first lead 20 and the second lead 30.", @@ -569,6 +603,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The fluorescent member 50 not only performs wavelength conversion of light emitted from the light emitting element 10, but functions as a member for protecting the light emitting element 10 from the external environment. In FIG. 1, the fluorescent material 70 is localized in the fluorescent member 50 in the state that the first fluorescent material 71 and the second fluorescent material 72 are mixed with each other, and is arranged adjacent to the light emitting element 10. This constitution can efficiently perform the wavelength conversion of light from the light emitting element 10 in the fluorescent material 70, and as a result, can provide a light emitting device having excellent light emission efficiency. The arrangement of the fluorescent member 50 containing the fluorescent material 70, and the light emitting element 10 is not limited to the embodiment that the fluorescent material 70 is arranged adjacent to the light emitting element 10 as shown in FIG. 1, and considering the influence of heat generated from the light emitting element 10, the fluorescent material 70 can be arranged separated from the light emitting element 10 in the fluorescent member 50. Furthermore, light having suppressed color unevenness can be emitted from the light emitting device 100 by arranging the fluorescent material 70 almost evenly in the fluorescent member 50. In FIG. 1, the fluorescent material 70 is arranged in the state that the first fluorescent material 71 and the second fluorescent material 72 are mixed with each other. However, for example, the first fluorescent material 71 may be arranged in a layer state and the second fluorescent material 72 may be arranged thereon in another layer state. Alternatively, the second fluorescent material 72 may be arranged in a layer state and the first fluorescent material 71 may be arranged thereon in another layer state.", @@ -580,6 +615,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The light emitting device 100 includes the first fluorescent material 71 having at least one light emission peak wavelength in a range of 580 nm or more and less than 680 nm by being excited by light from the light emitting element 10, and preferably further includes the second fluorescent material 72 having at least one light emission peak wavelength in a range of 680 nm or more and 800 nm or less by being excited by light from the light emitting element 10.", @@ -591,6 +627,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The first fluorescent material 71 and the second fluorescent material 72 are contained in, for example, the fluorescent member 50 covering the light emitting element 10. The light emitting device 100 in which the light emitting element 10 has been covered with the fluorescent member 50 containing the first fluorescent material 71 and the second fluorescent material 72 emits light having at least one light emission peak wavelength in a range of 580 nm or more and less than 680 nm by a part of light emission of the light emitting element 10 that is absorbed in the first fluorescent material 71. Furthermore, the light emitting device 100 emits light having at least one light emission peak wavelength in a range of 680 nm or more and 800 nm or less by a part of light emission of the light emitting element 10 that is absorbed in the second fluorescent material 72.", @@ -602,6 +639,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Plants grow when a pigment (chlorophyll a and chlorophyll b) present in chlorophyll thereof absorbs light and additionally takes carbon dioxide gas and water therein, and these are converted to carbohydrates (saccharides) by photosynthesis. Chlorophyll a and chlorophyll b used in growth promotion of plants particularly have absorption peaks in a red region of 625 nm or more and 675 nm or less and a blue region of 425 nm or more and 475 nm or less. The action of photosynthesis by chlorophylls of plants mainly occurs in a wavelength range of 400 nm or more and 700 nm or less, but chlorophyll a and chlorophyll b further have local absorption peaks in a region of 700 nm or more and 800 nm or less.", @@ -613,6 +651,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "For example, when plants are irradiated with light having longer wavelength than and absorption peak (in the vicinity of 680 nm) in a red region of chlorophyll a, a phenomenon called red drop, in which activity of photosynthesis rapidly decreases, occurs. However, it is known that when plants are irradiated with light containing near infrared region together with light of red region, photosynthesis is accelerated by a synergistic effect of those two kinds of lights. This phenomenon is called the Emerson effect.", @@ -624,6 +663,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Intensity of light with which plants are irradiated is represented by photon flux density. The photon flux density (\u03bcmol\u00b7m\u207b\u00b2\u00b7s\u207b\u00b9) is the number of photons reaching a unit area per unit time. The amount of photosynthesis depends on the number of photons, and therefore does not depend on other optical characteristics if the photon flux density is the same. However, wavelength dependency activating photosynthesis differs depending on photosynthetic pigment. Intensity of light necessary for photosynthesis of plants is sometimes represented by Photosynthetic Photon Flux Density (PPFD).", @@ -635,6 +675,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The light emitting device 100 emits light having a ratio R/B of a photon flux density R to a photon flux density B within a range of 2.0 or more and 4.0 or less, and a ratio R/FR of the photon flux density R to a photon flux density FR within a range of 0.7 or more and 13.0 or less, where the photon flux density R is the number of light quanta (\u03bcmol\u00b7m\u207b\u00b2\u00b7g\u207b\u00b9) incident per unit time and unit area in a wavelength range of 620 nm or more and less than 700 nm, the photon flux density B is the number of light quanta (\u03bcmol\u00b7m\u207b\u00b2\u00b7g\u207b\u00b9) incident per unit time and unit area in a wavelength range of 380 nm or more and 490 nm or less, and the photon flux density FR is the number of light quanta (\u03bcmol\u00b7m\u207b\u00b2\u00b7g\u207b\u00b9) incident per unit time and unit area in a wavelength range of 700 nm or more and 780 nm or less.", @@ -646,6 +687,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "It is estimated that in plants, which are irradiated with light containing the photon flux density FR from the light emitting device 100, photosynthesis is activated by Emerson effect, and as a result, growth of plants can be promoted. Furthermore, when plants are irradiated with light containing the photon flux density FR, growth of the plants can be promoted by a reversible reaction between red light irradiation, to which chlorophyll as chromoprotein contained in plants has participated, and far infrared light irradiation.", @@ -657,6 +699,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Examples of nutrients necessary for growth of plants include nitrogen, phosphoric acid, and potassium. Of those nutrients, nitrogen is absorbed in plants as nitrate nitrogen (nitrate ion: NO\u2083\u207b). The nitrate nitrogen changes into nitrite ion (NO\u2082\u207b) by a reduction reaction, and when the nitrite ion is further reacted with fatty acid amine, nitrosoamine is formed. It is known that nitrite ion acts to hemoglobin in blood, and it is known that a nitroso compound sometimes affects health of a human body. Mechanism of converting nitrate nitrogen into nitrite ion in vivo is complicated, and the relationship between the amount of intake of nitrate nitrogen and the influence to health of a human body is not clarified. However, it is desired that the content of nitrate nitrogen having a possibility of affecting health of a human body is smaller.", @@ -668,6 +711,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "For the above reasons, nitrogen is one of nutrients necessary for growth of plants, but it is preferred that the content of nitrate nitrogen in food plants be reduced to a range that does not disturb the growth of plants.", @@ -679,6 +723,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "It is preferred that the light emitting device 100 further include the second fluorescent material 72 having at least one light emission peak wavelength in a range of 680 nm or more and 800 nm or less by being excited by light from the light emitting element 10, wherein the R/FR ratio is within a range of 0.7 or more and 5.0 or less. The R/FR ratio is more preferably within a range of 0.7 or more and 2.0 or less.", @@ -700,6 +745,7 @@ "$ref": "#/texts/46" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Light Emitting Element", @@ -712,6 +758,7 @@ "$ref": "#/texts/43" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The light emitting element 10 is used as an excitation light source, and is a light emitting element emitting light having a light emission peak wavelength in a range of 380 nm or more and 490 nm or less. As a result, a stable light emitting device having high efficiency, high linearity of output to input and strong mechanical impacts can be obtained.", @@ -723,6 +770,7 @@ "$ref": "#/texts/43" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The range of the light emission peak wavelength of the light emitting element 10 is preferably in a range of 390 nm or more and 480 nm or less, more preferably in a range of 420 nm or more and 470 nm or less, and still more preferably in a range of 440 nm or more and 460 nm or less, and particularly preferably in a range of 445 nm or more and 455 nm or less. A light emitting element including a nitride semiconductor (In\u2093AlyGa\u2081\u208b\u2093\u208byN, 0\u2266X, 0\u2266Y and X+Y\u22661) is preferably used as the light emitting element 10.", @@ -734,6 +782,7 @@ "$ref": "#/texts/43" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The half value width of emission spectrum of the light emitting element 10 can be, for example, 30 nm or less.", @@ -755,6 +804,7 @@ "$ref": "#/texts/50" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Fluorescent Member", @@ -767,6 +817,7 @@ "$ref": "#/texts/47" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The fluorescent member 50 used in the light emitting device 100 preferably includes the first fluorescent material 71 and a sealing material, and more preferably further includes the second fluorescent material 72. A thermoplastic resin and a thermosetting resin can be used as the sealing material. The fluorescent member 50 may contain other components such as a filler, a light stabilizer and a colorant, in addition to the fluorescent material and the sealing material. Examples of the filler include silica, barium titanate, titanium oxide and aluminum oxide.", @@ -778,6 +829,7 @@ "$ref": "#/texts/47" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The content of other components other than the fluorescent material 70 and the sealing material in the fluorescent member 50 is preferably in a range of 0.01 parts by mass or more and 20 parts by mass or less, per 100 parts by mass of the sealing material.", @@ -789,6 +841,7 @@ "$ref": "#/texts/47" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The total content of the fluorescent material 70 in the fluorescent member 50 can be, for example, 5 parts by mass or more and 300 parts by mass or less, per 100 parts by mass of the sealing material. The total content is preferably 10 parts by mass or more and 250 parts by mass or less, more preferably 15 parts by mass or more and 230 parts by mass or less, and still more preferably 15 parts by mass or more and 200 parts by mass or less. When the total content of the fluorescent material 70 in the fluorescent member 50 is within the above range, the light emitted from the light emitting element 10 can be efficiently subjected to wavelength conversion in the fluorescent material 70.", @@ -855,6 +908,7 @@ "$ref": "#/texts/69" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "First Fluorescent Material", @@ -867,6 +921,7 @@ "$ref": "#/texts/51" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The first fluorescent material 71 is a fluorescent material that is excited by light from the light emitting element 10 and emits light having at least one light emission peak wavelength in a range of 580 nm or more and less than 680 nm. Examples of the first fluorescent material 71 include an Mn\u2074\u207a-activated fluorogermanate fluorescent material, an Eu\u00b2\u207a-activated nitride fluorescent material, an Eu\u00b2\u207a-activated alkaline earth sulfide fluorescent material and an Mn\u2074\u207a-activated halide fluorescent material. The first fluorescent material 71 may use one selected from those fluorescent materials and may use a combination of two or more thereof. The first fluorescent material preferably contains an Eu\u00b2\u207a-activated nitride fluorescent material and an Mn\u2074\u207a-activated fluorogermanate fluorescent material.", @@ -878,6 +933,7 @@ "$ref": "#/texts/51" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The Eu\u00b2\u207a-activated nitride fluorescent material is preferably a fluorescent material that has a composition including at least one element selected from Sr and Ca, and Al and contains silicon nitride that is activated by Eu\u00b2\u207a, or a fluorescent material that has a composition including at least one element selected from the group consisting of alkaline earth metal elements and at least one element selected from the group consisting of alkali metal elements and contains aluminum nitride that is activated by Eu\u00b2\u207a.", @@ -889,6 +945,7 @@ "$ref": "#/texts/51" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The halide fluorescent material that is activated by Mn\u2074\u207a is preferably a fluorescent material that has a composition including at least one element or ion selected from the group consisting of alkali metal elements and an ammonium ion (NH\u2074\u207a) and at least one element selected from the group consisting of Group 4 elements and Group 14 elements and contains a fluoride that is activated by Mn\u2074\u207a.", @@ -900,6 +957,7 @@ "$ref": "#/texts/51" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Examples of the first fluorescent material 71 specifically include fluorescent materials having any one composition of the following formulae (I) to (VI).", @@ -911,6 +969,7 @@ "$ref": "#/texts/51" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "(i\u2212j)MgO.(j/2)Sc\u2082O\u2083.kMgF\u2082.mCaF\u2082.(1\u2212n)GeO\u2082.(n/2)Mt\u2082O\u2083:zMn\u2074\u207a (I)", @@ -922,6 +981,7 @@ "$ref": "#/texts/51" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "wherein Mt is at least one selected from the group consisting of Al, Ga, and In, and j, k, m, n, and z are numbers satisfying 2\u2266i\u22664, 0\u2266j<0.5, 00.3) and higher rainfall (>700 mm per year) contribute to expansion of vector habitats and population. Additionally, having more than five rounds of MDA before pre-TAS was also statistically significantly associated with higher failure in the bivariate analysis. It is unclear why higher number of rounds is associated with first pre-TAS failure given that other research has shown the opposite [15,16].", @@ -944,6 +995,7 @@ "$ref": "#/texts/43" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "All other variables included in this analysis were not significantly associated with pre-TAS failure in our analysis. Goldberg et al. found Brugia spp. to be significantly associated with failure, but our results did not. This is likely due in part to the small number of districts with Brugia spp. in our dataset (6%) compared to 46% in the Goldberg et al. article [7]. MDA coverage levels were not significantly associated with pre-TAS failure, likely due to the lack of variance in the coverage data since WHO guidance dictates a minimum of five rounds of MDA with \u226565% epidemiological coverage to be eligible to implement pre-TAS. It should not be interpreted as evidence that high MDA coverage levels are not necessary to lower prevalence.", @@ -955,6 +1007,7 @@ "$ref": "#/texts/43" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Limitations to this study include data sources, excluded data, unreported data, misassigned data, and aggregation of results at the district level. The main data sources for this analysis were programmatic data, which may be less accurate than data collected specifically for research purposes. This is particularly true of the MDA coverage data, where some countries report data quality challenges in areas of instability or frequent population migration. Even though risk factors such as age, sex, compliance with MDA, and use of bednets have been shown to influence infection in individuals [40,48\u201350], we could not include factors from the human host domain in our analysis, as data sets were aggregated at site level and did not include individual information. In addition, vector control data were not universally available across the 13 countries and thus were not included in the analysis, despite studies showing that vector control has an impact on reducing LF prevalence [41,48,51\u201353].", @@ -966,6 +1019,7 @@ "$ref": "#/texts/43" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Fourteen districts were excluded from the analysis because we were not able to obtain complete data for baseline prevalence, MDA coverage, or geographic boundaries. One of these districts had failed pre-TAS. It is likely these exclusions had minimal impact on the conclusions, as they represented a small number of districts and were similar to other included districts in terms of key variables. Unreported data could have occurred if a country conducted a pre-TAS that failed and then chose not to report it or reported it as a mid-term survey instead. Anecdotally, we know this has occurred occasionally, but we do not believe the practice to be widespread. Another limitation in the analysis is a potential misassignment of key variable values to a district due to changes in the district over time. Redistricting, changes in district size or composition, was pervasive in many countries during the study period; however, we expect the impact on the study outcome to be minimal, as the historical prevalence and MDA data from the \u201cmother\u201d districts are usually flowed down to these new \u201cdaughter\u201d districts. However, it is possible that the split created an area of higher prevalence or lower MDA coverage than would have been found on average in the overall larger original \u201cmother\u201d district. Finally, the aggregation or averaging of results to the district level may mask heterogeneity within districts. Though this impact could be substantial in districts with considerable heterogeneity, the use of median values and binomial variables mitigated the likelihood of skewing the data to extreme outliners in a district.", @@ -977,6 +1031,7 @@ "$ref": "#/texts/43" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "As this analysis used data across a variety of countries and epidemiological situations, the results are likely relevant for other districts in the countries examined and in countries with similar epidemiological backgrounds. In general, as more data become available at site level through the increased use of electronic data collection tools, further analysis of geospatial variables and associations will be possible. For example, with the availability of GPS coordinates, it may become possible to analyze outcomes by site and to link the geospatial environmental domain variables at a smaller scale. Future analyses also might seek to include information from coverage surveys or qualitative research studies on vector control interventions such as bed net usage, MDA compliance, population movement, and sub-populations that might be missed during MDA. Future pre-TAS using electronic data collection could include sex and age of individuals included in the survey.", @@ -988,6 +1043,7 @@ "$ref": "#/texts/43" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "This paper provides evidence from analysis of 554 districts and 13 countries on the factors associated with pre-TAS results. Baseline prevalence, elevation, vector, population density, EVI, rainfall, and number of MDA rounds were all significant in either bivariate or multivariate analyses. This information along with knowledge of local context can help countries more effectively plan pre-TAS and forecast program activities, such as the potential need for more than five rounds of MDA in areas with high baseline and/or low elevation.", @@ -1006,6 +1062,7 @@ "$ref": "#/tables/1" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Tables", @@ -1018,6 +1075,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Table 1: Categorization of potential factors influencing pre-TAS results.", @@ -1029,6 +1087,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Table 2: Adjusted risk ratios for pre-TAS failure from log-binomial model sensitivity analysis.", @@ -1056,6 +1115,7 @@ "$ref": "#/pictures/4" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Figures", @@ -1068,6 +1128,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Fig 1: Number of pre-TAS by country.", @@ -1079,6 +1140,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Fig 2: District-level baseline prevalence by country.", @@ -1090,6 +1152,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Fig 3: Percent pre-TAS failure by each characteristic (unadjusted).", @@ -1101,6 +1164,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Fig 4: Adjusted risk ratios for pre-TAS failure with 95% Confidence Interval from log-binomial model.", @@ -1112,6 +1176,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Fig 5: Analysis of failures by model combinations.", @@ -1127,6 +1192,7 @@ "$ref": "#/groups/0" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "References", @@ -1139,6 +1205,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "World Health Organization. Lymphatic filariasis: progress report 2000\u20132009 and strategic plan 2010\u20132020. Geneva; 2010. ", @@ -1152,6 +1219,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "World Health Organization. Validation of elimination of lymphatic filariasis as a public health problem. Geneva; 2017. ", @@ -1165,6 +1233,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Global programme to eliminate lymphatic filariasis: progress report, 2018. Wkly Epidemiol Rec (2019)", @@ -1178,6 +1247,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "World Health Organization. Global programme to eliminate lymphatic filariasis: monitoring and epidemiological assessment of mass drug administration. Geneva; 2011. ", @@ -1191,6 +1261,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "World Health Organization. Strengthening the assessment of lymphatic filariasis transmission and documenting the achievement of elimination\u2014Meeting of the Neglected Tropical Diseases Strategic and Technical Advisory Group\u2019s Monitoring and Evaluation Subgroup on Disease-specific Indicators. 2016; 42. ", @@ -1204,6 +1275,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Kyelem D; Biswas G; Bockarie MJ; Bradley MH; El-Setouhy M; Fischer PU. Determinants of success in national programs to eliminate lymphatic filariasis: a perspective identifying essential elements and research needs. Am J Trop Med Hyg (2008)", @@ -1217,6 +1289,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Goldberg EM; King JD; Mupfasoni D; Kwong K; Hay SI; Pigott DM. Ecological and socioeconomic predictors of transmission assessment survey failure for lymphatic filariasis. Am J Trop Med Hyg (2019)", @@ -1230,6 +1303,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Cano J; Rebollo MP; Golding N; Pullan RL; Crellen T; Soler A. The global distribution and transmission limits of lymphatic filariasis: past and present. Parasites and Vectors (2014)", @@ -1243,6 +1317,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "CGIAR-CSI. CGIAR-CSI SRTM 90m DEM Digital Elevation Database. In: . ", @@ -1256,6 +1331,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "USGS NASA. Vegetation indices 16-DAy L3 global 500 MOD13A1 dataset [Internet]. [cited 1 May 2018]. Available: . ", @@ -1269,6 +1345,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Funk C; Peterson P; Landsfeld M; Pedreros D; Verdin J; Shukla S. The climate hazards infrared precipitation with stations\u2014A new environmental record for monitoring extremes. Sci Data (2015)", @@ -1282,6 +1359,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Lloyd CT; Sorichetta A; Tatem AJ. High resolution global gridded data for use in population studies. Sci Data (2017)", @@ -1295,6 +1373,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Elvidge CD; Baugh KE; Zhizhin M; Hsu F-C. Why VIIRS data are superior to DMSP for mapping nighttime lights. Proc Asia-Pacific Adv Netw (2013)", @@ -1308,6 +1387,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Jambulingam P; Subramanian S; De Vlas SJ; Vinubala C; Stolk WA. Mathematical modelling of lymphatic filariasis elimination programmes in India: required duration of mass drug administration and post-treatment level of infection indicators. Parasites and Vectors (2016)", @@ -1321,6 +1401,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Michael E; Malecela-Lazaro MN; Simonsen PE; Pedersen EM; Barker G; Kumar A. Mathematical modelling and the control of lymphatic filariasis. Lancet Infect Dis (2004)", @@ -1334,6 +1415,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Stolk WA; Swaminathan S; van Oortmarssen GJ; Das PK; Habbema JDF. Prospects for elimination of bancroftian filariasis by mass drug treatment in Pondicherry, India: a simulation study. J Infect Dis (2003)", @@ -1347,6 +1429,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Grady CA; De Rochars MB; Direny AN; Orelus JN; Wendt J; Radday J. Endpoints for lymphatic filariasis programs. Emerg Infect Dis (2007)", @@ -1360,6 +1443,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Evans D; McFarland D; Adamani W; Eigege A; Miri E; Schulz J. Cost-effectiveness of triple drug administration (TDA) with praziquantel, ivermectin and albendazole for the prevention of neglected tropical diseases in Nigeria. Ann Trop Med Parasitol (2011)", @@ -1373,6 +1457,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Richards FO; Eigege A; Miri ES; Kal A; Umaru J; Pam D. Epidemiological and entomological evaluations after six years or more of mass drug administration for lymphatic filariasis elimination in Nigeria. PLoS Negl Trop Dis (2011)", @@ -1386,6 +1471,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Biritwum NK; Yikpotey P; Marfo BK; Odoom S; Mensah EO; Asiedu O. Persistent \u201chotspots\u201d of lymphatic filariasis microfilaraemia despite 14 years of mass drug administration in Ghana. Trans R Soc Trop Med Hyg (2016)", @@ -1399,6 +1485,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Moraga P; Cano J; Baggaley RF; Gyapong JO; Njenga SM; Nikolay B. Modelling the distribution and transmission intensity of lymphatic filariasis in sub-Saharan Africa prior to scaling up interventions: integrated use of geostatistical and mathematical modelling. Parasites and Vectors (2015)", @@ -1412,6 +1499,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Irvine MA; Njenga SM; Gunawardena S; Wamae CN; Cano J; Brooker SJ. Understanding the relationship between prevalence of microfilariae and antigenaemia using a model of lymphatic filariasis infection. Trans R Soc Trop Med Hyg (2016)", @@ -1425,6 +1513,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ottesen EA. Efficacy of diethylcarbamazine in eradicating infection with lymphatic-dwelling filariae in humans. Rev Infect Dis (1985)", @@ -1438,6 +1527,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Gambhir M; Bockarie M; Tisch D; Kazura J; Remais J; Spear R. Geographic and ecologic heterogeneity in elimination thresholds for the major vector-borne helminthic disease, lymphatic filariasis. BMC Biol (2010)", @@ -1451,6 +1541,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "World Health Organization. Global programme to eliminate lymphatic filariasis: practical entomology handbook. Geneva; 2013. ", @@ -1464,6 +1555,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Slater H; Michael E. Predicting the current and future potential distributions of lymphatic filariasis in Africa using maximum entropy ecological niche modelling. PLoS One (2012)", @@ -1477,6 +1569,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Slater H; Michael E. Mapping, Bayesian geostatistical analysis and spatial prediction of lymphatic filariasis prevalence in Africa. PLoS One (2013)", @@ -1490,6 +1583,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Sabesan S; Raju KHK; Subramanian S; Srivastava PK; Jambulingam P. Lymphatic filariasis transmission risk map of India, based on a geo-environmental risk model. Vector-Borne Zoonotic Dis (2013)", @@ -1503,6 +1597,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Stanton MC; Molyneux DH; Kyelem D; Bougma RW; Koudou BG; Kelly-Hope LA. Baseline drivers of lymphatic filariasis in Burkina Faso. Geospat Health (2013)", @@ -1516,6 +1611,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Manhenje I; Teresa Gal\u00e1n-Puchades M; Fuentes M V. Socio-environmental variables and transmission risk of lymphatic filariasis in central and northern Mozambique. Geospat Health (2013)", @@ -1529,6 +1625,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ngwira BM; Tambala P; Perez a M; Bowie C; Molyneux DH. The geographical distribution of lymphatic filariasis infection in Malawi. Filaria J (2007)", @@ -1542,6 +1639,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Simonsen PE; Mwakitalu ME. Urban lymphatic filariasis. Parasitol Res (2013)", @@ -1555,6 +1653,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Proville J; Zavala-Araiza D; Wagner G. Night-time lights: a global, long term look at links to socio-economic trends. PLoS One (2017)", @@ -1568,6 +1667,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Endeshaw T; Taye A; Tadesse Z; Katabarwa MN; Shafi O; Seid T. Presence of Wuchereria bancrofti microfilaremia despite seven years of annual ivermectin monotherapy mass drug administration for onchocerciasis control: a study in north-west Ethiopia. Pathog Glob Health (2015)", @@ -1581,6 +1681,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Richards FO; Eigege A; Pam D; Kal A; Lenhart A; Oneyka JOA. Mass ivermectin treatment for onchocerciasis: lack of evidence for collateral impact on transmission of Wuchereria bancrofti in areas of co-endemicity. Filaria J (2005)", @@ -1594,6 +1695,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Kyelem D; Sanou S; Boatin B a; Medlock J; Couibaly S; Molyneux DH. Impact of long-term ivermectin (Mectizan) on Wuchereria bancrofti and Mansonella perstans infections in Burkina Faso: strategic and policy implications. Ann Trop Med Parasitol (2003)", @@ -1607,6 +1709,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Weil GJ; Lammie PJ; Richards FO; Eberhard ML. Changes in circulating parasite antigen levels after treatment of bancroftian filariasis with diethylcarbamazine and ivermectin. J Infect Dis (1991)", @@ -1620,6 +1723,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Kumar A; Sachan P. Measuring impact on filarial infection status in a community study: role of coverage of mass drug administration. Trop Biomed (2014)", @@ -1633,6 +1737,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Njenga SM; Mwandawiro CS; Wamae CN; Mukoko DA; Omar AA; Shimada M. Sustained reduction in prevalence of lymphatic filariasis infection in spite of missed rounds of mass drug administration in an area under mosquito nets for malaria control. Parasites and Vectors (2011)", @@ -1646,6 +1751,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Boyd A; Won KY; McClintock SK; Donovan C V; Laney SJ; Williams SA. A community-based study of factors associated with continuing transmission of lymphatic filariasis in Leogane, Haiti. PLoS Negl Trop Dis (2010)", @@ -1659,6 +1765,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Irvine MA; Reimer LJ; Njenga SM; Gunawardena S; Kelly-Hope L; Bockarie M. Modelling strategies to break transmission of lymphatic filariasis\u2014aggregation, adherence and vector competence greatly alter elimination. Parasites and Vectors (2015)", @@ -1672,6 +1779,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Irvine MA; Stolk WA; Smith ME; Subramanian S; Singh BK; Weil GJ. Effectiveness of a triple-drug regimen for global elimination of lymphatic filariasis: a modelling study. Lancet Infect Dis (2017)", @@ -1685,6 +1793,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Pion SD; Montavon C; Chesnais CB; Kamgno J; Wanji S; Klion AD. Positivity of antigen tests used for diagnosis of lymphatic filariasis in individuals without Wuchereria bancrofti infection but with high loa loa microfilaremia. Am J Trop Med Hyg (2016)", @@ -1698,6 +1807,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Wanji S; Esum ME; Njouendou AJ; Mbeng AA; Chounna Ndongmo PW; Abong RA. Mapping of lymphatic filariasis in loiasis areas: a new strategy shows no evidence for Wuchereria bancrofti endemicity in Cameroon. PLoS Negl Trop Dis (2018)", @@ -1711,6 +1821,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Chesnais CB; Awaca-Uvon NP; Bolay FK; Boussinesq M; Fischer PU; Gankpala L. A multi-center field study of two point-of-care tests for circulating Wuchereria bancrofti antigenemia in Africa. PLoS Negl Trop Dis (2017)", @@ -1724,6 +1835,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Silumbwe A; Zulu JM; Halwindi H; Jacobs C; Zgambo J; Dambe R. A systematic review of factors that shape implementation of mass drug administration for lymphatic filariasis in sub-Saharan Africa. BMC Public Health (2017)", @@ -1737,6 +1849,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Adams AM; Vuckovic M; Birch E; Brant TA; Bialek S; Yoon D. Eliminating neglected tropical diseases in urban areas: a review of challenges, strategies and research directions for successful mass drug administration. Trop Med Infect Dis (2018)", @@ -1750,6 +1863,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Rao RU; Samarasekera SD; Nagodavithana KC; Dassanayaka TDM; Punchihewa MW; Ranasinghe USB. Reassessment of areas with persistent lymphatic filariasis nine years after cessation of mass drug administration in Sri Lanka. PLoS Negl Trop Dis (2017)", @@ -1763,6 +1877,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Xu Z; Graves PM; Lau CL; Clements A; Geard N; Glass K. GEOFIL: a spatially-explicit agent-based modelling framework for predicting the long-term transmission dynamics of lymphatic filariasis in American Samoa. Epidemics (2018)", @@ -1776,6 +1891,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Id CM; Tettevi EJ; Mechan F; Idun B; Biritwum N; Osei-atweneboana MY. Elimination within reach: a cross-sectional study highlighting the factors that contribute to persistent lymphatic filariasis in eight communities in rural Ghana. PLoS Negl Trop Dis (2019)", @@ -1789,6 +1905,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Eigege A; Kal A; Miri E; Sallau A; Umaru J; Mafuyai H. Long-lasting insecticidal nets are synergistic with mass drug administration for interruption of lymphatic filariasis transmission in Nigeria. PLoS Negl Trop Dis (2013)", @@ -1802,6 +1919,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Van den Berg H; Kelly-Hope LA; Lindsay SW. Malaria and lymphatic filariasis: The case for integrated vector management. Lancet Infect Dis (2013)", @@ -1815,6 +1933,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Webber R.. Eradication of Wuchereria bancrofti infection through vector control. Trans R Soc Trop Med Hyg (1979)", @@ -1830,6 +1949,7 @@ "$ref": "#/texts/56" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -1847,6 +1967,7 @@ "$ref": "#/texts/56" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -1864,6 +1985,7 @@ "$ref": "#/texts/56" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -1881,6 +2003,7 @@ "$ref": "#/texts/56" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -1898,6 +2021,7 @@ "$ref": "#/texts/56" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -1917,6 +2041,7 @@ "$ref": "#/texts/53" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [ @@ -5433,6 +5558,7 @@ "$ref": "#/texts/53" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [ diff --git a/tests/data/groundtruth/docling_v2/pone.0234687.xml.json b/tests/data/groundtruth/docling_v2/pone.0234687.xml.json index 08543140..d09422b0 100644 --- a/tests/data/groundtruth/docling_v2/pone.0234687.xml.json +++ b/tests/data/groundtruth/docling_v2/pone.0234687.xml.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -47,6 +48,7 @@ "$ref": "#/texts/76" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -286,6 +288,7 @@ "$ref": "#/texts/153" } ], + "content_layer": "body", "name": "list", "label": "list" } @@ -325,6 +328,7 @@ "$ref": "#/texts/77" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Potential to reduce greenhouse gas emissions through different dairy cattle systems in subtropical regions", @@ -336,6 +340,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Ribeiro-Filho Henrique M. N.; 1: Department of Animal Science, University of California, Davis, California, United States of America, 2: Programa de P\u00f3s-gradua\u00e7\u00e3o em Ci\u00eancia Animal, Universidade do Estado de Santa Catarina, Lages, Santa Catarina, Brazil; Civiero Maur\u00edcio; 2: Programa de P\u00f3s-gradua\u00e7\u00e3o em Ci\u00eancia Animal, Universidade do Estado de Santa Catarina, Lages, Santa Catarina, Brazil; Kebreab Ermias; 1: Department of Animal Science, University of California, Davis, California, United States of America", @@ -351,6 +356,7 @@ "$ref": "#/texts/3" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Abstract", @@ -363,6 +369,7 @@ "$ref": "#/texts/2" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Carbon (C) footprint of dairy production, expressed in kg C dioxide (CO2) equivalents (CO2e) (kg energy-corrected milk (ECM))-1, encompasses emissions from feed production, diet management and total product output. The proportion of pasture on diets may affect all these factors, mainly in subtropical climate zones, where cows may access tropical and temperate pastures during warm and cold seasons, respectively. The aim of the study was to assess the C footprint of a dairy system with annual tropical and temperate pastures in a subtropical region. The system boundary included all processes up to the animal farm gate. Feed requirement during the entire life of each cow was based on data recorded from Holstein \u00d7 Jersey cow herds producing an average of 7,000 kg ECM lactation-1. The milk production response as consequence of feed strategies (scenarios) was based on results from two experiments (warm and cold seasons) using lactating cows from the same herd. Three scenarios were evaluated: total mixed ration (TMR) ad libitum intake, 75, and 50% of ad libitum TMR intake with access to grazing either a tropical or temperate pasture during lactation periods. Considering IPCC and international literature values to estimate emissions from urine/dung, feed production and electricity, the C footprint was similar between scenarios, averaging 1.06 kg CO2e (kg ECM)-1. Considering factors from studies conducted in subtropical conditions and actual inputs for on-farm feed production, the C footprint decreased 0.04 kg CO2e (kg ECM)-1 in scenarios including pastures compared to ad libitum TMR. Regardless of factors considered, emissions from feed production decreased as the proportion of pasture went up. In conclusion, decreasing TMR intake and including pastures in dairy cow diets in subtropical conditions have the potential to maintain or reduce the C footprint to a small extent.", @@ -390,6 +397,7 @@ "$ref": "#/texts/9" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Introduction", @@ -402,6 +410,7 @@ "$ref": "#/texts/4" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Greenhouse gas (GHG) emissions from livestock activities represent 10\u201312% of global emissions [1], ranging from 5.5\u20137.5 Gt CO2 equivalents (CO2e) yr-1, with almost 30% coming from dairy cattle production systems [2]. However, the livestock sector supply between 13 and 17% of calories and between 28 and 33% of human edible protein consumption globally [3]. Additionally, livestock produce more human-edible protein per unit area than crops when land is unsuitable for food crop production [4].", @@ -413,6 +422,7 @@ "$ref": "#/texts/4" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Considering the key role of livestock systems in global food security, several technical and management interventions have been investigated to mitigate methane (CH4) emissions from enteric fermentation [5], animal management [6] and manure management [7]. CH4 emissions from enteric fermentation represents around 34% of total emissions from livestock sector, which is the largest source [2]. Increasing proportions of concentrate and digestibility of forages in the diet have been proposed as mitigation strategies [1,5]. In contrast, some life cycle assessment (LCA) studies of dairy systems in temperate regions [8\u201311] have identified that increasing concentrate proportion may increase carbon (C) footprint due to greater resource use and pollutants from the production of feed compared to forage. Thus, increasing pasture proportion on dairy cattle systems may be an alternative management to mitigate the C footprint.", @@ -424,6 +434,7 @@ "$ref": "#/texts/4" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "In subtropical climate zones, cows may graze tropical pastures rather than temperate pastures during the warm season [12]. Some important dairy production areas, such as southern Brazil, central to northern Argentina, Uruguay, South Africa, New Zealand and Australia, are located in these climate zones, having more than 900 million ha in native, permanent or temporary pastures, producing almost 20% of global milk production [13]. However, due to a considerable inter-annual variation in pasture growth rates [14,15], the interest in mixed systems, using total mixed ration (TMR) + pasture has been increasing [16]. Nevertheless, to our best knowledge, studies conducted to evaluate milk production response in dairy cow diets receiving TMR and pastures have only been conducted in temperate pastures and not in tropical pastures (e.g. [17\u201319]).", @@ -435,6 +446,7 @@ "$ref": "#/texts/4" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "It has been shown that dairy cows receiving TMR-based diets may not decrease milk production when supplemented with temperate pastures in a vegetative growth stage [18]. On the other hand, tropical pastures have lower organic matter digestibility and cows experience reduced dry matter (DM) intake and milk yield compared to temperate pastures [20,21]. A lower milk yield increases the C footprint intensity [22], offsetting an expected advantage through lower GHG emissions from crop and reduced DM intake.", @@ -446,6 +458,7 @@ "$ref": "#/texts/4" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The aim of this work was to quantify the C footprint and land use of dairy systems using cows with a medium milk production potential in a subtropical region. The effect of replacing total mixed ration (TMR) with pastures during lactation periods was evaluated.", @@ -491,6 +504,7 @@ "$ref": "#/texts/41" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Materials and methods", @@ -503,6 +517,7 @@ "$ref": "#/texts/10" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "An LCA was developed according to the ISO standards [23,24] and Food and Agriculture Organization of the United Nations (FAO) Livestock Environmental Assessment Protocol guidelines [25]. All procedures were approved by the \u2018Comiss\u00e3o de \u00c9tica no Uso de Animais\u2019 (CEUA/UDESC) on September 15, 2016\u2014Approval number 4373090816 - https://www.udesc.br/cav/ceua.", @@ -518,6 +533,7 @@ "$ref": "#/texts/13" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "System boundary", @@ -530,6 +546,7 @@ "$ref": "#/texts/12" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The goal of the study was to assess the C footprint of annual tropical and temperate pastures in lactating dairy cow diets. The production system was divided into four main processes: (i) animal husbandry, (ii) manure management and urine and dung deposited by grazing animals, (iii) production of feed ingredients and (iv) farm management (Fig 1). The study boundary included all processes up to the animal farm gate (cradle to gate), including secondary sources such as GHG emissions during the production of fuel, electricity, machinery, manufacturing of fertilizer, pesticides, seeds and plastic used in silage production. Fuel combustion and machinery (manufacture and repairs) for manure handling and electricity for milking and confinement were accounted as emissions from farm management. Emissions post milk production were assumed to be similar for all scenarios, therefore, activities including milk processing, distribution, retail or consumption were outside of the system boundary.", @@ -548,6 +565,7 @@ "$ref": "#/texts/16" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Functional unit", @@ -560,6 +578,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The functional unit was one kilogram of energy-corrected milk (ECM) at the farm gate. All processes in the system were calculated based on one kilogram ECM. The ECM was calculated by multiplying milk production by the ratio of the energy content of the milk to the energy content of standard milk with 4% fat and 3.3% true protein according to NRC [20] as follows:", @@ -571,6 +590,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "ECM = Milk production \u00d7 (0.0929 \u00d7 fat% + 0.0588\u00d7 true protein% + 0.192) / (0.0929 \u00d7 (4%) + 0.0588 \u00d7 (3.3%) + 0.192), where fat% and protein% are fat and protein percentages in milk, respectively. The average milk production and composition were recorded from the University of Santa Catarina State (Brazil) herd, considering 165 lactations between 2009 and 2018. The herd is predominantly Holstein \u00d7 Jersey cows, with key characteristics described in Table 1.", @@ -589,6 +609,7 @@ "$ref": "#/texts/19" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Data sources and livestock system description", @@ -601,6 +622,7 @@ "$ref": "#/texts/17" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The individual feed requirements, as well as the milk production responses based on feed strategies were based on data recorded from the herd described above and two experiments performed using lactating cows from the same herd. Due to the variation on herbage production throughout the year, feed requirements were estimated taking into consideration that livestock systems have a calving period in April, which represents the beginning of fall season in the southern Hemisphere. The experiments have shown a 10% reduction in ECM production in dairy cows that received both 75 and 50% of ad libitum TMR intake with access to grazing a tropical pasture (pearl-millet, Pennisetum glaucum \u2018Campeiro\u2019) compared to cows receiving ad libitum TMR intake. Cows grazing on a temperate pasture (ryegrass, Lolium multiflorum \u2018Maximus\u2019) did not need changes to ECM production compared to the ad libitum TMR intake group.", @@ -612,6 +634,7 @@ "$ref": "#/texts/17" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Using experimental data, three scenarios were evaluated during the lactation period: ad libitum TMR intake, and 75, and 50% of ad libitum TMR intake with access to grazing either an annual tropical or temperate pasture as a function of month ([26], Civiero et al., in press). From April to October (210 days) cows accessed an annual temperate pasture (ryegrass), and from November to beginning of February (95 days) cows grazed an annual tropical pasture (pearl-millet). The average annual reduction in ECM production in dairy cows with access to pastures is 3%. This value was assumed during an entire lactation period.", @@ -627,6 +650,7 @@ "$ref": "#/texts/21" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Impact assessment", @@ -639,6 +663,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The CO2e emissions were calculated by multiplying the emissions of CO2, CH4 and N2O by their 100-year global warming potential (GWP100), based on IPCC assessment report 5 (AR5; [27]). The values of GWP100 are 1, 28 and 265 for CO2, CH4 and N2O, respectively.", @@ -657,6 +682,7 @@ "$ref": "#/texts/25" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Feed production", @@ -673,6 +699,7 @@ "$ref": "#/texts/24" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Diets composition", @@ -685,6 +712,7 @@ "$ref": "#/texts/23" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The DM intake of each ingredient throughout the entire life of animals during lactation periods was calculated for each scenario: cows receiving only TMR, cows receiving 75% of TMR with annual pastures and cows receiving 50% of TMR with annual pastures (Table 2). In each of other phases of life (calf, heifer, dry cow), animals received the same diet, including a perennial tropical pasture (kikuyu grass, Pennisetum clandestinum). The DM intake of calves, heifers and dry cows was calculated assuming 2.8, 2.5 and 1.9% body weight, respectively [20]. In each case, the actual DM intake of concentrate and corn silage was recorded, and pasture DM intake was estimated by the difference between daily expected DM intake and actual DM intake of concentrate and corn silage. For lactating heifers and cows, TMR was formulated to meet the net energy for lactation (NEL) and metabolizable protein (MP) requirements of experimental animals, according to [28]. The INRA system was used because it is possible to estimate pasture DM intake taking into account the TMR intake, pasture management and the time of access to pasture using the GrazeIn model [29], which was integrated in the software INRAtion 4.07 (https://www.inration.educagri.fr/fr/forum.php). The nutrient intake was calculated as a product of TMR and pasture intake and the nutrient contents of TMR and pasture, respectively, which were determined in feed samples collected throughout the experiments.", @@ -703,6 +731,7 @@ "$ref": "#/texts/27" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "GHG emissions from crop and pasture production", @@ -715,6 +744,7 @@ "$ref": "#/texts/25" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "GHG emission factors used for off- and on-farm feed production were based on literature values, and are presented in Table 3. The emission factor used for corn grain is the average of emission factors observed in different levels of synthetic N fertilization [30]. The emission factor used for soybean is based on Brazilian soybean production [31]. The emissions used for corn silage, including feed processing (cutting, crushing and mixing), and annual or perennial grass productions were 3300 and 1500 kg CO2e ha-1, respectively [32]. The DM production (kg ha-1) of corn silage and pastures were based on regional and locally recorded data [33\u201336], assuming that animals are able to consume 70% of pastures during grazing.", @@ -726,6 +756,7 @@ "$ref": "#/texts/25" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Emissions from on-farm feed production (corn silage and pasture) were estimated using primary and secondary sources based on the actual amount of each input (Table 4). Primary sources were direct and indirect N2O-N emissions from organic and synthetic fertilizers and crop/pasture residues, CO2-C emissions from lime and urea applications, as well as fuel combustion. The direct N2O-N emission factor (kg (kg N input)-1) is based on a local study performed previously [37]. For indirect N2O-N emissions (kg N2O-N (kg NH3-N + NOx)-1), as well as CO2-C emissions from lime + urea, default values proposed by IPCC [38] were used. For perennial pastures, a C sequestration of 0.57 t ha-1 was used based on a 9-year study conducted in southern Brazil [39]. Due to the use of conventional tillage, no C sequestration was considered for annual pastures. The amount of fuel required was 8.9 (no-tillage) and 14.3 L ha-1 (disking) for annual tropical and temperate pastures, respectively [40]. The CO2 from fuel combustion was 2.7 kg CO2 L-1 [41]. Secondary sources of emissions during the production of fuel, machinery, fertilizer, pesticides, seeds and plastic for ensilage were estimated using emission factors described by Rotz et al. [42].", @@ -741,6 +772,7 @@ "$ref": "#/texts/29" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Animal husbandry", @@ -753,6 +785,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The CH4 emissions from enteric fermentation intensity (g (kg ECM)-1) was a function of estimated CH4 yield (g (kg DM intake)-1), actual DM intake and ECM. The enteric CH4 yield was estimated as a function of neutral detergent fiber (NDF) concentration on total DM intake, as proposed by Niu et al. [43], where: CH4 yield (g (kg DM intake)-1) = 13.8 + 0.185 \u00d7 NDF (% DM intake).", @@ -774,6 +807,7 @@ "$ref": "#/texts/33" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Manure from confined cows and urine and dung from grazing animals", @@ -786,6 +820,7 @@ "$ref": "#/texts/30" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The CH4 emission from manure (kg (kg ECM)-1) was a function of daily CH4 emission from manure (kg cow-1) and daily ECM (kg cow-1). The daily CH4 emission from manure was estimated according to IPCC [38], which considered daily volatile solid (VS) excreted (kg DM cow-1) in manure. The daily VS was estimated as proposed by Eug\u00e8ne et al. [44] as: VS = NDOMI + (UE \u00d7 GE) \u00d7 (OM/18.45), where: VS = volatile solid excretion on an organic matter (OM) basis (kg day-1), NDOMI = non-digestible OM intake (kg day-1): (1- OM digestibility) \u00d7 OM intake, UE = urinary energy excretion as a fraction of GE (0.04), GE = gross energy intake (MJ day-1), OM = organic matter (g), 18.45 = conversion factor for dietary GE per kg of DM (MJ kg-1).", @@ -797,6 +832,7 @@ "$ref": "#/texts/30" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The OM digestibility was estimated as a function of chemical composition, using equations published by INRA [21], which takes into account the effects of digestive interactions due to feeding level, the proportion of concentrate and rumen protein balance on OM digestibility. For scenarios where cows had access to grazing, the amount of calculated VS were corrected as a function of the time at pasture. The biodegradability of manure factor (0.13 for dairy cows in Latin America) and methane conversion factor (MCF) values were taken from IPCC [38]. The MCF values for pit storage below animal confinements (> 1 month) were used for the calculation, taking into account the annual average temperature (16.6\u00baC) or the average temperatures during the growth period of temperate (14.4\u00baC) or tropical (21\u00baC) annual pastures, which were 31%, 26% and 46%, respectively.", @@ -808,6 +844,7 @@ "$ref": "#/texts/30" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The N2O-N emissions from urine and feces were estimated considering the proportion of N excreted as manure and storage or as urine and dung deposited by grazing animals. These proportions were calculated based on the proportion of daily time that animals stayed on pasture (7 h/24 h = 0.29) or confinement (1\u22120.29 = 0.71). For lactating heifers and cows, the total amount of N excreted was calculated by the difference between N intake and milk N excretion. For heifers and non-lactating cows, urinary and fecal N excretion were estimated as proposed by Reed et al. [45] (Table 3: equations 10 and 12, respectively). The N2O emissions from stored manure as well as urine and dung during grazing were calculated based on the conversion of N2O-N emissions to N2O emissions, where N2O emissions = N2O-N emissions \u00d7 44/28. The emission factors were 0.002 kg N2O-N (kg N)-1 stored in a pit below animal confinements, and 0.02 kg N2O-N (kg of urine and dung)-1 deposited on pasture [38]. The indirect N2O emissions from storage manure and urine and dung deposits on pasture were also estimated using the IPCC [38] emission factors.", @@ -838,6 +875,7 @@ "$ref": "#/texts/58" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Farm management", @@ -850,6 +888,7 @@ "$ref": "#/texts/34" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Emissions due to farm management included those from fuel and machinery for manure handling and electricity for milking and confinement (Table 5). Emissions due to feed processing such as cutting, crushing, mixing and distributing, as well as secondary sources of emissions during the production of fuel, machinery, fertilizer, pesticides, seeds and plastic for ensilage were included in \u2018Emissions from crop and pasture production\u2019 section.", @@ -861,6 +900,7 @@ "$ref": "#/texts/34" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The amount of fuel use for manure handling were estimated taking into consideration the amount of manure produced per cow and the amounts of fuel required for manure handling (L diesel t-1) [42]. The amount of manure was estimated from OM excretions (kg cow-1), assuming that the manure has 8% ash on DM basis and 60% DM content. The OM excretions were calculated by NDOMI \u00d7 days in confinement \u00d7 proportion of daily time that animals stayed on confinement.", @@ -872,6 +912,7 @@ "$ref": "#/texts/34" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The emissions from fuel were estimated considering the primary (emissions from fuel burned) and secondary (emissions for producing and transporting fuel) emissions. The primary emissions were calculated by the amount of fuel required for manure handling (L) \u00d7 (kg CO2e L-1) [41]. The secondary emissions from fuel were calculated by the amount of fuel required for manure handling \u00d7 emissions for production and transport of fuel (kg CO2e L-1) [41]. Emissions from manufacture and repair of machinery for manure handling were estimated by manure produced per cow (t) \u00d7 (kg machinery mass (kg manure)-1 \u00d7 10\u22123) [42] \u00d7 kg CO2e (kg machinery mass)-1 [42].", @@ -883,6 +924,7 @@ "$ref": "#/texts/34" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Emissions from electricity for milking and confinement were estimated using two emission factors (kg CO2 kWh-1). The first one is based on United States electricity matrix [41], and was used as a reference of an electricity matrix with less hydroelectric power than the region under study. The second is based on the Brazilian electricity matrix [46]. The electricity required for milking activities is 0.06 kWh (kg milk produced)-1 [47]. The annual electricity use for lighting was 75 kWh cow-1, which is the value considered for lactating cows in naturally ventilated barns [47].", @@ -898,6 +940,7 @@ "$ref": "#/texts/40" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Co-product allocation", @@ -910,6 +953,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The C footprint for milk produced in the system was calculated using a biophysical allocation approach, as recommended by the International Dairy Federation [49], and described by Thoma et al. [48]. Briefly, ARmilk = 1\u20136.04 \u00d7 BMR, where: ARmilk is the allocation ratio for milk and BMR is cow BW at the time of slaughter (kg) + calf BW sold (kg) divided by the total ECM produced during cow`s entire life (kg). The ARmilk were 0.854 and 0.849 for TMR and TMR with both pasture scenarios, respectively. The ARmilk was applied to the whole emissions, except for the electricity consumed for milking (milking parlor) and refrigerant loss, which was directly assigned to milk production.", @@ -925,6 +969,7 @@ "$ref": "#/texts/42" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Sensitivity analysis", @@ -937,6 +982,7 @@ "$ref": "#/texts/41" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "A sensitivity index was calculated as described by Rotz et al. [42]. The sensitivity index was defined for each emission source as the percentage change in the C footprint for a 10% change in the given emission source divided by 10%. Thus, a value near 0 indicates a low sensitivity, whereas an index near or greater than 1 indicates a high sensitivity because a change in this value causes a similar change in the footprint.", @@ -967,6 +1013,7 @@ "$ref": "#/texts/61" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Results and discussion", @@ -979,6 +1026,7 @@ "$ref": "#/texts/43" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The study has assessed the impact of tropical and temperate pastures in dairy cows fed TMR on the C footprint of dairy production in subtropics. Different factors were taken in to consideration to estimate emissions from manure (or urine and dung) of grazing animals, feed production and electricity use.", @@ -1003,6 +1051,7 @@ "$ref": "#/texts/49" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Greenhouse gas emissions", @@ -1015,6 +1064,7 @@ "$ref": "#/texts/45" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Depending on emission factors used for calculating emissions from urine and dung (IPCC or local data) and feed production (Tables 3 or 4), the C footprint was similar (Fig 2A and 2B) or decreased by 0.04 kg CO2e (kg ECM)-1 (Fig 2C and 2D) in scenarios that included pastures compared to ad libitum TMR intake. Due to differences in emission factors, the overall GHG emission values ranged from 0.92 to 1.04 kg CO2e (kg ECM)-1 for dairy cows receiving TMR exclusively, and from 0.88 to 1.04 kg CO2e (kg ECM)-1 for cows with access to pasture. Using IPCC emission factors [38], manure emissions increased as TMR intake went down (Fig 2A and 2B). However, using local emission factors for estimating N2O-N emissions [37], manure emissions decreased as TMR intake went down (Fig 2C and 2D). Regardless of emission factors used (Tables 3 or 4), emissions from feed production decreased to a small extent as the proportion of TMR intake decreased. Emissions from farm management did not contribute more than 5% of overall GHG emissions.", @@ -1026,6 +1076,7 @@ "$ref": "#/texts/45" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Considering IPCC emission factors for N2O emissions from urine and dung [38] and those from Table 3, the C footprint ranged from 0.99 to 1.04 kg CO2e (kg ECM)-1, and was close to those reported under confined based systems in California [49], Canada [50], China [8], Ireland [9], different scenarios in Australia [51,52] and Uruguay [11], which ranged from 0.98 to 1.16 kg CO2e (kg ECM)-1. When local emission factors for N2O emissions from urine and dung [37] and those from Table 4 were taking into account, the C footprint for scenarios including pasture, without accounting for sequestered CO2-C from perennial pasture\u20140.91 kg CO2e (kg ECM)-1\u2014was lower than the range of values described above. However, these values were still greater than high-performance confinement systems in UK and USA [53] or grass based dairy systems in Ireland [9,53] and New Zealand [8,54], which ranged from 0.52 to 0.89 kg CO2e (kg ECM)-1. Regardless of which emission factor was used, we found a lower C footprint in all conditions compared to scenarios with lower milk production per cow or in poor conditions of manure management, which ranged from 1.4 to 2.3 kg CO2e (kg ECM)-1 [8,55]. Thus, even though differences between studies may be partially explained by various assumptions (e.g., emission factors, co-product allocation, methane emissions estimation, sequestered CO2-C, etc.), herd productivity and manure management were systematically associated with the C footprint of the dairy systems.", @@ -1037,6 +1088,7 @@ "$ref": "#/texts/45" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The similarity of C footprint between different scenarios using IPCC [38] for estimating emissions from manure and for emissions from feed production (Table 3) was a consequence of the trade-off between greater manure emissions and lower emissions to produce feed, as the proportion of pasture in diets increased. Additionally, the small negative effect of pasture on ECM production also contributed to the trade-off. The impact of milk production on the C footprint was reported in a meta-analysis comprising 30 studies from 15 different countries [22]. As observed in this study (Fig 2A and 2B) the authors reported no significant difference between the C footprint of pasture-based vs. confinement systems. However, they observed that an increase of 1000 kg cow-1 (5000 to 6000 kg ECM) reduced the C footprint by 0.12 kg CO2e (kg ECM)-1, which may explain an apparent discrepancy between our study and an LCA performed in south Brazilian conditions [56]. Their study compared a confinement and a grazing-based dairy system with annual average milk production of 7667 and 5535 kg cow, respectively. In this study, the same herd was used in all systems, with an annual average milk production of around 7000 kg cow-1. Experimental data showed a reduction not greater than 3% of ECM when 50% of TMR was replaced by pasture access.", @@ -1048,6 +1100,7 @@ "$ref": "#/texts/45" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The lower C footprint in scenarios with access to pasture, when local emission factors [37] were used for N2O emissions from urine and dung and for feed production (Table 4), may also be partially attributed to the small negative effect of pasture on ECM production. Nevertheless, local emission factors for urine and dung had a great impact on scenarios including pastures compared to ad libitum TMR intake. Whereas the IPCC [38] considers an emission of 0.02 kg N2O-N (kg N)-1 for urine and dung from grazing animals, experimental evidence shows that it may be up to five times lower, averaging 0.004 kg N2O-N kg-1 [37].", @@ -1066,6 +1119,7 @@ "$ref": "#/texts/52" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Methane emissions", @@ -1078,6 +1132,7 @@ "$ref": "#/texts/50" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The enteric CH4 intensity was similar between different scenarios (Fig 2), showing the greatest sensitivity index, with values ranging from 0.53 to 0.62, which indicate that for a 10% change in this source, the C footprint may change between 5.3 and 6.2% (Fig 3). The large effect of enteric CH4 emissions on the whole C footprint was expected, because the impact of enteric CH4 on GHG emissions of milk production in different dairy systems has been estimated to range from 44 to 60% of the total CO2e [50,52,57,58]. However, emissions in feed production may be the most important source of GHG when emission factors for producing concentrate feeds are greater than 0.7 kg CO2e kg-1 [59], which did not happen in this study.", @@ -1089,6 +1144,7 @@ "$ref": "#/texts/50" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The lack of difference in enteric CH4 emissions in different systems can be explained by the narrow range of NDF content in diets (<4% difference). This non-difference is due to the lower NDF content of annual temperate pastures (495 g (kg DM)-1) compared to corn silage (550 g (kg DM)-1). Hence, an expected, increase NDF content with decreased concentrate was partially offset by an increase in the pasture proportion relatively low in NDF. This is in agreement with studies conducted in southern Brazil, which have shown that the actual enteric CH4 emissions may decrease with inclusion of temperate pastures in cows receiving corn silage and soybean meal [60] or increase enteric CH4 emissions when dairy cows grazing a temperate pasture was supplemented with corn silage [61]. Additionally, enteric CH4 emissions did not differ between dairy cows receiving TMR exclusively or grazing a tropical pasture in the same scenarios as in this study [26].", @@ -1110,6 +1166,7 @@ "$ref": "#/texts/56" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Emissions from excreta and feed production", @@ -1122,6 +1179,7 @@ "$ref": "#/texts/53" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Using IPCC emission factors for N2O emissions from urine and dung [38] and those from Table 3, CH4 emissions from manure decreased 0.07 kg CO2e (kg ECM)-1, but N2O emissions from manure increased 0.09 kg CO2e (kg ECM)-1, as TMR intake was restricted to 50% ad libitum (Fig 4A). Emissions for pastures increased by 0.06 kg CO2e (kg ECM)-1, whereas emissions for producing concentrate feeds and corn silage decreased by 0.09 kg CO2e (kg ECM)-1, as TMR intake decreased (Fig 4B). In this situation, the lack of difference in calculated C footprints of different systems was also due to the greater emissions from manure, and offset by lower emissions from feed production with inclusion of pasture in lactating dairy cow diets. The greater N2O-N emissions from manure with pasture was a consequence of higher N2O-N emissions due to greater CP content and N urine excretion, as pasture intake increased. The effect of CP content on urine N excretion has been shown by several authors in lactating dairy cows [62\u201364]. For instance, by decreasing CP content from 185 to 152 g (kg DM)-1, N intake decreased by 20% and urine N excretion by 60% [62]. In this study, the CP content for lactating dairy cows ranged from 150 g (kg DM)-1 on TMR system to 198 g (kg DM)-1 on 50% TMR with pasture. Additionally, greater urine N excretion is expected with greater use of pasture. This occurs because protein utilization in pastures is inefficient, as the protein in fresh forages is highly degradable in the rumen and may not be captured by microbes [65].", @@ -1133,6 +1191,7 @@ "$ref": "#/texts/53" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Using local emission factors for N2O emissions from urine and dung [37] and those from Table 4, reductions in CH4 emissions from stocked manure, when pastures were included on diets, did not offset by increases in N2O emissions from excreta (Fig 4C). In this case, total emissions from manure (Fig 4C) and feed production (Fig 4D) decreased with the inclusion of pasture. The impact of greater CP content and N urine excretion with increased pasture intake was offset by the much lower emission factors used for N2O emissions from urine and dung. As suggested by other authors [66,67], these results show that IPCC default value may need to be revised for the subtropical region.", @@ -1144,6 +1203,7 @@ "$ref": "#/texts/53" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Emissions for feed production decreased when pasture was included due to the greater emission factor for corn grain production compared to pastures. Emissions from concentrate and silage had at least twice the sensitivity index compared to emissions from pastures. The amount of grain required per cow in a lifetime decreased from 7,300 kg to 4,000 kg when 50% of TMR was replaced by pasture access. These results are in agreement with other studies which found lower C footprint, as concentrate use is reduced and/or pasture is included [9,68,69]. Moreover, it has been demonstrated that in intensive dairy systems, after enteric fermentation, feed production is the second main contributor to C footprint [50]. There is potential to decrease the environmental impact of dairy systems by reducing the use of concentrate ingredients with high environmental impact, particularly in confinements [9].", @@ -1155,6 +1215,7 @@ "$ref": "#/texts/34" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The lower impact of emissions from farm management is in agreement with other studies conducted in Europe [9, 62] and USA [42, 55], where the authors found that most emissions in dairy production systems are from enteric fermentation, feed production and emissions from excreta. As emissions from fuel for on-farm feed production were accounted into the \u2018emissions from crop and pasture production\u2019, total emissions from farm management were not greater than 5% of total C footprint.", @@ -1166,6 +1227,7 @@ "$ref": "#/texts/34" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Emissions from farm management dropped when the emission factor for electricity generation was based on the Brazilian matrix. In this case, the emission factor for electricity generation (0.205 kg CO2e kWh-1 [46]) is much lower than that in a LCA study conducted in US (0.73 kg CO2e kWh-1 [42]). This apparent discrepancy is explained because in 2016, almost 66% of the electricity generated in Brazil was from hydropower, which has an emission factor of 0.074 kg CO2e kWh-1 against 0.382 and 0.926 kg CO2e kWh-1 produced by natural gas and hard coal, respectively [46].", @@ -1181,6 +1243,7 @@ "$ref": "#/texts/60" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Assumptions and limitations", @@ -1193,6 +1256,7 @@ "$ref": "#/texts/59" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The milk production and composition data are the average for a typical herd, which might have great animal-to-animal variability. Likewise, DM yield of crops and pastures were collected from experimental observations, and may change as a function of inter-annual variation, climatic conditions, soil type, fertilization level etc. The emission factors for direct and indirect N2O emissions from urine and dung were alternatively estimated using local data, but more experiments are necessary to reduce the uncertainty. The CO2 emitted from lime and urea application was estimated from IPCC default values, which may not represent emissions in subtropical conditions. This LCA may be improved by reducing the uncertainty of factors for estimating emissions from excreta and feed production, including the C sequestration or emissions as a function of soil management.", @@ -1211,6 +1275,7 @@ "$ref": "#/texts/63" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Further considerations", @@ -1223,6 +1288,7 @@ "$ref": "#/texts/61" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "The potential for using pasture can reduce the C footprint because milk production kept pace with animal confinement. However, if milk production is to decrease with lower TMR intake and inclusion of pasture [19], the C footprint would be expected to increase. Lorenz et al. [22] showed that an increase in milk yield from 5,000 to 6,000 kg ECM reduced the C footprint by 0.12 kg CO2e (kg ECM)-1, whereas an increase from 10,000 to 11,000 kg ECM reduced the C footprint by only 0.06 kg CO2e (kg ECM)-1. Hence, the impact of increasing milk production on decreasing C footprint is not linear, and mitigation measures, such as breeding for increased genetic yield potential and increasing concentrate ratio in the diet, are potentially harmful for animal\u2019s health and welfare [70]. For instance, increasing concentrate ratio potentially increases the occurrence of subclinical ketosis and foot lesions, and C footprint may increase by 0.03 kg CO2e (kg ECM)-1 in subclinical ketosis [71] and by 0.02 kg CO2e (kg ECM)-1 in case of foot lesions [72].", @@ -1234,6 +1300,7 @@ "$ref": "#/texts/61" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Grazing lands may also improve biodiversity [73]. Strategies such as zero tillage may increase stocks of soil C [74]. This study did not consider C sequestration during the growth of annual pastures, because it was assumed these grasses were planted with tillage, having a balance between C sequestration and C emissions [38]. Considering the C sequestration from no-tillage perennial pasture, the amount of C sequestration will more than compensates for C emitted. These results are in agreement with other authors who have shown that a reduction or elimination of soil tillage increases annual soil C sequestration in subtropical areas by 0.5 to 1.5 t ha-1 [75]. If 50% of tilled areas were under perennial grasslands, 1.0 t C ha-1 would be sequestered, further reducing the C footprint by 0.015 and 0.025 kg CO2e (kg ECM)-1 for the scenarios using 75 and 50% TMR, respectively. Eliminating tillage, the reduction on total GHG emissions would be 0.03 and 0.05 kg CO2e (kg ECM)-1 for 75 and 50% TMR, respectively. However, this approach may be controversial because lands which have been consistently managed for decades have approached steady state C storage, so that net exchange of CO2 would be negligible [76].", @@ -1249,6 +1316,7 @@ "$ref": "#/texts/65" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Conclusions", @@ -1261,6 +1329,7 @@ "$ref": "#/texts/64" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "This study assessed the C footprint of dairy cattle systems with or without access to pastures. Including pastures showed potential to maintain or decrease to a small extent the C footprint, which may be attributable to the evidence of low N2O emissions from urine and dung in dairy systems in subtropical areas. Even though the enteric CH4 intensity was the largest source of CO2e emissions, it did not change between different scenarios due to the narrow range of NDF content in diets and maintaining the same milk production with or without access to pastures.", @@ -1288,6 +1357,7 @@ "$ref": "#/tables/4" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Tables", @@ -1300,6 +1370,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Table 1: Descriptive characteristics of the herd.", @@ -1311,6 +1382,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Table 2: Dairy cows\u2019 diets in different scenariosa.", @@ -1322,6 +1394,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Table 3: GHG emission factors for Off- and On-farm feed production.", @@ -1333,6 +1406,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Table 4: GHG emissions from On-farm feed production.", @@ -1344,6 +1418,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Table 5: Factors for major resource inputs in farm management.", @@ -1368,6 +1443,7 @@ "$ref": "#/pictures/3" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Figures", @@ -1380,6 +1456,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Fig 1: Overview of the milk production system boundary considered in the study.", @@ -1391,6 +1468,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Fig 2: Overall greenhouse gas emissions in dairy cattle systems under various scenarios.\nTMR = ad libitum TMR intake, 75TMR = 75% of ad libitum TMR intake with access to pasture, 50TMR = 50% of ad libitum TMR intake with access to pasture. (a) N2O emission factors for urine and dung from IPCC [38], feed production emission factors from Table 3 without accounting for sequestered CO2-C from perennial pasture, production of electricity = 0.73 kg CO2e kWh-1 [41]. (b) N2O emission factors for urine and dung from IPCC [38], feed production emission factors from Table 3 without accounting for sequestered CO2-C from perennial pasture, production of electricity = 0.205 kg CO2e kWh-1 [46]; (c) N2O emission factors for urine and dung from local data [37], feed production EF from Table 4 without accounting for sequestered CO2-C from perennial pasture, production of electricity = 0.205 kg CO2e kWh-1 [46]. (d) N2O emission factors for urine and dung from local data [37], feed production emission factors from Table 4 accounting for sequestered CO2-C from perennial pasture, production of electricity = 0.205 kg CO2e kWh-1 [46].", @@ -1402,6 +1480,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Fig 3: Sensitivity of the C footprint.\nSensitivity index = percentage change in C footprint for a 10% change in the given emission source divided by 10% of. (a) N2O emission factors for urine and dung from IPCC [38], feed production emission factors from Table 3, production of electricity = 0.73 kg CO2e kWh-1 [41]. (b) N2O emission factors for urine and dung from IPCC [38], feed production emission factors from Table 3, production of electricity = 0.205 kg CO2e kWh-1 [46]; (c) N2O emission factors for urine and dung from local data [37], feed production EF from Table 4 without accounting sequestered CO2-C from perennial pasture, production of electricity = 0.205 kg CO2e kWh-1 [46]. (d) N2O emission factors for urine and dung from local data [37], feed production emission factors from Table 4 accounting sequestered CO2-C from perennial pasture, production of electricity = 0.205 kg CO2e kWh-1 [46].", @@ -1413,6 +1492,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Fig 4: Greenhouse gas emissions (GHG) from manure and feed production in dairy cattle systems.\nTMR = ad libitum TMR intake, 75TMR = 75% of ad libitum TMR intake with access to pasture, 50TMR = 50% of ad libitum TMR intake with access to pasture. (a) N2O emission factors for urine and dung from IPCC [38]. (b) Feed production emission factors from Table 3. (c) N2O emission factors for urine and dung from local data [37]. (d) Feed production emission factors from Table 4 accounting sequestered CO2-C from perennial pasture.", @@ -1428,6 +1508,7 @@ "$ref": "#/groups/0" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "References", @@ -1440,6 +1521,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Climate Change and Land. Chapter 5: Food Security (2019)", @@ -1453,6 +1535,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Herrero M; Henderson B; Havl\u00edk P; Thornton PK; Conant RT; Smith P. Greenhouse gas mitigation potentials in the livestock sector. Nat Clim Chang (2016)", @@ -1466,6 +1549,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Rivera-Ferre MG; L\u00f3pez-i-Gelats F; Howden M; Smith P; Morton JF; Herrero M. Re-framing the climate change debate in the livestock sector: mitigation and adaptation options. Wiley Interdiscip Rev Clim Chang (2016)", @@ -1479,6 +1563,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "van Zanten HHE; Mollenhorst H; Klootwijk CW; van Middelaar CE; de Boer IJM. Global food supply: land use efficiency of livestock systems. Int J Life Cycle Assess (2016)", @@ -1492,6 +1577,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Hristov AN; Oh J; Firkins L; Dijkstra J; Kebreab E; Waghorn G. SPECIAL TOPICS\u2014Mitigation of methane and nitrous oxide emissions from animal operations: I. A review of enteric methane mitigation options. J Anim Sci (2013)", @@ -1505,6 +1591,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Hristov AN; Ott T; Tricarico J; Rotz A; Waghorn G; Adesogan A. SPECIAL TOPICS\u2014Mitigation of methane and nitrous oxide emissions from animal operations: III. A review of animal management mitigation options. J Anim Sci (2013)", @@ -1518,6 +1605,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Montes F; Meinen R; Dell C; Rotz A; Hristov AN; Oh J. SPECIAL TOPICS\u2014Mitigation of methane and nitrous oxide emissions from animal operations: II. A review of manure management mitigation options. J Anim Sci (2013)", @@ -1531,6 +1619,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ledgard SF; Wei S; Wang X; Falconer S; Zhang N; Zhang X. Nitrogen and carbon footprints of dairy farm systems in China and New Zealand, as influenced by productivity, feed sources and mitigations. Agric Water Manag (2019)", @@ -1544,6 +1633,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "O\u2019Brien D; Shalloo L; Patton J; Buckley F; Grainger C; Wallace M. A life cycle assessment of seasonal grass-based and confinement dairy farms. Agric Syst (2012)", @@ -1557,6 +1647,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Salou T; Le Mou\u00ebl C; van der Werf HMG. Environmental impacts of dairy system intensification: the functional unit matters!. J Clean Prod (2017)", @@ -1570,6 +1661,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Lizarralde C; Picasso V; Rotz CA; Cadenazzi M; Astigarraga L. Practices to Reduce Milk Carbon Footprint on Grazing Dairy Farms in Southern Uruguay. Case Studies. Sustain Agric Res (2014)", @@ -1583,6 +1675,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Clark CEF; Kaur R; Millapan LO; Golder HM; Thomson PC; Horadagoda A. The effect of temperate or tropical pasture grazing state and grain-based concentrate allocation on dairy cattle production and behavior. J Dairy Sci (2018)", @@ -1596,6 +1689,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "FAOSTAT. (2017)", @@ -1609,6 +1703,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Vogeler I; Mackay A; Vibart R; Rendel J; Beautrais J; Dennis S. Effect of inter-annual variability in pasture growth and irrigation response on farm productivity and profitability based on biophysical and farm systems modelling. Sci Total Environ (2016)", @@ -1622,6 +1717,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Wilkinson JM; Lee MRF; Rivero MJ; Chamberlain AT. Some challenges and opportunities for grazing dairy cows on temperate pastures. Grass Forage Sci. (2020)", @@ -1635,6 +1731,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Wales WJ; Marett LC; Greenwood JS; Wright MM; Thornhill JB; Jacobs JL. Use of partial mixed rations in pasture-based dairying in temperate regions of Australia. Anim Prod Sci (2013)", @@ -1648,6 +1745,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Bargo F; Muller LD; Delahoy JE; Cassidy TW. Performance of high producing dairy cows with three different feeding systems combining pasture and total mixed rations. J Dairy Sci (2002)", @@ -1661,6 +1759,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Vibart RE; Fellner V; Burns JC; Huntington GB; Green JT. Performance of lactating dairy cows fed varying levels of total mixed ration and pasture. J Dairy Res (2008)", @@ -1674,6 +1773,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Mendoza A; Cajarville C; Repetto JL. Short communication: Intake, milk production, and milk fatty acid profile of dairy cows fed diets combining fresh forage with a total mixed ration. J Dairy Sci (2016)", @@ -1687,6 +1787,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Nutrient Requirements of Dairy Cattle (2001)", @@ -1700,6 +1801,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Noiz\u00e8re P; Sauvant D; Delaby L. (2018)", @@ -1713,6 +1815,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Lorenz H; Reinsch T; Hess S; Taube F. Is low-input dairy farming more climate friendly? A meta-analysis of the carbon footprints of different production systems. J Clean Prod (2019)", @@ -1726,6 +1829,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "INTERNATIONAL STANDARD\u2014Environmental management\u2014Life cycle assessment\u2014Requirements and guidelines (2006)", @@ -1739,6 +1843,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Environmental management\u2014Life cycle assessment\u2014Principles and framework. Iso 14040 (2006)", @@ -1752,6 +1857,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "FAO. Environmental Performance of Large Ruminant Supply Chains: Guidelines for assessment (2016)", @@ -1765,6 +1871,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Civiero M; Ribeiro-Filho HMN; Schaitz LH. Pearl-millet grazing decreases daily methane emissions in dairy cows receiving total mixed ration. 7th Greenhouse Gas and Animal Agriculture Conference,. Foz do Igua\u00e7u (2019)", @@ -1778,6 +1885,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "IPCC\u2014Intergovernmental Panel on Climate Change. Climate Change 2014 Synthesis Report (Unedited Version). 2014. Available: ttps://. ", @@ -1791,6 +1899,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "INRA. Alimentation des bovins, ovins et caprins. Besoins des animaux\u2014valeurs des aliments. Tables Inra 2007. 4th ed. INRA, editor. 2007. ", @@ -1804,6 +1913,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Delagarde R; Faverdin P; Baratte C; Peyraud JL. GrazeIn: a model of herbage intake and milk production for grazing dairy cows. 2. Prediction of intake under rotational and continuously stocked grazing management. Grass Forage Sci (2011)", @@ -1817,6 +1927,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ma BL; Liang BC; Biswas DK; Morrison MJ; McLaughlin NB. The carbon footprint of maize production as affected by nitrogen fertilizer and maize-legume rotations. Nutr Cycl Agroecosystems (2012)", @@ -1830,6 +1941,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Rauccci GS; Moreira CS; Alves PS; Mello FFC; Fraz\u00e3o LA; Cerri CEP. Greenhouse gas assessment of Brazilian soybean production: a case study of Mato Grosso State. J Clean Prod (2015)", @@ -1843,6 +1955,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Camargo GGT; Ryan MR; Richard TL. Energy Use and Greenhouse Gas Emissions from Crop Production Using the Farm Energy Analysis Tool. Bioscience (2013)", @@ -1856,6 +1969,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "da Silva MSJ; Jobim CC; Poppi EC; Tres TT; Osmari MP. Production technology and quality of corn silage for feeding dairy cattle in Southern Brazil. Rev Bras Zootec (2015)", @@ -1869,6 +1983,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Duchini PGPG Guzatti GCGC; Ribeiro-Filho HMNHMNN Sbrissia AFAFAF. Intercropping black oat (Avena strigosa) and annual ryegrass (Lolium multiflorum) can increase pasture leaf production compared with their monocultures. Crop Pasture Sci (2016)", @@ -1882,6 +1997,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Scaravelli LFB; Pereira LET; Olivo CJ; Agnolin CA. Produ\u00e7\u00e3o e qualidade de pastagens de Coastcross-1 e milheto utilizadas com vacas leiteiras. Cienc Rural (2007)", @@ -1895,6 +2011,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Sbrissia AF; Duchini PG; Zanini GD; Santos GT; Padilha DA; Schmitt D. Defoliation strategies in pastures submitted to intermittent stocking method: Underlying mechanisms buffering forage accumulation over a range of grazing heights. Crop Sci (2018)", @@ -1908,6 +2025,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Almeida JGR; Dall-Orsoletta AC; Oziemblowski MM; Michelon GM; Bayer C; Edouard N. Carbohydrate-rich supplements can improve nitrogen use efficiency and mitigate nitrogenous gas emissions from the excreta of dairy cows grazing temperate grass. Animal (2020)", @@ -1921,6 +2039,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Eggleston H.S.; Buendia L.; Miwa K. IPCC guidlines for national greenhouse gas inventories. (2006)", @@ -1934,6 +2053,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ramalho B; Dieckow J; Barth G; Simon PL; Mangrich AS; Brevilieri RC. No-tillage and ryegrass grazing effects on stocks, stratification and lability of carbon and nitrogen in a subtropical Umbric Ferralsol. Eur J Soil Sci (2020)", @@ -1947,6 +2067,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Fernandes HC; da Silveira JCM; Rinaldi PCN. Avalia\u00e7\u00e3o do custo energ\u00e9tico de diferentes opera\u00e7\u00f5es agr\u00edcolas mecanizadas. Cienc e Agrotecnologia (2008)", @@ -1960,6 +2081,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Wang M Q. GREET 1.8a Spreadsheet Model. 2007. Available: . ", @@ -1973,6 +2095,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Rotz CAA; Montes F; Chianese DS; Chiane DS. The carbon footprint of dairy production systems through partial life cycle assessment. J Dairy Sci (2010)", @@ -1986,6 +2109,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Niu M; Kebreab E; Hristov AN; Oh J; Arndt C; Bannink A. Prediction of enteric methane production, yield, and intensity in dairy cattle using an intercontinental database. Glob Chang Biol (2018)", @@ -1999,6 +2123,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Eug\u00e8ne M; Sauvant D; Nozi\u00e8re P; Viallard D; Oueslati K; Lherm M. A new Tier 3 method to calculate methane emission inventory for ruminants. J Environ Manage (2019)", @@ -2012,6 +2137,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Reed KF; Moraes LE; Casper DP; Kebreab E. Predicting nitrogen excretion from cattle. J Dairy Sci (2015)", @@ -2025,6 +2151,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Barros MV; Piekarski CM; De Francisco AC. Carbon footprint of electricity generation in Brazil: An analysis of the 2016\u20132026 period. Energies (2018)", @@ -2038,6 +2165,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ludington D; Johnson E. Dairy Farm Energy Audit Summary. New York State Energy Res Dev Auth (2003)", @@ -2051,6 +2179,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Thoma G; Jolliet O; Wang Y. A biophysical approach to allocation of life cycle environmental burdens for fluid milk supply chain analysis. Int Dairy J (2013)", @@ -2064,6 +2193,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Naranjo A; Johnson A; Rossow H. Greenhouse gas, water, and land footprint per unit of production of the California dairy industry over 50 years. (2020)", @@ -2077,6 +2207,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Jayasundara S; Worden D; Weersink A; Wright T; VanderZaag A; Gordon R. Improving farm profitability also reduces the carbon footprint of milk production in intensive dairy production systems. J Clean Prod (2019)", @@ -2090,6 +2221,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Williams SRO; Fisher PD; Berrisford T; Moate PJ; Reynard K. Reducing methane on-farm by feeding diets high in fat may not always reduce life cycle greenhouse gas emissions. Int J Life Cycle Assess (2014)", @@ -2103,6 +2235,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Gollnow S; Lundie S; Moore AD; McLaren J; van Buuren N; Stahle P. Carbon footprint of milk production from dairy cows in Australia. Int Dairy J (2014)", @@ -2116,6 +2249,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "O\u2019Brien D; Capper JL; Garnsworthy PC; Grainger C; Shalloo L. A case study of the carbon footprint of milk from high-performing confinement and grass-based dairy farms. J Dairy Sci (2014)", @@ -2129,6 +2263,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Chobtang J; McLaren SJ; Ledgard SF; Donaghy DJ. Consequential Life Cycle Assessment of Pasture-based Milk Production: A Case Study in the Waikato Region, New Zealand. J Ind Ecol (2017)", @@ -2142,6 +2277,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Garg MR; Phondba BT; Sherasia PL; Makkar HPS. Carbon footprint of milk production under smallholder dairying in Anand district of Western India: A cradle-to-farm gate life cycle assessment. Anim Prod Sci (2016)", @@ -2155,6 +2291,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "de L\u00e9is CM; Cherubini E; Ruviaro CF; Prud\u00eancio da Silva V; do Nascimento Lampert V; Spies A. Carbon footprint of milk production in Brazil: a comparative case study. Int J Life Cycle Assess (2015)", @@ -2168,6 +2305,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "O\u2019Brien D; Geoghegan A; McNamara K; Shalloo L. How can grass-based dairy farmers reduce the carbon footprint of milk?. Anim Prod Sci (2016)", @@ -2181,6 +2319,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "O\u2019Brien D; Brennan P; Humphreys J; Ruane E; Shalloo L. An appraisal of carbon footprint of milk from commercial grass-based dairy farms in Ireland according to a certified life cycle assessment methodology. Int J Life Cycle Assess (2014)", @@ -2194,6 +2333,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Baek CY; Lee KM; Park KH. Quantification and control of the greenhouse gas emissions from a dairy cow system. J Clean Prod (2014)", @@ -2207,6 +2347,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Dall-Orsoletta AC; Almeida JGR; Carvalho PCF; Savian J V. Ribeiro-Filho HMN. Ryegrass pasture combined with partial total mixed ration reduces enteric methane emissions and maintains the performance of dairy cows during mid to late lactation. J Dairy Sci (2016)", @@ -2220,6 +2361,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Dall-Orsoletta AC; Oziemblowski MM; Berndt A; Ribeiro-Filho HMN. Enteric methane emission from grazing dairy cows receiving corn silage or ground corn supplementation. Anim Feed Sci Technol (2019)", @@ -2233,6 +2375,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Niu M; Appuhamy JADRN; Leytem AB; Dungan RS; Kebreab E. Effect of dietary crude protein and forage contents on enteric methane emissions and nitrogen excretion from dairy cows simultaneously. Anim Prod Sci (2016)", @@ -2246,6 +2389,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Waghorn GC; Law N; Bryant M; Pacheco D; Dalley D. Digestion and nitrogen excretion by Holstein-Friesian cows in late lactation offered ryegrass-based pasture supplemented with fodder beet. Anim Prod Sci (2019)", @@ -2259,6 +2403,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Dickhoefer U; Glowacki S; G\u00f3mez CA; Castro-Montoya JM. Forage and protein use efficiency in dairy cows grazing a mixed grass-legume pasture and supplemented with different levels of protein and starch. Livest Sci (2018)", @@ -2272,6 +2417,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Schwab CG; Broderick GA. A 100-Year Review: Protein and amino acid nutrition in dairy cows. J Dairy Sci (2017)", @@ -2285,6 +2431,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Sordi A; Dieckow J; Bayer C; Alburquerque MA; Piva JT; Zanatta JA. Nitrous oxide emission factors for urine and dung patches in a subtropical Brazilian pastureland. Agric Ecosyst Environ (2014)", @@ -2298,6 +2445,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Simon PL; Dieckow J; de Klein CAM; Zanatta JA; van der Weerden TJ; Ramalho B. Nitrous oxide emission factors from cattle urine and dung, and dicyandiamide (DCD) as a mitigation strategy in subtropical pastures. Agric Ecosyst Environ (2018)", @@ -2311,6 +2459,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Wang X; Ledgard S; Luo J; Guo Y; Zhao Z; Guo L. Environmental impacts and resource use of milk production on the North China Plain, based on life cycle assessment. Sci Total Environ (2018)", @@ -2324,6 +2473,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Pirlo G; Lolli S. Environmental impact of milk production from samples of organic and conventional farms in Lombardy (Italy). J Clean Prod (2019)", @@ -2337,6 +2487,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Herzog A; Winckler C; Zollitsch W. In pursuit of sustainability in dairy farming: A review of interdependent effects of animal welfare improvement and environmental impact mitigation. Agric Ecosyst Environ (2018)", @@ -2350,6 +2501,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Mostert PF; van Middelaar CE; Bokkers EAM; de Boer IJM. The impact of subclinical ketosis in dairy cows on greenhouse gas emissions of milk production. J Clean Prod (2018)", @@ -2363,6 +2515,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Mostert PF; van Middelaar CE; de Boer IJM; Bokkers EAM. The impact of foot lesions in dairy cows on greenhouse gas emissions of milk production. Agric Syst (2018)", @@ -2376,6 +2529,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Foley JA; Ramankutty N; Brauman KA; Cassidy ES; Gerber JS; Johnston M. Solutions for a cultivated planet. Nature (2011)", @@ -2389,6 +2543,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Lal R.. Soil Carbon Sequestration Impacts on Global Climate Change and Food Security. Science (80-) (2004)", @@ -2402,6 +2557,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Boddey RM; Jantalia CP; Concei\u00e7ao PC; Zanatta JA; Bayer C; Mielniczuk J. Carbon accumulation at depth in Ferralsols under zero-till subtropical agriculture. Glob Chang Biol (2010)", @@ -2415,6 +2571,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "McConkey B; Angers D; Bentham M; Boehm M; Brierley T; Cerkowniak D. Canadian agricultural greenhouse gas monitoring accounting and reporting system: methodology and greenhouse gas estimates for agricultural land in the LULUCF sector for NIR 2014. (2014)", @@ -2430,6 +2587,7 @@ "$ref": "#/texts/72" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -2447,6 +2605,7 @@ "$ref": "#/texts/72" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -2464,6 +2623,7 @@ "$ref": "#/texts/72" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -2481,6 +2641,7 @@ "$ref": "#/texts/72" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -2500,6 +2661,7 @@ "$ref": "#/texts/66" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [ @@ -3486,6 +3648,7 @@ "$ref": "#/texts/66" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [ @@ -8664,6 +8827,7 @@ "$ref": "#/texts/66" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [ @@ -9786,6 +9950,7 @@ "$ref": "#/texts/66" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [ @@ -13226,6 +13391,7 @@ "$ref": "#/texts/66" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [ diff --git a/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.json b/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.json index 44288d7d..98c977b4 100644 --- a/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.json +++ b/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -26,6 +27,7 @@ "$ref": "#/groups/2" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -46,6 +48,7 @@ "$ref": "#/tables/0" } ], + "content_layer": "body", "name": "slide-0", "label": "chapter" }, @@ -74,6 +77,7 @@ "$ref": "#/texts/7" } ], + "content_layer": "body", "name": "slide-1", "label": "chapter" }, @@ -105,6 +109,7 @@ "$ref": "#/groups/7" } ], + "content_layer": "body", "name": "slide-2", "label": "chapter" }, @@ -124,6 +129,7 @@ "$ref": "#/texts/10" } ], + "content_layer": "body", "name": "list", "label": "ordered_list" }, @@ -146,6 +152,7 @@ "$ref": "#/texts/14" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -162,6 +169,7 @@ "$ref": "#/texts/17" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -181,6 +189,7 @@ "$ref": "#/texts/21" } ], + "content_layer": "body", "name": "list", "label": "ordered_list" }, @@ -200,6 +209,7 @@ "$ref": "#/texts/24" } ], + "content_layer": "body", "name": "list", "label": "list" } @@ -211,6 +221,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "title", "prov": [ { @@ -237,6 +248,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [ { @@ -263,6 +275,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "title", "prov": [ { @@ -289,6 +302,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [ { @@ -315,6 +329,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [ { @@ -341,6 +356,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [ { @@ -367,6 +383,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [ { @@ -393,6 +410,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [ { @@ -419,6 +437,7 @@ "$ref": "#/groups/3" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -447,6 +466,7 @@ "$ref": "#/groups/3" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -475,6 +495,7 @@ "$ref": "#/groups/3" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -503,6 +524,7 @@ "$ref": "#/groups/4" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -531,6 +553,7 @@ "$ref": "#/groups/4" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -559,6 +582,7 @@ "$ref": "#/groups/4" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -587,6 +611,7 @@ "$ref": "#/groups/4" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -615,6 +640,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [ { @@ -641,6 +667,7 @@ "$ref": "#/groups/5" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -669,6 +696,7 @@ "$ref": "#/groups/5" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -697,6 +725,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [ { @@ -723,6 +752,7 @@ "$ref": "#/groups/6" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -751,6 +781,7 @@ "$ref": "#/groups/6" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -779,6 +810,7 @@ "$ref": "#/groups/6" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -807,6 +839,7 @@ "$ref": "#/groups/7" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -835,6 +868,7 @@ "$ref": "#/groups/7" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -863,6 +897,7 @@ "$ref": "#/groups/7" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -894,6 +929,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { diff --git a/tests/data/groundtruth/docling_v2/powerpoint_with_image.pptx.json b/tests/data/groundtruth/docling_v2/powerpoint_with_image.pptx.json index eaa343f0..d35c891f 100644 --- a/tests/data/groundtruth/docling_v2/powerpoint_with_image.pptx.json +++ b/tests/data/groundtruth/docling_v2/powerpoint_with_image.pptx.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -20,6 +21,7 @@ "$ref": "#/groups/0" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -40,6 +42,7 @@ "$ref": "#/pictures/0" } ], + "content_layer": "body", "name": "slide-0", "label": "chapter" } @@ -51,6 +54,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "title", "prov": [ { @@ -77,6 +81,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [ { @@ -105,6 +110,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [ { diff --git a/tests/data/groundtruth/docling_v2/tablecell.docx.json b/tests/data/groundtruth/docling_v2/tablecell.docx.json index d811cc86..be1a920c 100644 --- a/tests/data/groundtruth/docling_v2/tablecell.docx.json +++ b/tests/data/groundtruth/docling_v2/tablecell.docx.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -38,6 +39,7 @@ "$ref": "#/texts/6" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -55,6 +57,7 @@ "$ref": "#/texts/1" } ], + "content_layer": "body", "name": "list", "label": "list" } @@ -66,6 +69,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Hello world1", @@ -79,6 +83,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Hello2", @@ -92,6 +97,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -103,6 +109,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Some text before", @@ -114,6 +121,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -125,6 +133,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -136,6 +145,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Some text after", @@ -150,6 +160,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], diff --git a/tests/data/groundtruth/docling_v2/test-01.xlsx.json b/tests/data/groundtruth/docling_v2/test-01.xlsx.json index b6642298..3c2a7164 100644 --- a/tests/data/groundtruth/docling_v2/test-01.xlsx.json +++ b/tests/data/groundtruth/docling_v2/test-01.xlsx.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -26,6 +27,7 @@ "$ref": "#/groups/2" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -40,6 +42,7 @@ "$ref": "#/tables/0" } ], + "content_layer": "body", "name": "sheet: Sheet1", "label": "section" }, @@ -59,6 +62,7 @@ "$ref": "#/tables/3" } ], + "content_layer": "body", "name": "sheet: Sheet2", "label": "section" }, @@ -78,6 +82,7 @@ "$ref": "#/pictures/0" } ], + "content_layer": "body", "name": "sheet: Sheet3", "label": "section" } @@ -90,6 +95,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [], @@ -114,6 +120,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], @@ -652,6 +659,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], @@ -1554,6 +1562,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], @@ -1944,6 +1953,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], @@ -2334,6 +2344,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], @@ -2800,6 +2811,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], diff --git a/tests/data/groundtruth/docling_v2/test_emf_docx.docx.json b/tests/data/groundtruth/docling_v2/test_emf_docx.docx.json index 6418a215..86a3d96a 100644 --- a/tests/data/groundtruth/docling_v2/test_emf_docx.docx.json +++ b/tests/data/groundtruth/docling_v2/test_emf_docx.docx.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -38,6 +39,7 @@ "$ref": "#/pictures/2" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -49,6 +51,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Test with three images in unusual formats", @@ -60,6 +63,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Raster in emf:", @@ -71,6 +75,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Vector in emf:", @@ -82,6 +87,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Raster in webp:", @@ -95,6 +101,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [], @@ -108,6 +115,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [], @@ -121,6 +129,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [], diff --git a/tests/data/groundtruth/docling_v2/unit_test_01.html.json b/tests/data/groundtruth/docling_v2/unit_test_01.html.json index fa126177..c3708524 100644 --- a/tests/data/groundtruth/docling_v2/unit_test_01.html.json +++ b/tests/data/groundtruth/docling_v2/unit_test_01.html.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -20,6 +21,7 @@ "$ref": "#/texts/0" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -34,6 +36,7 @@ "$ref": "#/texts/4" } ], + "content_layer": "body", "name": "header-3", "label": "section" } @@ -52,6 +55,7 @@ "$ref": "#/texts/3" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Title", @@ -67,6 +71,7 @@ "$ref": "#/texts/2" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "section-1", @@ -79,6 +84,7 @@ "$ref": "#/texts/1" }, "children": [], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "section-1.1", @@ -101,6 +107,7 @@ "$ref": "#/texts/6" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "section-2", @@ -113,6 +120,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "section-2.0.1", @@ -125,6 +133,7 @@ "$ref": "#/texts/3" }, "children": [], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "section-2.2", @@ -137,6 +146,7 @@ "$ref": "#/texts/3" }, "children": [], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "section-2.3", diff --git a/tests/data/groundtruth/docling_v2/unit_test_headers.docx.json b/tests/data/groundtruth/docling_v2/unit_test_headers.docx.json index c76d241a..83a6709a 100644 --- a/tests/data/groundtruth/docling_v2/unit_test_headers.docx.json +++ b/tests/data/groundtruth/docling_v2/unit_test_headers.docx.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -20,6 +21,7 @@ "$ref": "#/texts/0" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -34,6 +36,7 @@ "$ref": "#/texts/33" } ], + "content_layer": "body", "name": "header-2", "label": "section" } @@ -55,6 +58,7 @@ "$ref": "#/texts/27" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Test Document", @@ -66,6 +70,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -99,6 +104,7 @@ "$ref": "#/texts/14" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 1", @@ -111,6 +117,7 @@ "$ref": "#/texts/2" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -122,6 +129,7 @@ "$ref": "#/texts/2" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.1", @@ -133,6 +141,7 @@ "$ref": "#/texts/2" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -144,6 +153,7 @@ "$ref": "#/texts/2" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.2", @@ -155,6 +165,7 @@ "$ref": "#/texts/2" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -182,6 +193,7 @@ "$ref": "#/texts/13" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 1.1", @@ -194,6 +206,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -205,6 +218,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.1.1", @@ -216,6 +230,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -227,6 +242,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.1.2", @@ -238,6 +254,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -268,6 +285,7 @@ "$ref": "#/texts/20" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 1.2", @@ -280,6 +298,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -291,6 +310,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.1.1", @@ -302,6 +322,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -313,6 +334,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.1.2", @@ -324,6 +346,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -354,6 +377,7 @@ "$ref": "#/texts/26" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 1.2.3", @@ -366,6 +390,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -377,6 +402,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.2.3.1", @@ -388,6 +414,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -399,6 +426,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.2.3.1", @@ -410,6 +438,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -421,6 +450,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -454,6 +484,7 @@ "$ref": "#/texts/39" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 2", @@ -466,6 +497,7 @@ "$ref": "#/texts/27" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -477,6 +509,7 @@ "$ref": "#/texts/27" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.1", @@ -488,6 +521,7 @@ "$ref": "#/texts/27" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -499,6 +533,7 @@ "$ref": "#/texts/27" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.2", @@ -510,6 +545,7 @@ "$ref": "#/texts/27" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -537,6 +573,7 @@ "$ref": "#/texts/38" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 2.1.1", @@ -549,6 +586,7 @@ "$ref": "#/texts/33" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -560,6 +598,7 @@ "$ref": "#/texts/33" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.1.1.1", @@ -571,6 +610,7 @@ "$ref": "#/texts/33" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -582,6 +622,7 @@ "$ref": "#/texts/33" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.1.1.1", @@ -593,6 +634,7 @@ "$ref": "#/texts/33" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -623,6 +665,7 @@ "$ref": "#/texts/45" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 2.1", @@ -635,6 +678,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -646,6 +690,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.1.1", @@ -657,6 +702,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -668,6 +714,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.1.2", @@ -679,6 +726,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -690,6 +738,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", diff --git a/tests/data/groundtruth/docling_v2/unit_test_headers_numbered.docx.json b/tests/data/groundtruth/docling_v2/unit_test_headers_numbered.docx.json index 38a25d33..b2f0aa8d 100644 --- a/tests/data/groundtruth/docling_v2/unit_test_headers_numbered.docx.json +++ b/tests/data/groundtruth/docling_v2/unit_test_headers_numbered.docx.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -56,6 +57,7 @@ "$ref": "#/groups/2" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -73,6 +75,7 @@ "$ref": "#/texts/27" } ], + "content_layer": "body", "name": "header-0", "label": "section" }, @@ -89,6 +92,7 @@ "$ref": "#/texts/14" } ], + "content_layer": "body", "name": "header-1", "label": "section" }, @@ -102,6 +106,7 @@ "$ref": "#/groups/3" } ], + "content_layer": "body", "name": "header-0", "label": "section" }, @@ -118,6 +123,7 @@ "$ref": "#/texts/39" } ], + "content_layer": "body", "name": "header-1", "label": "section" }, @@ -131,6 +137,7 @@ "$ref": "#/texts/33" } ], + "content_layer": "body", "name": "header-2", "label": "section" } @@ -149,6 +156,7 @@ "$ref": "#/texts/2" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Test Document", @@ -160,6 +168,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -171,6 +180,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 1", @@ -183,6 +193,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -194,6 +205,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.1", @@ -205,6 +217,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -216,6 +229,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.2", @@ -227,6 +241,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -254,6 +269,7 @@ "$ref": "#/texts/13" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 1.1", @@ -266,6 +282,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -277,6 +294,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.1.1", @@ -288,6 +306,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -299,6 +318,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.1.2", @@ -310,6 +330,7 @@ "$ref": "#/texts/8" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -340,6 +361,7 @@ "$ref": "#/texts/20" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 1.2", @@ -352,6 +374,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -363,6 +386,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.1.1", @@ -374,6 +398,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -385,6 +410,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.1.2", @@ -396,6 +422,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -426,6 +453,7 @@ "$ref": "#/texts/26" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 1.2.3", @@ -438,6 +466,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -449,6 +478,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.2.3.1", @@ -460,6 +490,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -471,6 +502,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 1.2.3.1", @@ -482,6 +514,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -493,6 +526,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -504,6 +538,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 2", @@ -516,6 +551,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -527,6 +563,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.1", @@ -538,6 +575,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -549,6 +587,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.2", @@ -560,6 +599,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -587,6 +627,7 @@ "$ref": "#/texts/38" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 2.1.1", @@ -599,6 +640,7 @@ "$ref": "#/texts/33" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -610,6 +652,7 @@ "$ref": "#/texts/33" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.1.1.1", @@ -621,6 +664,7 @@ "$ref": "#/texts/33" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -632,6 +676,7 @@ "$ref": "#/texts/33" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.1.1.1", @@ -643,6 +688,7 @@ "$ref": "#/texts/33" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -673,6 +719,7 @@ "$ref": "#/texts/45" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Section 2.1", @@ -685,6 +732,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -696,6 +744,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.1.1", @@ -707,6 +756,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -718,6 +768,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.1.2", @@ -729,6 +780,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -740,6 +792,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", diff --git a/tests/data/groundtruth/docling_v2/unit_test_lists.docx.json b/tests/data/groundtruth/docling_v2/unit_test_lists.docx.json index 1410586c..c42561e4 100644 --- a/tests/data/groundtruth/docling_v2/unit_test_lists.docx.json +++ b/tests/data/groundtruth/docling_v2/unit_test_lists.docx.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -20,6 +21,7 @@ "$ref": "#/groups/0" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -34,6 +36,7 @@ "$ref": "#/texts/0" } ], + "content_layer": "body", "name": "header-0", "label": "section" }, @@ -62,6 +65,7 @@ "$ref": "#/texts/36" } ], + "content_layer": "body", "name": "header-2", "label": "section" }, @@ -81,6 +85,7 @@ "$ref": "#/texts/10" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -100,6 +105,7 @@ "$ref": "#/texts/15" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -122,6 +128,7 @@ "$ref": "#/texts/23" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -141,6 +148,7 @@ "$ref": "#/texts/22" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -160,6 +168,7 @@ "$ref": "#/texts/28" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -173,6 +182,7 @@ "$ref": "#/texts/27" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -192,6 +202,7 @@ "$ref": "#/texts/34" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -208,6 +219,7 @@ "$ref": "#/groups/10" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -221,6 +233,7 @@ "$ref": "#/texts/33" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -243,6 +256,7 @@ "$ref": "#/texts/42" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -262,6 +276,7 @@ "$ref": "#/groups/13" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -275,6 +290,7 @@ "$ref": "#/texts/41" } ], + "content_layer": "body", "name": "list", "label": "list" } @@ -308,6 +324,7 @@ "$ref": "#/groups/1" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Test Document", @@ -320,6 +337,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -331,6 +349,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -342,6 +361,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.1.1", @@ -353,6 +373,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -364,6 +385,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Paragraph 2.1.2", @@ -375,6 +397,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -393,6 +416,7 @@ "$ref": "#/texts/11" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Test 1:", @@ -405,6 +429,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1", @@ -418,6 +443,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 2", @@ -431,6 +457,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 3", @@ -444,6 +471,7 @@ "$ref": "#/texts/7" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -462,6 +490,7 @@ "$ref": "#/texts/16" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Test 2:", @@ -474,6 +503,7 @@ "$ref": "#/groups/3" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item a", @@ -487,6 +517,7 @@ "$ref": "#/groups/3" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item b", @@ -500,6 +531,7 @@ "$ref": "#/groups/3" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item c", @@ -513,6 +545,7 @@ "$ref": "#/texts/12" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -531,6 +564,7 @@ "$ref": "#/texts/24" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Test 3:", @@ -543,6 +577,7 @@ "$ref": "#/groups/4" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1", @@ -556,6 +591,7 @@ "$ref": "#/groups/4" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 2", @@ -569,6 +605,7 @@ "$ref": "#/groups/5" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1.1", @@ -582,6 +619,7 @@ "$ref": "#/groups/5" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1.2", @@ -595,6 +633,7 @@ "$ref": "#/groups/5" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1.3", @@ -608,6 +647,7 @@ "$ref": "#/groups/4" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 3", @@ -621,6 +661,7 @@ "$ref": "#/texts/17" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -639,6 +680,7 @@ "$ref": "#/texts/29" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Test 4:", @@ -651,6 +693,7 @@ "$ref": "#/groups/6" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1", @@ -664,6 +707,7 @@ "$ref": "#/groups/7" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1.1", @@ -677,6 +721,7 @@ "$ref": "#/groups/6" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 2", @@ -690,6 +735,7 @@ "$ref": "#/texts/25" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -708,6 +754,7 @@ "$ref": "#/texts/35" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Test 5:", @@ -720,6 +767,7 @@ "$ref": "#/groups/8" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1", @@ -733,6 +781,7 @@ "$ref": "#/groups/9" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1.1", @@ -746,6 +795,7 @@ "$ref": "#/groups/10" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1.1.1", @@ -759,6 +809,7 @@ "$ref": "#/groups/8" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 3", @@ -772,6 +823,7 @@ "$ref": "#/texts/30" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -796,6 +848,7 @@ "$ref": "#/texts/45" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Test 6:", @@ -808,6 +861,7 @@ "$ref": "#/groups/11" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1", @@ -821,6 +875,7 @@ "$ref": "#/groups/11" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 2", @@ -834,6 +889,7 @@ "$ref": "#/groups/12" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1.1", @@ -847,6 +903,7 @@ "$ref": "#/groups/12" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1.2", @@ -860,6 +917,7 @@ "$ref": "#/groups/13" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 1.2.1", @@ -873,6 +931,7 @@ "$ref": "#/groups/11" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "List item 3", @@ -886,6 +945,7 @@ "$ref": "#/texts/36" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -897,6 +957,7 @@ "$ref": "#/texts/36" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -908,6 +969,7 @@ "$ref": "#/texts/36" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", diff --git a/tests/data/groundtruth/docling_v2/wiki_duck.html.json b/tests/data/groundtruth/docling_v2/wiki_duck.html.json index 11168769..2763617d 100644 --- a/tests/data/groundtruth/docling_v2/wiki_duck.html.json +++ b/tests/data/groundtruth/docling_v2/wiki_duck.html.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -95,6 +96,7 @@ "$ref": "#/texts/250" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -124,6 +126,7 @@ "$ref": "#/texts/5" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -149,6 +152,7 @@ "$ref": "#/texts/10" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -158,6 +162,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -171,6 +176,7 @@ "$ref": "#/texts/11" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -180,6 +186,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -196,6 +203,7 @@ "$ref": "#/texts/13" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -212,6 +220,7 @@ "$ref": "#/texts/15" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -228,6 +237,7 @@ "$ref": "#/texts/17" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -241,6 +251,7 @@ "$ref": "#/texts/18" } ], + "content_layer": "body", "name": "header-1", "label": "section" }, @@ -281,6 +292,7 @@ "$ref": "#/texts/38" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -290,6 +302,7 @@ "$ref": "#/texts/20" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -299,6 +312,7 @@ "$ref": "#/texts/21" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -308,6 +322,7 @@ "$ref": "#/texts/22" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -317,6 +332,7 @@ "$ref": "#/texts/23" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -339,6 +355,7 @@ "$ref": "#/texts/28" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -348,6 +365,7 @@ "$ref": "#/texts/25" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -357,6 +375,7 @@ "$ref": "#/texts/26" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -366,6 +385,7 @@ "$ref": "#/texts/27" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -375,6 +395,7 @@ "$ref": "#/texts/28" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -397,6 +418,7 @@ "$ref": "#/texts/33" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -406,6 +428,7 @@ "$ref": "#/texts/30" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -415,6 +438,7 @@ "$ref": "#/texts/31" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -424,6 +448,7 @@ "$ref": "#/texts/32" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -433,6 +458,7 @@ "$ref": "#/texts/33" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -442,6 +468,7 @@ "$ref": "#/texts/34" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -458,6 +485,7 @@ "$ref": "#/texts/37" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -467,6 +495,7 @@ "$ref": "#/texts/36" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -476,6 +505,7 @@ "$ref": "#/texts/37" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -485,6 +515,7 @@ "$ref": "#/texts/38" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -903,6 +934,7 @@ "$ref": "#/texts/175" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -919,6 +951,7 @@ "$ref": "#/texts/177" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -928,6 +961,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" }, @@ -947,6 +981,7 @@ "$ref": "#/texts/180" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -966,6 +1001,7 @@ "$ref": "#/texts/183" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1006,6 +1042,7 @@ "$ref": "#/texts/193" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1022,6 +1059,7 @@ "$ref": "#/texts/195" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1038,6 +1076,7 @@ "$ref": "#/texts/197" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1051,6 +1090,7 @@ "$ref": "#/texts/256" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1079,6 +1119,7 @@ "$ref": "#/texts/262" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1254,6 +1295,7 @@ "$ref": "#/texts/319" } ], + "content_layer": "body", "name": "ordered list", "label": "ordered_list" }, @@ -1324,6 +1366,7 @@ "$ref": "#/texts/340" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1352,6 +1395,7 @@ "$ref": "#/texts/347" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1374,6 +1418,7 @@ "$ref": "#/texts/351" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1393,6 +1438,7 @@ "$ref": "#/texts/354" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1481,6 +1527,7 @@ "$ref": "#/texts/380" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1497,6 +1544,7 @@ "$ref": "#/texts/382" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1534,6 +1582,7 @@ "$ref": "#/texts/391" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1550,6 +1599,7 @@ "$ref": "#/texts/393" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -1559,6 +1609,7 @@ "$ref": "#/texts/341" }, "children": [], + "content_layer": "body", "name": "list", "label": "list" } @@ -1570,6 +1621,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Main page", @@ -1583,6 +1635,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Contents", @@ -1596,6 +1649,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Current events", @@ -1609,6 +1663,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Random article", @@ -1622,6 +1677,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "About Wikipedia", @@ -1635,6 +1691,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Contact us", @@ -1648,6 +1705,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Help", @@ -1661,6 +1719,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Learn to edit", @@ -1674,6 +1733,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Community portal", @@ -1687,6 +1747,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Recent changes", @@ -1700,6 +1761,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Upload file", @@ -1713,6 +1775,7 @@ "$ref": "#/groups/3" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Donate", @@ -1726,6 +1789,7 @@ "$ref": "#/groups/5" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Create account", @@ -1739,6 +1803,7 @@ "$ref": "#/groups/5" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Log in", @@ -1752,6 +1817,7 @@ "$ref": "#/groups/6" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Create account", @@ -1765,6 +1831,7 @@ "$ref": "#/groups/6" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Log in", @@ -1778,6 +1845,7 @@ "$ref": "#/groups/7" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Contributions", @@ -1791,6 +1859,7 @@ "$ref": "#/groups/7" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Talk", @@ -1808,6 +1877,7 @@ "$ref": "#/groups/9" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Contents", @@ -1820,6 +1890,7 @@ "$ref": "#/groups/9" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "(Top)", @@ -1837,6 +1908,7 @@ "$ref": "#/groups/10" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "1 Etymology", @@ -1854,6 +1926,7 @@ "$ref": "#/groups/11" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "2 Taxonomy", @@ -1871,6 +1944,7 @@ "$ref": "#/groups/12" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "3 Morphology", @@ -1888,6 +1962,7 @@ "$ref": "#/groups/13" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "4 Distribution and habitat", @@ -1905,6 +1980,7 @@ "$ref": "#/groups/14" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "5 Behaviour Toggle Behaviour subsection", @@ -1922,6 +1998,7 @@ "$ref": "#/groups/15" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "5.1 Feeding", @@ -1939,6 +2016,7 @@ "$ref": "#/groups/16" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "5.2 Breeding", @@ -1956,6 +2034,7 @@ "$ref": "#/groups/17" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "5.3 Communication", @@ -1973,6 +2052,7 @@ "$ref": "#/groups/18" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "5.4 Predators", @@ -1990,6 +2070,7 @@ "$ref": "#/groups/19" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "6 Relationship with humans Toggle Relationship with humans subsection", @@ -2007,6 +2088,7 @@ "$ref": "#/groups/20" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "6.1 Hunting", @@ -2024,6 +2106,7 @@ "$ref": "#/groups/21" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "6.2 Domestication", @@ -2041,6 +2124,7 @@ "$ref": "#/groups/22" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "6.3 Heraldry", @@ -2058,6 +2142,7 @@ "$ref": "#/groups/23" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "6.4 Cultural references", @@ -2075,6 +2160,7 @@ "$ref": "#/groups/24" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "7 See also", @@ -2092,6 +2178,7 @@ "$ref": "#/groups/25" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "8 Notes Toggle Notes subsection", @@ -2109,6 +2196,7 @@ "$ref": "#/groups/26" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "8.1 Citations", @@ -2126,6 +2214,7 @@ "$ref": "#/groups/27" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "8.2 Sources", @@ -2143,6 +2232,7 @@ "$ref": "#/groups/28" } ], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "9 External links", @@ -2220,6 +2310,7 @@ "$ref": "#/texts/341" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Duck", @@ -2231,6 +2322,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ac\u00e8h", @@ -2244,6 +2336,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Afrikaans", @@ -2257,6 +2350,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Alemannisch", @@ -2270,6 +2364,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u12a0\u121b\u122d\u129b", @@ -2283,6 +2378,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u00c6nglisc", @@ -2296,6 +2392,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629", @@ -2309,6 +2406,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Aragon\u00e9s", @@ -2322,6 +2420,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0710\u072a\u0721\u071d\u0710", @@ -2335,6 +2434,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Arm\u00e3neashti", @@ -2348,6 +2448,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Asturianu", @@ -2361,6 +2462,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Atikamekw", @@ -2374,6 +2476,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0410\u0432\u0430\u0440", @@ -2387,6 +2490,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Aymar aru", @@ -2400,6 +2504,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u062a\u06c6\u0631\u06a9\u062c\u0647", @@ -2413,6 +2518,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Basa Bali", @@ -2426,6 +2532,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u09ac\u09be\u0982\u09b2\u09be", @@ -2439,6 +2546,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u95a9\u5357\u8a9e / B\u00e2n-l\u00e2m-g\u00fa", @@ -2452,6 +2560,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f", @@ -2465,6 +2574,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f (\u0442\u0430\u0440\u0430\u0448\u043a\u0435\u0432\u0456\u0446\u0430)", @@ -2478,6 +2588,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Bikol Central", @@ -2491,6 +2602,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438", @@ -2504,6 +2616,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Brezhoneg", @@ -2517,6 +2630,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0411\u0443\u0440\u044f\u0430\u0434", @@ -2530,6 +2644,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Catal\u00e0", @@ -2543,6 +2658,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0427\u04d1\u0432\u0430\u0448\u043b\u0430", @@ -2556,6 +2672,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u010ce\u0161tina", @@ -2569,6 +2686,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "ChiShona", @@ -2582,6 +2700,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Cymraeg", @@ -2595,6 +2714,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Dagbanli", @@ -2608,6 +2728,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Dansk", @@ -2621,6 +2742,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Deitsch", @@ -2634,6 +2756,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Deutsch", @@ -2647,6 +2770,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0921\u094b\u091f\u0947\u0932\u0940", @@ -2660,6 +2784,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac", @@ -2673,6 +2798,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Emili\u00e0n e rumagn\u00f2l", @@ -2686,6 +2812,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Espa\u00f1ol", @@ -2699,6 +2826,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Esperanto", @@ -2712,6 +2840,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Euskara", @@ -2725,6 +2854,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0641\u0627\u0631\u0633\u06cc", @@ -2738,6 +2868,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Fran\u00e7ais", @@ -2751,6 +2882,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Gaeilge", @@ -2764,6 +2896,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Galego", @@ -2777,6 +2910,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0413\u04c0\u0430\u043b\u0433\u04c0\u0430\u0439", @@ -2790,6 +2924,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u8d1b\u8a9e", @@ -2803,6 +2938,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u06af\u06cc\u0644\u06a9\u06cc", @@ -2816,6 +2952,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\ud800\udf32\ud800\udf3f\ud800\udf44\ud800\udf39\ud800\udf43\ud800\udf3a", @@ -2829,6 +2966,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0917\u094b\u0902\u092f\u091a\u0940 \u0915\u094b\u0902\u0915\u0923\u0940 / G\u00f5ychi Konknni", @@ -2842,6 +2980,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u5ba2\u5bb6\u8a9e / Hak-k\u00e2-ng\u00ee", @@ -2855,6 +2994,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\ud55c\uad6d\uc5b4", @@ -2868,6 +3008,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Hausa", @@ -2881,6 +3022,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0540\u0561\u0575\u0565\u0580\u0565\u0576", @@ -2894,6 +3036,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0939\u093f\u0928\u094d\u0926\u0940", @@ -2907,6 +3050,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Hrvatski", @@ -2920,6 +3064,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ido", @@ -2933,6 +3078,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Bahasa Indonesia", @@ -2946,6 +3092,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "I\u00f1upiatun", @@ -2959,6 +3106,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u00cdslenska", @@ -2972,6 +3120,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Italiano", @@ -2985,6 +3134,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u05e2\u05d1\u05e8\u05d9\u05ea", @@ -2998,6 +3148,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Jawa", @@ -3011,6 +3162,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0c95\u0ca8\u0ccd\u0ca8\u0ca1", @@ -3024,6 +3176,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Kapampangan", @@ -3037,6 +3190,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u10e5\u10d0\u10e0\u10d7\u10e3\u10da\u10d8", @@ -3050,6 +3204,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0915\u0949\u0936\u0941\u0930 / \u06a9\u0672\u0634\u064f\u0631", @@ -3063,6 +3218,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u049a\u0430\u0437\u0430\u049b\u0448\u0430", @@ -3076,6 +3232,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ikirundi", @@ -3089,6 +3246,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Kongo", @@ -3102,6 +3260,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Krey\u00f2l ayisyen", @@ -3115,6 +3274,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u041a\u044b\u0440\u044b\u043a \u043c\u0430\u0440\u044b", @@ -3128,6 +3288,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0ea5\u0eb2\u0ea7", @@ -3141,6 +3302,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Latina", @@ -3154,6 +3316,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Latvie\u0161u", @@ -3167,6 +3330,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Lietuvi\u0173", @@ -3180,6 +3344,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Li Niha", @@ -3193,6 +3358,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ligure", @@ -3206,6 +3372,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Limburgs", @@ -3219,6 +3386,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ling\u00e1la", @@ -3232,6 +3400,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Malagasy", @@ -3245,6 +3414,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02", @@ -3258,6 +3428,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u092e\u0930\u093e\u0920\u0940", @@ -3271,6 +3442,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0645\u0627\u0632\u0650\u0631\u0648\u0646\u06cc", @@ -3284,6 +3456,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Bahasa Melayu", @@ -3297,6 +3470,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\uabc3\uabe4\uabc7\uabe9 \uabc2\uabe3\uabdf", @@ -3310,6 +3484,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u95a9\u6771\u8a9e / M\u00ecng-d\u0115\u0324ng-ng\u1e73\u0304", @@ -3323,6 +3498,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u041c\u043e\u043a\u0448\u0435\u043d\u044c", @@ -3336,6 +3512,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u041c\u043e\u043d\u0433\u043e\u043b", @@ -3349,6 +3526,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u1019\u103c\u1014\u103a\u1019\u102c\u1018\u102c\u101e\u102c", @@ -3362,6 +3540,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Nederlands", @@ -3375,6 +3554,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Nedersaksies", @@ -3388,6 +3568,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0928\u0947\u092a\u093e\u0932\u0940", @@ -3401,6 +3582,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0928\u0947\u092a\u093e\u0932 \u092d\u093e\u0937\u093e", @@ -3414,6 +3596,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u65e5\u672c\u8a9e", @@ -3427,6 +3610,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u041d\u043e\u0445\u0447\u0438\u0439\u043d", @@ -3440,6 +3624,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Norsk nynorsk", @@ -3453,6 +3638,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Occitan", @@ -3466,6 +3652,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Oromoo", @@ -3479,6 +3666,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40", @@ -3492,6 +3680,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Picard", @@ -3505,6 +3694,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Plattd\u00fc\u00fctsch", @@ -3518,6 +3708,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Polski", @@ -3531,6 +3722,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Portugu\u00eas", @@ -3544,6 +3736,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Q\u0131r\u0131mtatarca", @@ -3557,6 +3750,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Rom\u00e2n\u0103", @@ -3570,6 +3764,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439", @@ -3583,6 +3778,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0421\u0430\u0445\u0430 \u0442\u044b\u043b\u0430", @@ -3596,6 +3792,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u1c65\u1c5f\u1c71\u1c5b\u1c5f\u1c72\u1c64", @@ -3609,6 +3806,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Sardu", @@ -3622,6 +3820,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Scots", @@ -3635,6 +3834,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Seeltersk", @@ -3648,6 +3848,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Shqip", @@ -3661,6 +3862,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Sicilianu", @@ -3674,6 +3876,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0dc3\u0dd2\u0d82\u0dc4\u0dbd", @@ -3687,6 +3890,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Simple English", @@ -3700,6 +3904,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0633\u0646\u068c\u064a", @@ -3713,6 +3918,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u06a9\u0648\u0631\u062f\u06cc", @@ -3726,6 +3932,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0421\u0440\u043f\u0441\u043a\u0438 / srpski", @@ -3739,6 +3946,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Srpskohrvatski / \u0441\u0440\u043f\u0441\u043a\u043e\u0445\u0440\u0432\u0430\u0442\u0441\u043a\u0438", @@ -3752,6 +3960,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Sunda", @@ -3765,6 +3974,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Svenska", @@ -3778,6 +3988,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Tagalog", @@ -3791,6 +4002,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0ba4\u0bae\u0bbf\u0bb4\u0bcd", @@ -3804,6 +4016,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Taqbaylit", @@ -3817,6 +4030,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0422\u0430\u0442\u0430\u0440\u0447\u0430 / tatar\u00e7a", @@ -3830,6 +4044,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0e44\u0e17\u0e22", @@ -3843,6 +4058,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "T\u00fcrk\u00e7e", @@ -3856,6 +4072,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430", @@ -3869,6 +4086,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u0626\u06c7\u064a\u063a\u06c7\u0631\u0686\u06d5 / Uyghurche", @@ -3882,6 +4100,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Vahcuengh", @@ -3895,6 +4114,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ti\u1ebfng Vi\u1ec7t", @@ -3908,6 +4128,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Walon", @@ -3921,6 +4142,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u6587\u8a00", @@ -3934,6 +4156,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Winaray", @@ -3947,6 +4170,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u5434\u8bed", @@ -3960,6 +4184,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u7cb5\u8a9e", @@ -3973,6 +4198,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u017demait\u0117\u0161ka", @@ -3986,6 +4212,7 @@ "$ref": "#/groups/29" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "\u4e2d\u6587", @@ -3999,6 +4226,7 @@ "$ref": "#/groups/30" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Article", @@ -4012,6 +4240,7 @@ "$ref": "#/groups/30" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Talk", @@ -4025,6 +4254,7 @@ "$ref": "#/groups/32" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Read", @@ -4038,6 +4268,7 @@ "$ref": "#/groups/32" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "View source", @@ -4051,6 +4282,7 @@ "$ref": "#/groups/32" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "View history", @@ -4064,6 +4296,7 @@ "$ref": "#/groups/33" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Read", @@ -4077,6 +4310,7 @@ "$ref": "#/groups/33" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "View source", @@ -4090,6 +4324,7 @@ "$ref": "#/groups/33" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "View history", @@ -4103,6 +4338,7 @@ "$ref": "#/groups/34" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "What links here", @@ -4116,6 +4352,7 @@ "$ref": "#/groups/34" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Related changes", @@ -4129,6 +4366,7 @@ "$ref": "#/groups/34" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Upload file", @@ -4142,6 +4380,7 @@ "$ref": "#/groups/34" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Special pages", @@ -4155,6 +4394,7 @@ "$ref": "#/groups/34" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Permanent link", @@ -4168,6 +4408,7 @@ "$ref": "#/groups/34" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Page information", @@ -4181,6 +4422,7 @@ "$ref": "#/groups/34" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Cite this page", @@ -4194,6 +4436,7 @@ "$ref": "#/groups/34" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Get shortened URL", @@ -4207,6 +4450,7 @@ "$ref": "#/groups/34" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Download QR code", @@ -4220,6 +4464,7 @@ "$ref": "#/groups/34" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Wikidata item", @@ -4233,6 +4478,7 @@ "$ref": "#/groups/35" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Download as PDF", @@ -4246,6 +4492,7 @@ "$ref": "#/groups/35" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Printable version", @@ -4259,6 +4506,7 @@ "$ref": "#/groups/36" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Wikimedia Commons", @@ -4272,6 +4520,7 @@ "$ref": "#/groups/36" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Wikiquote", @@ -4285,6 +4534,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Duck is the common name for numerous species of waterfowl in the family Anatidae. Ducks are generally smaller and shorter-necked than swans and geese, which are members of the same family. Divided among several subfamilies, they are a form taxon; they do not represent a monophyletic group (the group of all descendants of a single common ancestral species), since swans and geese are not considered ducks. Ducks are mostly aquatic birds, and may be found in both fresh water and sea water.", @@ -4296,6 +4546,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Ducks are sometimes confused with several types of unrelated water birds with similar forms, such as loons or divers, grebes, gallinules and coots.", @@ -4329,6 +4580,7 @@ "$ref": "#/pictures/6" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Etymology", @@ -4341,6 +4593,7 @@ "$ref": "#/texts/200" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The word duck comes from Old English d\u016bce 'diver', a derivative of the verb *d\u016bcan 'to duck, bend down low as if to get under something, or dive', because of the way many species in the dabbling duck group feed by upending; compare with Dutch duiken and German tauchen 'to dive'.", @@ -4352,6 +4605,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Pacific black duck displaying the characteristic upending \"duck\"", @@ -4363,6 +4617,7 @@ "$ref": "#/texts/200" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "This word replaced Old English ened /\u00e6nid 'duck', possibly to avoid confusion with other words, such as ende 'end' with similar forms. Other Germanic languages still have similar words for duck, for example, Dutch eend, German Ente and Norwegian and. The word ened /\u00e6nid was inherited from Proto-Indo-European; cf. Latin anas \"duck\", Lithuanian \u00e1ntis 'duck', Ancient Greek \u03bd\u1fc6\u03c3\u03c3\u03b1 /\u03bd\u1fc6\u03c4\u03c4\u03b1 (n\u0113ssa /n\u0113tta) 'duck', and Sanskrit \u0101t\u00ed 'water bird', among others.", @@ -4374,6 +4629,7 @@ "$ref": "#/texts/200" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A duckling is a young duck in downy plumage[1] or baby duck,[2] but in the food trade a young domestic duck which has just reached adult size and bulk and its meat is still fully tender, is sometimes labelled as a duckling.", @@ -4385,6 +4641,7 @@ "$ref": "#/texts/200" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A male is called a drake and the female is called a duck, or in ornithology a hen.[3][4]", @@ -4396,6 +4653,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Male mallard.", @@ -4407,6 +4665,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Wood ducks.", @@ -4431,6 +4690,7 @@ "$ref": "#/texts/212" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Taxonomy", @@ -4443,6 +4703,7 @@ "$ref": "#/texts/208" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "All ducks belong to the biological order Anseriformes, a group that contains the ducks, geese and swans, as well as the screamers, and the magpie goose.[5] All except the screamers belong to the biological family Anatidae.[5] Within the family, ducks are split into a variety of subfamilies and 'tribes'. The number and composition of these subfamilies and tribes is the cause of considerable disagreement among taxonomists.[5] Some base their decisions on morphological characteristics, others on shared behaviours or genetic studies.[6][7] The number of suggested subfamilies containing ducks ranges from two to five.[8][9] The significant level of hybridisation that occurs among wild ducks complicates efforts to tease apart the relationships between various species.[9]", @@ -4454,6 +4715,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Mallard landing in approach", @@ -4465,6 +4727,7 @@ "$ref": "#/texts/208" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "In most modern classifications, the so-called 'true ducks' belong to the subfamily Anatinae, which is further split into a varying number of tribes.[10] The largest of these, the Anatini, contains the 'dabbling' or 'river' ducks \u2013 named for their method of feeding primarily at the surface of fresh water.[11] The 'diving ducks', also named for their primary feeding method, make up the tribe Aythyini.[12] The 'sea ducks' of the tribe Mergini are diving ducks which specialise on fish and shellfish and spend a majority of their lives in saltwater.[13] The tribe Oxyurini contains the 'stifftails', diving ducks notable for their small size and stiff, upright tails.[14]", @@ -4476,6 +4739,7 @@ "$ref": "#/texts/208" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A number of other species called ducks are not considered to be 'true ducks', and are typically placed in other subfamilies or tribes. The whistling ducks are assigned either to a tribe (Dendrocygnini) in the subfamily Anatinae or the subfamily Anserinae,[15] or to their own subfamily (Dendrocygninae) or family (Dendrocyganidae).[9][16] The freckled duck of Australia is either the sole member of the tribe Stictonettini in the subfamily Anserinae,[15] or in its own family, the Stictonettinae.[9] The shelducks make up the tribe Tadornini in the family Anserinae in some classifications,[15] and their own subfamily, Tadorninae, in others,[17] while the steamer ducks are either placed in the family Anserinae in the tribe Tachyerini[15] or lumped with the shelducks in the tribe Tadorini.[9] The perching ducks make up in the tribe Cairinini in the subfamily Anserinae in some classifications, while that tribe is eliminated in other classifications and its members assigned to the tribe Anatini.[9] The torrent duck is generally included in the subfamily Anserinae in the monotypic tribe Merganettini,[15] but is sometimes included in the tribe Tadornini.[18] The pink-eared duck is sometimes included as a true duck either in the tribe Anatini[15] or the tribe Malacorhynchini,[19] and other times is included with the shelducks in the tribe Tadornini.[15]", @@ -4497,6 +4761,7 @@ "$ref": "#/texts/216" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Morphology", @@ -4509,6 +4774,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Male Mandarin duck", @@ -4520,6 +4786,7 @@ "$ref": "#/texts/213" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The overall body plan of ducks is elongated and broad, and they are also relatively long-necked, albeit not as long-necked as the geese and swans. The body shape of diving ducks varies somewhat from this in being more rounded. The bill is usually broad and contains serrated pectens, which are particularly well defined in the filter-feeding species. In the case of some fishing species the bill is long and strongly serrated. The scaled legs are strong and well developed, and generally set far back on the body, more so in the highly aquatic species. The wings are very strong and are generally short and pointed, and the flight of ducks requires fast continuous strokes, requiring in turn strong wing muscles. Three species of steamer duck are almost flightless, however. Many species of duck are temporarily flightless while moulting; they seek out protected habitat with good food supplies during this period. This moult typically precedes migration.", @@ -4531,6 +4798,7 @@ "$ref": "#/texts/213" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The drakes of northern species often have extravagant plumage, but that is moulted in summer to give a more female-like appearance, the \"eclipse\" plumage. Southern resident species typically show less sexual dimorphism, although there are exceptions such as the paradise shelduck of New Zealand, which is both strikingly sexually dimorphic and in which the female's plumage is brighter than that of the male. The plumage of juvenile birds generally resembles that of the female. Female ducks have evolved to have a corkscrew shaped vagina to prevent rape.", @@ -4555,6 +4823,7 @@ "$ref": "#/texts/221" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Distribution and habitat", @@ -4567,6 +4836,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Flying steamer ducks in Ushuaia, Argentina", @@ -4578,6 +4848,7 @@ "$ref": "#/texts/217" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Ducks have a cosmopolitan distribution, and are found on every continent except Antarctica.[5] Several species manage to live on subantarctic islands, including South Georgia and the Auckland Islands.[20] Ducks have reached a number of isolated oceanic islands, including the Hawaiian Islands, Micronesia and the Gal\u00e1pagos Islands, where they are often vagrants and less often residents.[21][22] A handful are endemic to such far-flung islands.[21]", @@ -4589,6 +4860,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Female mallard in Cornwall, England", @@ -4600,6 +4872,7 @@ "$ref": "#/texts/217" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Some duck species, mainly those breeding in the temperate and Arctic Northern Hemisphere, are migratory; those in the tropics are generally not. Some ducks, particularly in Australia where rainfall is erratic, are nomadic, seeking out the temporary lakes and pools that form after localised heavy rain.[23]", @@ -4624,6 +4897,7 @@ "$ref": "#/texts/238" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Behaviour", @@ -4661,6 +4935,7 @@ "$ref": "#/texts/231" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Feeding", @@ -4673,6 +4948,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Pecten along the bill", @@ -4684,6 +4960,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Mallard duckling preening", @@ -4695,6 +4972,7 @@ "$ref": "#/texts/223" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Ducks eat food sources such as grasses, aquatic plants, fish, insects, small amphibians, worms, and small molluscs.", @@ -4706,6 +4984,7 @@ "$ref": "#/texts/223" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Dabbling ducks feed on the surface of water or on land, or as deep as they can reach by up-ending without completely submerging.[24] Along the edge of the bill, there is a comb-like structure called a pecten. This strains the water squirting from the side of the bill and traps any food. The pecten is also used to preen feathers and to hold slippery food items.", @@ -4717,6 +4996,7 @@ "$ref": "#/texts/223" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Diving ducks and sea ducks forage deep underwater. To be able to submerge more easily, the diving ducks are heavier than dabbling ducks, and therefore have more difficulty taking off to fly.", @@ -4728,6 +5008,7 @@ "$ref": "#/texts/223" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A few specialized species such as the mergansers are adapted to catch and swallow large fish.", @@ -4739,6 +5020,7 @@ "$ref": "#/texts/223" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The others have the characteristic wide flat bill adapted to dredging-type jobs such as pulling up waterweed, pulling worms and small molluscs out of mud, searching for insect larvae, and bulk jobs such as dredging out, holding, turning head first, and swallowing a squirming frog. To avoid injury when digging into sediment it has no cere, but the nostrils come out through hard horn.", @@ -4750,6 +5032,7 @@ "$ref": "#/texts/223" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The Guardian published an article advising that ducks should not be fed with bread because it damages the health of the ducks and pollutes waterways.[25]", @@ -4768,6 +5051,7 @@ "$ref": "#/texts/234" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Breeding", @@ -4780,6 +5064,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "A Muscovy duckling", @@ -4791,6 +5076,7 @@ "$ref": "#/texts/232" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Ducks generally only have one partner at a time, although the partnership usually only lasts one year.[26] Larger species and the more sedentary species (like fast-river specialists) tend to have pair-bonds that last numerous years.[27] Most duck species breed once a year, choosing to do so in favourable conditions (spring/summer or wet seasons). Ducks also tend to make a nest before breeding, and, after hatching, lead their ducklings to water. Mother ducks are very caring and protective of their young, but may abandon some of their ducklings if they are physically stuck in an area they cannot get out of (such as nesting in an enclosed courtyard) or are not prospering due to genetic defects or sickness brought about by hypothermia, starvation, or disease. Ducklings can also be orphaned by inconsistent late hatching where a few eggs hatch after the mother has abandoned the nest and led her ducklings to water.[28]", @@ -4809,6 +5095,7 @@ "$ref": "#/texts/237" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Communication", @@ -4821,6 +5108,7 @@ "$ref": "#/texts/235" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Female mallard ducks (as well as several other species in the genus Anas, such as the American and Pacific black ducks, spot-billed duck, northern pintail and common teal) make the classic \"quack\" sound while males make a similar but raspier sound that is sometimes written as \"breeeeze\",[29][self-published source?] but, despite widespread misconceptions, most species of duck do not \"quack\".[30] In general, ducks make a range of calls, including whistles, cooing, yodels and grunts. For example, the scaup \u2013 which are diving ducks \u2013 make a noise like \"scaup\" (hence their name). Calls may be loud displaying calls or quieter contact calls.", @@ -4832,6 +5120,7 @@ "$ref": "#/texts/235" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A common urban legend claims that duck quacks do not echo; however, this has been proven to be false. This myth was first debunked by the Acoustics Research Centre at the University of Salford in 2003 as part of the British Association's Festival of Science.[31] It was also debunked in one of the earlier episodes of the popular Discovery Channel television show MythBusters.[32]", @@ -4853,6 +5142,7 @@ "$ref": "#/texts/241" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Predators", @@ -4865,6 +5155,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Ringed teal", @@ -4876,6 +5167,7 @@ "$ref": "#/texts/238" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Ducks have many predators. Ducklings are particularly vulnerable, since their inability to fly makes them easy prey not only for predatory birds but also for large fish like pike, crocodilians, predatory testudines such as the alligator snapping turtle, and other aquatic hunters, including fish-eating birds such as herons. Ducks' nests are raided by land-based predators, and brooding females may be caught unaware on the nest by mammals, such as foxes, or large birds, such as hawks or owls.", @@ -4887,6 +5179,7 @@ "$ref": "#/texts/238" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Adult ducks are fast fliers, but may be caught on the water by large aquatic predators including big fish such as the North American muskie and the European pike. In flight, ducks are safe from all but a few predators such as humans and the peregrine falcon, which uses its speed and strength to catch ducks.", @@ -4911,6 +5204,7 @@ "$ref": "#/texts/252" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Relationship with humans", @@ -4930,6 +5224,7 @@ "$ref": "#/texts/245" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Hunting", @@ -4942,6 +5237,7 @@ "$ref": "#/texts/243" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Humans have hunted ducks since prehistoric times. Excavations of middens in California dating to 7800 \u2013 6400 BP have turned up bones of ducks, including at least one now-extinct flightless species.[33] Ducks were captured in \"significant numbers\" by Holocene inhabitants of the lower Ohio River valley, suggesting they took advantage of the seasonal bounty provided by migrating waterfowl.[34] Neolithic hunters in locations as far apart as the Caribbean,[35] Scandinavia,[36] Egypt,[37] Switzerland,[38] and China relied on ducks as a source of protein for some or all of the year.[39] Archeological evidence shows that M\u0101ori people in New Zealand hunted the flightless Finsch's duck, possibly to extinction, though rat predation may also have contributed to its fate.[40] A similar end awaited the Chatham duck, a species with reduced flying capabilities which went extinct shortly after its island was colonised by Polynesian settlers.[41] It is probable that duck eggs were gathered by Neolithic hunter-gathers as well, though hard evidence of this is uncommon.[35][42]", @@ -4953,6 +5249,7 @@ "$ref": "#/texts/243" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "In many areas, wild ducks (including ducks farmed and released into the wild) are hunted for food or sport,[43] by shooting, or by being trapped using duck decoys. Because an idle floating duck or a duck squatting on land cannot react to fly or move quickly, \"a sitting duck\" has come to mean \"an easy target\". These ducks may be contaminated by pollutants such as PCBs.[44]", @@ -4971,6 +5268,7 @@ "$ref": "#/texts/248" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Domestication", @@ -4983,6 +5281,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Indian Runner ducks, a common breed of domestic ducks", @@ -4994,6 +5293,7 @@ "$ref": "#/texts/246" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Ducks have many economic uses, being farmed for their meat, eggs, and feathers (particularly their down). Approximately 3 billion ducks are slaughtered each year for meat worldwide.[45] They are also kept and bred by aviculturists and often displayed in zoos. Almost all the varieties of domestic ducks are descended from the mallard (Anas platyrhynchos), apart from the Muscovy duck (Cairina moschata).[46][47] The Call duck is another example of a domestic duck breed. Its name comes from its original use established by hunters, as a decoy to attract wild mallards from the sky, into traps set for them on the ground. The call duck is the world's smallest domestic duck breed, as it weighs less than 1\u00a0kg (2.2\u00a0lb).[48]", @@ -5012,6 +5312,7 @@ "$ref": "#/texts/251" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Heraldry", @@ -5024,6 +5325,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "Three black-colored ducks in the coat of arms of Maaninka[49]", @@ -5035,6 +5337,7 @@ "$ref": "#/texts/249" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Ducks appear on several coats of arms, including the coat of arms of Lub\u0101na (Latvia)[50] and the coat of arms of F\u00f6gl\u00f6 (\u00c5land).[51]", @@ -5053,6 +5356,7 @@ "$ref": "#/texts/254" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Cultural references", @@ -5065,6 +5369,7 @@ "$ref": "#/texts/252" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "In 2002, psychologist Richard Wiseman and colleagues at the University of Hertfordshire, UK, finished a year-long LaughLab experiment, concluding that of all animals, ducks attract the most humor and silliness; he said, \"If you're going to tell a joke involving an animal, make it a duck.\"[52] The word \"duck\" may have become an inherently funny word in many languages, possibly because ducks are seen as silly in their looks or behavior. Of the many ducks in fiction, many are cartoon characters, such as Walt Disney's Donald Duck, and Warner Bros.' Daffy Duck. Howard the Duck started as a comic book character in 1973[53][54] and was made into a movie in 1986.", @@ -5076,6 +5381,7 @@ "$ref": "#/texts/252" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "The 1992 Disney film The Mighty Ducks, starring Emilio Estevez, chose the duck as the mascot for the fictional youth hockey team who are protagonists of the movie, based on the duck being described as a fierce fighter. This led to the duck becoming the nickname and mascot for the eventual National Hockey League professional team of the Anaheim Ducks, who were founded with the name the Mighty Ducks of Anaheim.[citation needed] The duck is also the nickname of the University of Oregon sports teams as well as the Long Island Ducks minor league baseball team.[55]", @@ -5094,6 +5400,7 @@ "$ref": "#/groups/38" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "See also", @@ -5106,6 +5413,7 @@ "$ref": "#/groups/37" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Birds portal", @@ -5119,6 +5427,7 @@ "$ref": "#/groups/38" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Domestic duck", @@ -5132,6 +5441,7 @@ "$ref": "#/groups/38" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Duck as food", @@ -5145,6 +5455,7 @@ "$ref": "#/groups/38" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Duck test", @@ -5158,6 +5469,7 @@ "$ref": "#/groups/38" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Duck breeds", @@ -5171,6 +5483,7 @@ "$ref": "#/groups/38" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Fictional ducks", @@ -5184,6 +5497,7 @@ "$ref": "#/groups/38" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Rubber duck", @@ -5204,6 +5518,7 @@ "$ref": "#/texts/320" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Notes", @@ -5220,6 +5535,7 @@ "$ref": "#/groups/39" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Citations", @@ -5232,6 +5548,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ \"Duckling\". The American Heritage Dictionary of the English Language, Fourth Edition. Houghton Mifflin Company. 2006. Retrieved 2015-05-22.", @@ -5245,6 +5562,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ \"Duckling\". Kernerman English Multilingual Dictionary (Beta Version). K. Dictionaries Ltd. 2000\u20132006. Retrieved 2015-05-22.", @@ -5258,6 +5576,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Dohner, Janet Vorwald (2001). The Encyclopedia of Historic and Endangered Livestock and Poultry Breeds. Yale University Press. ISBN\u00a0978-0300138139.", @@ -5271,6 +5590,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Visca, Curt; Visca, Kelley (2003). How to Draw Cartoon Birds. The Rosen Publishing Group. ISBN\u00a09780823961566.", @@ -5284,6 +5604,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ a b c d Carboneras 1992, p.\u00a0536.", @@ -5297,6 +5618,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Livezey 1986, pp.\u00a0737\u2013738.", @@ -5310,6 +5632,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Madsen, McHugh & de Kloet 1988, p.\u00a0452.", @@ -5323,6 +5646,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Donne-Gouss\u00e9, Laudet & H\u00e4nni 2002, pp.\u00a0353\u2013354.", @@ -5336,6 +5660,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ a b c d e f Carboneras 1992, p.\u00a0540.", @@ -5349,6 +5674,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Elphick, Dunning & Sibley 2001, p.\u00a0191.", @@ -5362,6 +5688,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Kear 2005, p.\u00a0448.", @@ -5375,6 +5702,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Kear 2005, p.\u00a0622\u2013623.", @@ -5388,6 +5716,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Kear 2005, p.\u00a0686.", @@ -5401,6 +5730,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Elphick, Dunning & Sibley 2001, p.\u00a0193.", @@ -5414,6 +5744,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ a b c d e f g Carboneras 1992, p.\u00a0537.", @@ -5427,6 +5758,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ American Ornithologists' Union 1998, p.\u00a0xix.", @@ -5440,6 +5772,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ American Ornithologists' Union 1998.", @@ -5453,6 +5786,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Carboneras 1992, p.\u00a0538.", @@ -5466,6 +5800,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Christidis & Boles 2008, p.\u00a062.", @@ -5479,6 +5814,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Shirihai 2008, pp.\u00a0239, 245.", @@ -5492,6 +5828,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ a b Pratt, Bruner & Berrett 1987, pp.\u00a098\u2013107.", @@ -5505,6 +5842,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Fitter, Fitter & Hosking 2000, pp.\u00a052\u20133.", @@ -5518,6 +5856,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ \"Pacific Black Duck\". www.wiresnr.org. Retrieved 2018-04-27.", @@ -5531,6 +5870,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Ogden, Evans. \"Dabbling Ducks\". CWE. Retrieved 2006-11-02.", @@ -5544,6 +5884,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Karl Mathiesen (16 March 2015). \"Don't feed the ducks bread, say conservationists\". The Guardian. Retrieved 13 November 2016.", @@ -5557,6 +5898,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Rohwer, Frank C.; Anderson, Michael G. (1988). \"Female-Biased Philopatry, Monogamy, and the Timing of Pair Formation in Migratory Waterfowl\". Current Ornithology. pp.\u00a0187\u2013221. doi:10.1007/978-1-4615-6787-5_4. ISBN\u00a0978-1-4615-6789-9.", @@ -5570,6 +5912,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Smith, Cyndi M.; Cooke, Fred; Robertson, Gregory J.; Goudie, R. Ian; Boyd, W. Sean (2000). \"Long-Term Pair Bonds in Harlequin Ducks\". The Condor. 102 (1): 201\u2013205. doi:10.1093/condor/102.1.201. hdl:10315/13797.", @@ -5583,6 +5926,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ \"If You Find An Orphaned Duckling - Wildlife Rehabber\". wildliferehabber.com. Archived from the original on 2018-09-23. Retrieved 2018-12-22.", @@ -5596,6 +5940,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Carver, Heather (2011). The Duck Bible. Lulu.com. ISBN\u00a09780557901562.[self-published source]", @@ -5609,6 +5954,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Titlow, Budd (2013-09-03). Bird Brains: Inside the Strange Minds of Our Fine Feathered Friends. Rowman & Littlefield. ISBN\u00a09780762797707.", @@ -5622,6 +5968,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Amos, Jonathan (2003-09-08). \"Sound science is quackers\". BBC News. Retrieved 2006-11-02.", @@ -5635,6 +5982,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ \"Mythbusters Episode 8\". 12 December 2003.", @@ -5648,6 +5996,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Erlandson 1994, p.\u00a0171.", @@ -5661,6 +6010,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Jeffries 2008, pp.\u00a0168, 243.", @@ -5674,6 +6024,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ a b Sued-Badillo 2003, p.\u00a065.", @@ -5687,6 +6038,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Thorpe 1996, p.\u00a068.", @@ -5700,6 +6052,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Maisels 1999, p.\u00a042.", @@ -5713,6 +6066,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Rau 1876, p.\u00a0133.", @@ -5726,6 +6080,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Higman 2012, p.\u00a023.", @@ -5739,6 +6094,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Hume 2012, p.\u00a053.", @@ -5752,6 +6108,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Hume 2012, p.\u00a052.", @@ -5765,6 +6122,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Fieldhouse 2002, p.\u00a0167.", @@ -5778,6 +6136,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Livingston, A. D. (1998-01-01). Guide to Edible Plants and Animals. Wordsworth Editions, Limited. ISBN\u00a09781853263774.", @@ -5791,6 +6150,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ \"Study plan for waterfowl injury assessment: Determining PCB concentrations in Hudson river resident waterfowl\" (PDF). New York State Department of Environmental Conservation. US Department of Commerce. December 2008. p.\u00a03. Archived (PDF) from the original on 2022-10-09. Retrieved 2 July 2019.", @@ -5804,6 +6164,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ \"FAOSTAT\". www.fao.org. Retrieved 2019-10-25.", @@ -5817,6 +6178,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ \"Anas platyrhynchos, Domestic Duck; DigiMorph Staff - The University of Texas at Austin\". Digimorph.org. Retrieved 2012-12-23.", @@ -5830,6 +6192,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Sy Montgomery. \"Mallard; Encyclop\u00e6dia Britannica\". Britannica.com. Retrieved 2012-12-23.", @@ -5843,6 +6206,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Glenday, Craig (2014). Guinness World Records. Guinness World Records Limited. pp.\u00a0135. ISBN\u00a0978-1-908843-15-9.", @@ -5856,6 +6220,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Suomen kunnallisvaakunat (in Finnish). Suomen Kunnallisliitto. 1982. p.\u00a0147. ISBN\u00a0951-773-085-3.", @@ -5869,6 +6234,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ \"Lub\u0101nas simbolika\" (in Latvian). Retrieved September 9, 2021.", @@ -5882,6 +6248,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ \"F\u00f6gl\u00f6\" (in Swedish). Retrieved September 9, 2021.", @@ -5895,6 +6262,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Young, Emma. \"World's funniest joke revealed\". New Scientist. Retrieved 7 January 2019.", @@ -5908,6 +6276,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ \"Howard the Duck (character)\". Grand Comics Database.", @@ -5921,6 +6290,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ Sanderson, Peter; Gilbert, Laura (2008). \"1970s\". Marvel Chronicle A Year by Year History. London, United Kingdom: Dorling Kindersley. p.\u00a0161. ISBN\u00a0978-0756641238. December saw the debut of the cigar-smoking Howard the Duck. In this story by writer Steve Gerber and artist Val Mayerik, various beings from different realities had begun turning up in the Man-Thing's Florida swamp, including this bad-tempered talking duck.", @@ -5934,6 +6304,7 @@ "$ref": "#/groups/39" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "^ \"The Duck\". University of Oregon Athletics. Retrieved 2022-01-20.", @@ -5951,6 +6322,7 @@ "$ref": "#/groups/40" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Sources", @@ -5963,6 +6335,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "American Ornithologists' Union (1998). Checklist of North American Birds (PDF). Washington, DC: American Ornithologists' Union. ISBN\u00a0978-1-891276-00-2. Archived (PDF) from the original on 2022-10-09.", @@ -5976,6 +6349,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Carboneras, Carlos (1992). del Hoyo, Josep; Elliott, Andrew; Sargatal, Jordi (eds.). Handbook of the Birds of the World. Vol.\u00a01: Ostrich to Ducks. Barcelona: Lynx Edicions. ISBN\u00a0978-84-87334-10-8.", @@ -5989,6 +6363,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Christidis, Les; Boles, Walter E., eds. (2008). Systematics and Taxonomy of Australian Birds. Collingwood, VIC: Csiro Publishing. ISBN\u00a0978-0-643-06511-6.", @@ -6002,6 +6377,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Donne-Gouss\u00e9, Carole; Laudet, Vincent; H\u00e4nni, Catherine (July 2002). \"A molecular phylogeny of Anseriformes based on mitochondrial DNA analysis\". Molecular Phylogenetics and Evolution. 23 (3): 339\u2013356. Bibcode:2002MolPE..23..339D. doi:10.1016/S1055-7903(02)00019-2. PMID\u00a012099792.", @@ -6015,6 +6391,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Elphick, Chris; Dunning, John B. Jr.; Sibley, David, eds. (2001). The Sibley Guide to Bird Life and Behaviour. London: Christopher Helm. ISBN\u00a0978-0-7136-6250-4.", @@ -6028,6 +6405,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Erlandson, Jon M. (1994). Early Hunter-Gatherers of the California Coast. New York, NY: Springer Science & Business Media. ISBN\u00a0978-1-4419-3231-0.", @@ -6041,6 +6419,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Fieldhouse, Paul (2002). Food, Feasts, and Faith: An Encyclopedia of Food Culture in World Religions. Vol.\u00a0I: A\u2013K. Santa Barbara: ABC-CLIO. ISBN\u00a0978-1-61069-412-4.", @@ -6054,6 +6433,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Fitter, Julian; Fitter, Daniel; Hosking, David (2000). Wildlife of the Gal\u00e1pagos. Princeton, NJ: Princeton University Press. ISBN\u00a0978-0-691-10295-5.", @@ -6067,6 +6447,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Higman, B. W. (2012). How Food Made History. Chichester, UK: John Wiley & Sons. ISBN\u00a0978-1-4051-8947-7.", @@ -6080,6 +6461,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Hume, Julian H. (2012). Extinct Birds. London: Christopher Helm. ISBN\u00a0978-1-4729-3744-5.", @@ -6093,6 +6475,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Jeffries, Richard (2008). Holocene Hunter-Gatherers of the Lower Ohio River Valley. Tuscaloosa: University of Alabama Press. ISBN\u00a0978-0-8173-1658-7.", @@ -6106,6 +6489,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Kear, Janet, ed. (2005). Ducks, Geese and Swans: Species Accounts (Cairina to Mergus). Bird Families of the World. Oxford: Oxford University Press. ISBN\u00a0978-0-19-861009-0.", @@ -6119,6 +6503,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Livezey, Bradley C. (October 1986). \"A phylogenetic analysis of recent Anseriform genera using morphological characters\" (PDF). The Auk. 103 (4): 737\u2013754. doi:10.1093/auk/103.4.737. Archived (PDF) from the original on 2022-10-09.", @@ -6132,6 +6517,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Madsen, Cort S.; McHugh, Kevin P.; de Kloet, Siwo R. (July 1988). \"A partial classification of waterfowl (Anatidae) based on single-copy DNA\" (PDF). The Auk. 105 (3): 452\u2013459. doi:10.1093/auk/105.3.452. Archived (PDF) from the original on 2022-10-09.", @@ -6145,6 +6531,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Maisels, Charles Keith (1999). Early Civilizations of the Old World. London: Routledge. ISBN\u00a0978-0-415-10975-8.", @@ -6158,6 +6545,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Pratt, H. Douglas; Bruner, Phillip L.; Berrett, Delwyn G. (1987). A Field Guide to the Birds of Hawaii and the Tropical Pacific. Princeton, NJ: Princeton University Press. ISBN\u00a00-691-02399-9.", @@ -6171,6 +6559,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Rau, Charles (1876). Early Man in Europe. New York: Harper & Brothers. LCCN\u00a005040168.", @@ -6184,6 +6573,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Shirihai, Hadoram (2008). A Complete Guide to Antarctic Wildlife. Princeton, NJ, US: Princeton University Press. ISBN\u00a0978-0-691-13666-0.", @@ -6197,6 +6587,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Sued-Badillo, Jalil (2003). Autochthonous Societies. General History of the Caribbean. Paris: UNESCO. ISBN\u00a0978-92-3-103832-7.", @@ -6210,6 +6601,7 @@ "$ref": "#/groups/40" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Thorpe, I. J. (1996). The Origins of Agriculture in Europe. New York: Routledge. ISBN\u00a0978-0-415-08009-5.", @@ -6254,6 +6646,7 @@ "$ref": "#/groups/48" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "External links", @@ -6266,6 +6659,7 @@ "$ref": "#/groups/41" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Definitions from Wiktionary", @@ -6279,6 +6673,7 @@ "$ref": "#/groups/41" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Media from Commons", @@ -6292,6 +6687,7 @@ "$ref": "#/groups/41" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Quotations from Wikiquote", @@ -6305,6 +6701,7 @@ "$ref": "#/groups/41" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Recipes from Wikibooks", @@ -6318,6 +6715,7 @@ "$ref": "#/groups/41" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Taxa from Wikispecies", @@ -6331,6 +6729,7 @@ "$ref": "#/groups/41" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Data from Wikidata", @@ -6344,6 +6743,7 @@ "$ref": "#/groups/42" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list of books (useful looking abstracts)", @@ -6357,6 +6757,7 @@ "$ref": "#/groups/42" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine", @@ -6370,6 +6771,7 @@ "$ref": "#/groups/42" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "", @@ -6383,6 +6785,7 @@ "$ref": "#/groups/42" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ducks at a Distance, by Rob Hines at Project Gutenberg - A modern illustrated guide to identification of US waterfowl", @@ -6396,6 +6799,7 @@ "$ref": "#/groups/43" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Ducks", @@ -6409,6 +6813,7 @@ "$ref": "#/groups/43" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Game birds", @@ -6422,6 +6827,7 @@ "$ref": "#/groups/43" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Bird common names", @@ -6435,6 +6841,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "All accuracy disputes", @@ -6448,6 +6855,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Accuracy disputes from February 2020", @@ -6461,6 +6869,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "CS1 Finnish-language sources (fi)", @@ -6474,6 +6883,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "CS1 Latvian-language sources (lv)", @@ -6487,6 +6897,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "CS1 Swedish-language sources (sv)", @@ -6500,6 +6911,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Articles with short description", @@ -6513,6 +6925,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Short description is different from Wikidata", @@ -6526,6 +6939,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Wikipedia indefinitely move-protected pages", @@ -6539,6 +6953,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Wikipedia indefinitely semi-protected pages", @@ -6552,6 +6967,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Articles with 'species' microformats", @@ -6565,6 +6981,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Articles containing Old English (ca. 450-1100)-language text", @@ -6578,6 +6995,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Articles containing Dutch-language text", @@ -6591,6 +7009,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Articles containing German-language text", @@ -6604,6 +7023,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Articles containing Norwegian-language text", @@ -6617,6 +7037,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Articles containing Lithuanian-language text", @@ -6630,6 +7051,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Articles containing Ancient Greek (to 1453)-language text", @@ -6643,6 +7065,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "All articles with self-published sources", @@ -6656,6 +7079,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Articles with self-published sources from February 2020", @@ -6669,6 +7093,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "All articles with unsourced statements", @@ -6682,6 +7107,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Articles with unsourced statements from January 2022", @@ -6695,6 +7121,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "CS1: long volume value", @@ -6708,6 +7135,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Pages using Sister project links with wikidata mismatch", @@ -6721,6 +7149,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Pages using Sister project links with hidden wikidata", @@ -6734,6 +7163,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Webarchive template wayback links", @@ -6747,6 +7177,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Articles with Project Gutenberg links", @@ -6760,6 +7191,7 @@ "$ref": "#/groups/44" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Articles containing video clips", @@ -6773,6 +7205,7 @@ "$ref": "#/groups/45" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "This page was last edited on 21 September 2024, at 12:11\u00a0(UTC).", @@ -6786,6 +7219,7 @@ "$ref": "#/groups/45" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Text is available under the Creative Commons Attribution-ShareAlike License 4.0;\nadditional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia\u00ae is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.", @@ -6799,6 +7233,7 @@ "$ref": "#/groups/46" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Privacy policy", @@ -6812,6 +7247,7 @@ "$ref": "#/groups/46" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "About Wikipedia", @@ -6825,6 +7261,7 @@ "$ref": "#/groups/46" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Disclaimers", @@ -6838,6 +7275,7 @@ "$ref": "#/groups/46" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Contact Wikipedia", @@ -6851,6 +7289,7 @@ "$ref": "#/groups/46" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Code of Conduct", @@ -6864,6 +7303,7 @@ "$ref": "#/groups/46" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Developers", @@ -6877,6 +7317,7 @@ "$ref": "#/groups/46" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Statistics", @@ -6890,6 +7331,7 @@ "$ref": "#/groups/46" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Cookie statement", @@ -6903,6 +7345,7 @@ "$ref": "#/groups/46" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Mobile view", @@ -6916,6 +7359,7 @@ "$ref": "#/groups/47" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "", @@ -6929,6 +7373,7 @@ "$ref": "#/groups/47" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "", @@ -6944,6 +7389,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [], @@ -6957,6 +7403,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [], @@ -6970,6 +7417,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [], @@ -6983,6 +7431,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [], @@ -6996,6 +7445,7 @@ "$ref": "#/texts/200" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7013,6 +7463,7 @@ "$ref": "#/texts/200" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7030,6 +7481,7 @@ "$ref": "#/texts/200" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7047,6 +7499,7 @@ "$ref": "#/texts/208" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7064,6 +7517,7 @@ "$ref": "#/texts/213" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7081,6 +7535,7 @@ "$ref": "#/texts/217" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7098,6 +7553,7 @@ "$ref": "#/texts/217" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7115,6 +7571,7 @@ "$ref": "#/texts/223" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7132,6 +7589,7 @@ "$ref": "#/texts/223" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7149,6 +7607,7 @@ "$ref": "#/texts/232" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7166,6 +7625,7 @@ "$ref": "#/texts/238" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7183,6 +7643,7 @@ "$ref": "#/texts/246" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7200,6 +7661,7 @@ "$ref": "#/texts/249" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -7217,6 +7679,7 @@ "$ref": "#/texts/341" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [], @@ -7232,6 +7695,7 @@ "$ref": "#/texts/39" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], @@ -7830,6 +8294,7 @@ "$ref": "#/texts/341" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], diff --git a/tests/data/groundtruth/docling_v2/word_sample.docx.json b/tests/data/groundtruth/docling_v2/word_sample.docx.json index 8c6e6298..115182a3 100644 --- a/tests/data/groundtruth/docling_v2/word_sample.docx.json +++ b/tests/data/groundtruth/docling_v2/word_sample.docx.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -23,6 +24,7 @@ "$ref": "#/texts/1" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -43,6 +45,7 @@ "$ref": "#/texts/8" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -62,6 +65,7 @@ "$ref": "#/texts/12" } ], + "content_layer": "body", "name": "list", "label": "list" }, @@ -81,6 +85,7 @@ "$ref": "#/texts/22" } ], + "content_layer": "body", "name": "list", "label": "list" } @@ -92,6 +97,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Summer activities", @@ -116,6 +122,7 @@ "$ref": "#/texts/4" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Swimming in the lake", @@ -127,6 +134,7 @@ "$ref": "#/texts/1" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Duck", @@ -138,6 +146,7 @@ "$ref": "#/texts/1" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Figure 1: This is a cute duckling", @@ -168,6 +177,7 @@ "$ref": "#/texts/14" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Let\u2019s swim!", @@ -180,6 +190,7 @@ "$ref": "#/texts/4" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "To get started with swimming, first lay down in a water and try not to drown:", @@ -191,6 +202,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "You can relax and look around", @@ -204,6 +216,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Paddle about", @@ -217,6 +230,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Enjoy summer warmth", @@ -230,6 +244,7 @@ "$ref": "#/texts/4" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Also, don\u2019t forget:", @@ -241,6 +256,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Wear sunglasses", @@ -254,6 +270,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Don\u2019t forget to drink water", @@ -267,6 +284,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Use sun cream", @@ -280,6 +298,7 @@ "$ref": "#/texts/4" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Hmm, what else\u2026", @@ -313,6 +332,7 @@ "$ref": "#/groups/2" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Let\u2019s eat", @@ -325,6 +345,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "After we had a good day of swimming in the lake, it\u2019s important to eat something nice", @@ -336,6 +357,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "I like to eat leaves", @@ -347,6 +369,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "Here are some interesting things a respectful duck could eat:", @@ -358,6 +381,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -369,6 +393,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "And let\u2019s add another list in the end:", @@ -380,6 +405,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Leaves", @@ -393,6 +419,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Berries", @@ -406,6 +433,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "Grain", @@ -421,6 +449,7 @@ "$ref": "#/texts/1" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [], @@ -445,6 +474,7 @@ "$ref": "#/texts/14" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], diff --git a/tests/data/groundtruth/docling_v2/word_tables.docx.json b/tests/data/groundtruth/docling_v2/word_tables.docx.json index 957a83c8..abb913ed 100644 --- a/tests/data/groundtruth/docling_v2/word_tables.docx.json +++ b/tests/data/groundtruth/docling_v2/word_tables.docx.json @@ -10,6 +10,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -20,6 +21,7 @@ "$ref": "#/groups/0" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -34,6 +36,7 @@ "$ref": "#/texts/0" } ], + "content_layer": "body", "name": "header-0", "label": "section" } @@ -94,6 +97,7 @@ "$ref": "#/texts/11" } ], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "Test with tables", @@ -106,6 +110,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A uniform table", @@ -117,6 +122,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -128,6 +134,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A non-uniform table with horizontal spans", @@ -139,6 +146,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -150,6 +158,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A non-uniform table with horizontal spans in inner columns", @@ -161,6 +170,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -172,6 +182,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A non-uniform table with vertical spans", @@ -183,6 +194,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -194,6 +206,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "A non-uniform table with all kinds of spans and empty cells", @@ -205,6 +218,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -216,6 +230,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [], "orig": "", @@ -230,6 +245,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], @@ -472,6 +488,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], @@ -690,6 +707,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], @@ -980,6 +998,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], @@ -1346,6 +1365,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [], diff --git a/tests/test_backend_patent_uspto.py b/tests/test_backend_patent_uspto.py index 21bc88c5..0e95a4d6 100644 --- a/tests/test_backend_patent_uspto.py +++ b/tests/test_backend_patent_uspto.py @@ -14,7 +14,7 @@ from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend, XmlTab from docling.datamodel.base_models import InputFormat from docling.datamodel.document import InputDocument -GENERATE: bool = True +GENERATE: bool = False DATA_PATH: Path = Path("./tests/data/uspto/") GT_PATH: Path = Path("./tests/data/groundtruth/docling_v2/")