diff --git a/docling/backend/html_backend.py b/docling/backend/html_backend.py index 9913ecdf..9dc77b83 100644 --- a/docling/backend/html_backend.py +++ b/docling/backend/html_backend.py @@ -67,7 +67,8 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): self.soup = BeautifulSoup(html_content, "html.parser") except Exception as e: raise RuntimeError( - f"Could not initialize HTML backend for file with hash {self.document_hash}." + "Could not initialize HTML backend for file with " + f"hash {self.document_hash}." ) from e @override @@ -104,18 +105,22 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): _log.debug("Trying to convert HTML...") if self.is_valid(): - self.content_layer = ContentLayer.FURNITURE - assert self.soup is not None content = self.soup.body or self.soup # Replace
tags with newline characters # TODO: remove style to avoid losing text from tags like i, b, span, ... for br in content("br"): br.replace_with(NavigableString("\n")) + + headers = content.find(["h1", "h2", "h3", "h4", "h5", "h6"]) + self.content_layer = ( + ContentLayer.BODY if headers is None else ContentLayer.FURNITURE + ) self.walk(content, doc) else: raise RuntimeError( - f"Cannot convert doc with {self.document_hash} because the backend failed to init." + f"Cannot convert doc with {self.document_hash} because the backend " + "failed to init." ) return doc @@ -147,7 +152,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): if text and tag.name in ["div"]: doc.add_text( parent=self.parents[self.level], - label=DocItemLabel.TEXT, + label=DocItemLabel.PARAGRAPH, text=text, content_layer=self.content_layer, ) @@ -259,11 +264,10 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): if element.text is None: return text = element.text.strip() - label = DocItemLabel.TEXT if text: doc.add_text( parent=self.parents[self.level], - label=label, + label=DocItemLabel.PARAGRAPH, text=text, content_layer=self.content_layer, ) @@ -533,7 +537,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): def handle_image(self, element: Tag, doc: DoclingDocument) -> None: """Handles image tags (img).""" - _log.warning(f"ignoring tags at the moment: {element}") + _log.debug(f"ignoring tags at the moment: {element}") doc.add_picture( parent=self.parents[self.level], diff --git a/tests/data/groundtruth/docling_v2/wiki_duck.html.itxt b/tests/data/groundtruth/docling_v2/wiki_duck.html.itxt index 4e22c090..d2b74885 100644 --- a/tests/data/groundtruth/docling_v2/wiki_duck.html.itxt +++ b/tests/data/groundtruth/docling_v2/wiki_duck.html.itxt @@ -1,491 +1,416 @@ item-0 at level 0: unspecified: group _root_ - item-1 at level 1: paragraph: Main menu - item-2 at level 1: paragraph: Navigation - item-3 at level 1: list: group list - item-4 at level 2: list_item: Main page - item-5 at level 2: list_item: Contents - item-6 at level 2: list_item: Current events - item-7 at level 2: list_item: Random article - item-8 at level 2: list_item: About Wikipedia - item-9 at level 2: list_item: Contact us - item-10 at level 1: paragraph: Contribute - item-11 at level 1: list: group list - item-12 at level 2: list_item: Help - item-13 at level 2: list_item: Learn to edit - item-14 at level 2: list_item: Community portal - item-15 at level 2: list_item: Recent changes - item-16 at level 2: list_item: Upload file - item-17 at level 1: picture - item-18 at level 1: picture - item-19 at level 1: picture - item-20 at level 1: list: group list - item-21 at level 1: list: group list - item-22 at level 2: list_item: Donate - item-23 at level 1: list: group list - item-24 at level 1: list: group list - item-25 at level 2: list_item: Create account - item-26 at level 2: list_item: Log in - item-27 at level 1: list: group list - item-28 at level 2: list_item: Create account - item-29 at level 2: list_item: Log in - item-30 at level 1: paragraph: Pages for logged out editors - item-31 at level 1: list: group list - item-32 at level 2: list_item: Contributions - item-33 at level 2: list_item: Talk - item-34 at level 1: section: group header-1 - item-35 at level 2: section_header: Contents - item-36 at level 3: list: group list - item-37 at level 4: list_item: (Top) - item-38 at level 4: list_item: 1 Etymology - item-39 at level 5: list: group list - item-40 at level 4: list_item: 2 Taxonomy - item-41 at level 5: list: group list - item-42 at level 4: list_item: 3 Morphology - item-43 at level 5: list: group list - item-44 at level 4: list_item: 4 Distribution and habitat - item-45 at level 5: list: group list - item-46 at level 4: list_item: 5 Behaviour Toggle Behaviour subsection - item-47 at level 5: list: group list - item-48 at level 6: list_item: 5.1 Feeding - item-49 at level 7: list: group list - item-50 at level 6: list_item: 5.2 Breeding - item-51 at level 7: list: group list - item-52 at level 6: list_item: 5.3 Communication - item-53 at level 7: list: group list - item-54 at level 6: list_item: 5.4 Predators - item-55 at level 7: list: group list - item-56 at level 4: list_item: 6 Relationship with humans Toggle Relationship with humans subsection - item-57 at level 5: list: group list - item-58 at level 6: list_item: 6.1 Hunting - item-59 at level 7: list: group list - item-60 at level 6: list_item: 6.2 Domestication - item-61 at level 7: list: group list - item-62 at level 6: list_item: 6.3 Heraldry - item-63 at level 7: list: group list - item-64 at level 6: list_item: 6.4 Cultural references - item-65 at level 7: list: group list - item-66 at level 4: list_item: 7 See also - item-67 at level 5: list: group list - item-68 at level 4: list_item: 8 Notes Toggle Notes subsection - item-69 at level 5: list: group list - item-70 at level 6: list_item: 8.1 Citations - item-71 at level 7: list: group list - item-72 at level 6: list_item: 8.2 Sources - item-73 at level 7: list: group list - item-74 at level 4: list_item: 9 External links - item-75 at level 5: list: group list - item-76 at level 1: title: Duck - item-77 at level 2: list: group list - item-78 at level 3: list_item: Acèh - item-79 at level 3: list_item: Afrikaans - item-80 at level 3: list_item: Alemannisch - item-81 at level 3: list_item: አማርኛ - item-82 at level 3: list_item: Ænglisc - item-83 at level 3: list_item: العربية - item-84 at level 3: list_item: Aragonés - item-85 at level 3: list_item: ܐܪܡܝܐ - item-86 at level 3: list_item: Armãneashti - item-87 at level 3: list_item: Asturianu - item-88 at level 3: list_item: Atikamekw - item-89 at level 3: list_item: Авар - item-90 at level 3: list_item: Aymar aru - item-91 at level 3: list_item: تۆرکجه - item-92 at level 3: list_item: Basa Bali - item-93 at level 3: list_item: বাংলা - item-94 at level 3: list_item: 閩南語 / Bân-lâm-gú - item-95 at level 3: list_item: Беларуская - item-96 at level 3: list_item: Беларуская (тарашкевіца) - item-97 at level 3: list_item: Bikol Central - item-98 at level 3: list_item: Български - item-99 at level 3: list_item: Brezhoneg - item-100 at level 3: list_item: Буряад - item-101 at level 3: list_item: Català - item-102 at level 3: list_item: Чӑвашла - item-103 at level 3: list_item: Čeština - item-104 at level 3: list_item: ChiShona - item-105 at level 3: list_item: Cymraeg - item-106 at level 3: list_item: Dagbanli - item-107 at level 3: list_item: Dansk - item-108 at level 3: list_item: Deitsch - item-109 at level 3: list_item: Deutsch - item-110 at level 3: list_item: डोटेली - item-111 at level 3: list_item: Ελληνικά - item-112 at level 3: list_item: Emiliàn e rumagnòl - item-113 at level 3: list_item: Español - item-114 at level 3: list_item: Esperanto - item-115 at level 3: list_item: Euskara - item-116 at level 3: list_item: فارسی - item-117 at level 3: list_item: Français - item-118 at level 3: list_item: Gaeilge - item-119 at level 3: list_item: Galego - item-120 at level 3: list_item: ГӀалгӀай - item-121 at level 3: list_item: 贛語 - item-122 at level 3: list_item: گیلکی - item-123 at level 3: list_item: 𐌲𐌿𐍄𐌹𐍃𐌺 - item-124 at level 3: list_item: गोंयची कोंकणी / Gõychi Konknni - item-125 at level 3: list_item: 客家語 / Hak-kâ-ngî - item-126 at level 3: list_item: 한국어 - item-127 at level 3: list_item: Hausa - item-128 at level 3: list_item: Հայերեն - item-129 at level 3: list_item: हिन्दी - item-130 at level 3: list_item: Hrvatski - item-131 at level 3: list_item: Ido - item-132 at level 3: list_item: Bahasa Indonesia - item-133 at level 3: list_item: Iñupiatun - item-134 at level 3: list_item: Íslenska - item-135 at level 3: list_item: Italiano - item-136 at level 3: list_item: עברית - item-137 at level 3: list_item: Jawa - item-138 at level 3: list_item: ಕನ್ನಡ - item-139 at level 3: list_item: Kapampangan - item-140 at level 3: list_item: ქართული - item-141 at level 3: list_item: कॉशुर / کٲشُر - item-142 at level 3: list_item: Қазақша - item-143 at level 3: list_item: Ikirundi - item-144 at level 3: list_item: Kongo - item-145 at level 3: list_item: Kreyòl ayisyen - item-146 at level 3: list_item: Кырык мары - item-147 at level 3: list_item: ລາວ - item-148 at level 3: list_item: Latina - item-149 at level 3: list_item: Latviešu - item-150 at level 3: list_item: Lietuvių - item-151 at level 3: list_item: Li Niha - item-152 at level 3: list_item: Ligure - item-153 at level 3: list_item: Limburgs - item-154 at level 3: list_item: Lingála - item-155 at level 3: list_item: Malagasy - item-156 at level 3: list_item: മലയാളം - item-157 at level 3: list_item: मराठी - item-158 at level 3: list_item: مازِرونی - item-159 at level 3: list_item: Bahasa Melayu - item-160 at level 3: list_item: ꯃꯤꯇꯩ ꯂꯣꯟ - item-161 at level 3: list_item: 閩東語 / Mìng-dĕ̤ng-ngṳ̄ - item-162 at level 3: list_item: Мокшень - item-163 at level 3: list_item: Монгол - item-164 at level 3: list_item: မြန်မာဘာသာ - item-165 at level 3: list_item: Nederlands - item-166 at level 3: list_item: Nedersaksies - item-167 at level 3: list_item: नेपाली - item-168 at level 3: list_item: नेपाल भाषा - item-169 at level 3: list_item: 日本語 - item-170 at level 3: list_item: Нохчийн - item-171 at level 3: list_item: Norsk nynorsk - item-172 at level 3: list_item: Occitan - item-173 at level 3: list_item: Oromoo - item-174 at level 3: list_item: ਪੰਜਾਬੀ - item-175 at level 3: list_item: Picard - item-176 at level 3: list_item: Plattdüütsch - item-177 at level 3: list_item: Polski - item-178 at level 3: list_item: Português - item-179 at level 3: list_item: Qırımtatarca - item-180 at level 3: list_item: Română - item-181 at level 3: list_item: Русский - item-182 at level 3: list_item: Саха тыла - item-183 at level 3: list_item: ᱥᱟᱱᱛᱟᱲᱤ - item-184 at level 3: list_item: Sardu - item-185 at level 3: list_item: Scots - item-186 at level 3: list_item: Seeltersk - item-187 at level 3: list_item: Shqip - item-188 at level 3: list_item: Sicilianu - item-189 at level 3: list_item: සිංහල - item-190 at level 3: list_item: Simple English - item-191 at level 3: list_item: سنڌي - item-192 at level 3: list_item: کوردی - item-193 at level 3: list_item: Српски / srpski - item-194 at level 3: list_item: Srpskohrvatski / српскохрватски - item-195 at level 3: list_item: Sunda - item-196 at level 3: list_item: Svenska - item-197 at level 3: list_item: Tagalog - item-198 at level 3: list_item: தமிழ் - item-199 at level 3: list_item: Taqbaylit - item-200 at level 3: list_item: Татарча / tatarça - item-201 at level 3: list_item: ไทย - item-202 at level 3: list_item: Türkçe - item-203 at level 3: list_item: Українська - item-204 at level 3: list_item: ئۇيغۇرچە / Uyghurche - item-205 at level 3: list_item: Vahcuengh - item-206 at level 3: list_item: Tiếng Việt - item-207 at level 3: list_item: Walon - item-208 at level 3: list_item: 文言 - item-209 at level 3: list_item: Winaray - item-210 at level 3: list_item: 吴语 - item-211 at level 3: list_item: 粵語 - item-212 at level 3: list_item: Žemaitėška - item-213 at level 3: list_item: 中文 - item-214 at level 2: list: group list - item-215 at level 3: list_item: Article - item-216 at level 3: list_item: Talk - item-217 at level 2: list: group list - item-218 at level 2: list: group list - item-219 at level 3: list_item: Read - item-220 at level 3: list_item: View source - item-221 at level 3: list_item: View history - item-222 at level 2: paragraph: Tools - item-223 at level 2: paragraph: Actions - item-224 at level 2: list: group list - item-225 at level 3: list_item: Read - item-226 at level 3: list_item: View source - item-227 at level 3: list_item: View history - item-228 at level 2: paragraph: General - item-229 at level 2: list: group list - item-230 at level 3: list_item: What links here - item-231 at level 3: list_item: Related changes - item-232 at level 3: list_item: Upload file - item-233 at level 3: list_item: Special pages - item-234 at level 3: list_item: Permanent link - item-235 at level 3: list_item: Page information - item-236 at level 3: list_item: Cite this page - item-237 at level 3: list_item: Get shortened URL - item-238 at level 3: list_item: Download QR code - item-239 at level 3: list_item: Wikidata item - item-240 at level 2: paragraph: Print/export - item-241 at level 2: list: group list - item-242 at level 3: list_item: Download as PDF - item-243 at level 3: list_item: Printable version - item-244 at level 2: paragraph: In other projects - item-245 at level 2: list: group list - item-246 at level 3: list_item: Wikimedia Commons - item-247 at level 3: list_item: Wikiquote - item-248 at level 2: paragraph: Appearance - item-249 at level 2: picture - item-250 at level 2: paragraph: From Wikipedia, the free encyclopedia - item-251 at level 2: paragraph: Common name for many species of bird - item-252 at level 2: paragraph: This article is about the bird. ... as a food, see . For other uses, see . - item-253 at level 2: paragraph: "Duckling" redirects here. For other uses, see . - item-254 at level 2: table with [13x2] - item-255 at level 2: paragraph: Duck is the common name for nume ... und in both fresh water and sea water. - item-256 at level 2: paragraph: Ducks are sometimes confused wit ... divers, grebes, gallinules and coots. - item-257 at level 2: section_header: Etymology - item-258 at level 3: paragraph: The word duck comes from Old Eng ... h duiken and German tauchen 'to dive'. - item-259 at level 3: picture - item-259 at level 4: caption: Pacific black duck displaying the characteristic upending "duck" - item-260 at level 3: paragraph: This word replaced Old English e ... nskrit ātí 'water bird', among others. - item-261 at level 3: paragraph: A duckling is a young duck in do ... , is sometimes labelled as a duckling. - item-262 at level 3: paragraph: A male is called a drake and the ... a duck, or in ornithology a hen.[3][4] - item-263 at level 3: picture - item-263 at level 4: caption: Male mallard. - item-264 at level 3: picture - item-264 at level 4: caption: Wood ducks. - item-265 at level 2: section_header: Taxonomy - item-266 at level 3: paragraph: All ducks belong to the biologic ... ationships between various species.[9] - item-267 at level 3: picture - item-267 at level 4: caption: Mallard landing in approach - item-268 at level 3: paragraph: In most modern classifications, ... all size and stiff, upright tails.[14] - item-269 at level 3: paragraph: A number of other species called ... shelducks in the tribe Tadornini.[15] - item-270 at level 2: section_header: Morphology - item-271 at level 3: picture - item-271 at level 4: caption: Male Mandarin duck - item-272 at level 3: paragraph: The overall body plan of ducks i ... is moult typically precedes migration. - item-273 at level 3: paragraph: The drakes of northern species o ... rkscrew shaped vagina to prevent rape. - item-274 at level 2: section_header: Distribution and habitat - item-275 at level 3: picture - item-275 at level 4: caption: Flying steamer ducks in Ushuaia, Argentina - item-276 at level 3: paragraph: Ducks have a cosmopolitan distri ... endemic to such far-flung islands.[21] - item-277 at level 3: picture - item-277 at level 4: caption: Female mallard in Cornwall, England - item-278 at level 3: paragraph: Some duck species, mainly those ... t form after localised heavy rain.[23] - item-279 at level 2: section_header: Behaviour - item-280 at level 3: section_header: Feeding - item-281 at level 4: picture - item-281 at level 5: caption: Pecten along the bill - item-282 at level 4: picture - item-282 at level 5: caption: Mallard duckling preening - item-283 at level 4: paragraph: Ducks eat food sources such as g ... amphibians, worms, and small molluscs. - item-284 at level 4: paragraph: Dabbling ducks feed on the surfa ... thers and to hold slippery food items. - item-285 at level 4: paragraph: Diving ducks and sea ducks forag ... ave more difficulty taking off to fly. - item-286 at level 4: paragraph: A few specialized species such a ... apted to catch and swallow large fish. - item-287 at level 4: paragraph: The others have the characterist ... e nostrils come out through hard horn. - item-288 at level 4: paragraph: The Guardian published an articl ... the ducks and pollutes waterways.[25] - item-289 at level 3: section_header: Breeding - item-290 at level 4: picture - item-290 at level 5: caption: A Muscovy duckling - item-291 at level 4: paragraph: Ducks generally only have one pa ... st and led her ducklings to water.[28] - item-292 at level 3: section_header: Communication - item-293 at level 4: paragraph: Female mallard ducks (as well as ... laying calls or quieter contact calls. - item-294 at level 4: paragraph: A common urban legend claims tha ... annel television show MythBusters.[32] - item-295 at level 3: section_header: Predators - item-296 at level 4: picture - item-296 at level 5: caption: Ringed teal - item-297 at level 4: paragraph: Ducks have many predators. Duckl ... or large birds, such as hawks or owls. - item-298 at level 4: paragraph: Adult ducks are fast fliers, but ... its speed and strength to catch ducks. - item-299 at level 2: section_header: Relationship with humans - item-300 at level 3: section_header: Hunting - item-301 at level 4: paragraph: Humans have hunted ducks since p ... evidence of this is uncommon.[35][42] - item-302 at level 4: paragraph: In many areas, wild ducks (inclu ... inated by pollutants such as PCBs.[44] - item-303 at level 3: section_header: Domestication - item-304 at level 4: picture - item-304 at level 5: caption: Indian Runner ducks, a common breed of domestic ducks - item-305 at level 4: paragraph: Ducks have many economic uses, b ... it weighs less than 1 kg (2.2 lb).[48] - item-306 at level 3: section_header: Heraldry - item-307 at level 4: picture - item-307 at level 5: caption: Three black-colored ducks in the coat of arms of Maaninka[49] - item-308 at level 4: paragraph: Ducks appear on several coats of ... the coat of arms of Föglö (Åland).[51] - item-309 at level 3: section_header: Cultural references - item-310 at level 4: paragraph: In 2002, psychologist Richard Wi ... 54] and was made into a movie in 1986. - item-311 at level 4: paragraph: The 1992 Disney film The Mighty ... Ducks minor league baseball team.[55] - item-312 at level 2: section_header: See also - item-313 at level 3: list: group list - item-314 at level 4: list_item: Birds portal - item-315 at level 3: list: group list - item-316 at level 4: list_item: Domestic duck - item-317 at level 4: list_item: Duck as food - item-318 at level 4: list_item: Duck test - item-319 at level 4: list_item: Duck breeds - item-320 at level 4: list_item: Fictional ducks - item-321 at level 4: list_item: Rubber duck - item-322 at level 2: section_header: Notes - item-323 at level 3: section_header: Citations - item-324 at level 4: ordered_list: group ordered list - item-325 at level 5: list_item: ^ "Duckling". The American Herit ... n Company. 2006. Retrieved 2015-05-22. - item-326 at level 5: list_item: ^ "Duckling". Kernerman English ... Ltd. 2000–2006. Retrieved 2015-05-22. - item-327 at level 5: list_item: ^ Dohner, Janet Vorwald (2001). ... University Press. ISBN 978-0300138139. - item-328 at level 5: list_item: ^ Visca, Curt; Visca, Kelley (20 ... Publishing Group. ISBN 9780823961566. - item-329 at level 5: list_item: ^ a b c d Carboneras 1992, p. 536. - item-330 at level 5: list_item: ^ Livezey 1986, pp. 737–738. - item-331 at level 5: list_item: ^ Madsen, McHugh & de Kloet 1988, p. 452. - item-332 at level 5: list_item: ^ Donne-Goussé, Laudet & Hänni 2002, pp. 353–354. - item-333 at level 5: list_item: ^ a b c d e f Carboneras 1992, p. 540. - item-334 at level 5: list_item: ^ Elphick, Dunning & Sibley 2001, p. 191. - item-335 at level 5: list_item: ^ Kear 2005, p. 448. - item-336 at level 5: list_item: ^ Kear 2005, p. 622–623. - item-337 at level 5: list_item: ^ Kear 2005, p. 686. - item-338 at level 5: list_item: ^ Elphick, Dunning & Sibley 2001, p. 193. - item-339 at level 5: list_item: ^ a b c d e f g Carboneras 1992, p. 537. - item-340 at level 5: list_item: ^ American Ornithologists' Union 1998, p. xix. - item-341 at level 5: list_item: ^ American Ornithologists' Union 1998. - item-342 at level 5: list_item: ^ Carboneras 1992, p. 538. - item-343 at level 5: list_item: ^ Christidis & Boles 2008, p. 62. - item-344 at level 5: list_item: ^ Shirihai 2008, pp. 239, 245. - item-345 at level 5: list_item: ^ a b Pratt, Bruner & Berrett 1987, pp. 98–107. - item-346 at level 5: list_item: ^ Fitter, Fitter & Hosking 2000, pp. 52–3. - item-347 at level 5: list_item: ^ "Pacific Black Duck". www.wiresnr.org. Retrieved 2018-04-27. - item-348 at level 5: list_item: ^ Ogden, Evans. "Dabbling Ducks". CWE. Retrieved 2006-11-02. - item-349 at level 5: list_item: ^ Karl Mathiesen (16 March 2015) ... Guardian. Retrieved 13 November 2016. - item-350 at level 5: list_item: ^ Rohwer, Frank C.; Anderson, Mi ... 4615-6787-5_4. ISBN 978-1-4615-6789-9. - item-351 at level 5: list_item: ^ Smith, Cyndi M.; Cooke, Fred; ... 093/condor/102.1.201. hdl:10315/13797. - item-352 at level 5: list_item: ^ "If You Find An Orphaned Duckl ... l on 2018-09-23. Retrieved 2018-12-22. - item-353 at level 5: list_item: ^ Carver, Heather (2011). The Du ...  9780557901562.[self-published source] - item-354 at level 5: list_item: ^ Titlow, Budd (2013-09-03). Bir ... man & Littlefield. ISBN 9780762797707. - item-355 at level 5: list_item: ^ Amos, Jonathan (2003-09-08). " ... kers". BBC News. Retrieved 2006-11-02. - item-356 at level 5: list_item: ^ "Mythbusters Episode 8". 12 December 2003. - item-357 at level 5: list_item: ^ Erlandson 1994, p. 171. - item-358 at level 5: list_item: ^ Jeffries 2008, pp. 168, 243. - item-359 at level 5: list_item: ^ a b Sued-Badillo 2003, p. 65. - item-360 at level 5: list_item: ^ Thorpe 1996, p. 68. - item-361 at level 5: list_item: ^ Maisels 1999, p. 42. - item-362 at level 5: list_item: ^ Rau 1876, p. 133. - item-363 at level 5: list_item: ^ Higman 2012, p. 23. - item-364 at level 5: list_item: ^ Hume 2012, p. 53. - item-365 at level 5: list_item: ^ Hume 2012, p. 52. - item-366 at level 5: list_item: ^ Fieldhouse 2002, p. 167. - item-367 at level 5: list_item: ^ Livingston, A. D. (1998-01-01) ... Editions, Limited. ISBN 9781853263774. - item-368 at level 5: list_item: ^ "Study plan for waterfowl inju ... on 2022-10-09. Retrieved 2 July 2019. - item-369 at level 5: list_item: ^ "FAOSTAT". www.fao.org. Retrieved 2019-10-25. - item-370 at level 5: list_item: ^ "Anas platyrhynchos, Domestic ... . Digimorph.org. Retrieved 2012-12-23. - item-371 at level 5: list_item: ^ Sy Montgomery. "Mallard; Encyc ... Britannica.com. Retrieved 2012-12-23. - item-372 at level 5: list_item: ^ Glenday, Craig (2014). Guinnes ... ited. pp. 135. ISBN 978-1-908843-15-9. - item-373 at level 5: list_item: ^ Suomen kunnallisvaakunat (in F ... tto. 1982. p. 147. ISBN 951-773-085-3. - item-374 at level 5: list_item: ^ "Lubānas simbolika" (in Latvian). Retrieved September 9, 2021. - item-375 at level 5: list_item: ^ "Föglö" (in Swedish). Retrieved September 9, 2021. - item-376 at level 5: list_item: ^ Young, Emma. "World's funniest ... w Scientist. Retrieved 7 January 2019. - item-377 at level 5: list_item: ^ "Howard the Duck (character)". Grand Comics Database. - item-378 at level 5: list_item: ^ Sanderson, Peter; Gilbert, Lau ... luding this bad-tempered talking duck. - item-379 at level 5: list_item: ^ "The Duck". University of Oregon Athletics. Retrieved 2022-01-20. - item-380 at level 3: section_header: Sources - item-381 at level 4: list: group list - item-382 at level 5: list_item: American Ornithologists' Union ( ... (PDF) from the original on 2022-10-09. - item-383 at level 5: list_item: Carboneras, Carlos (1992). del H ... Lynx Edicions. ISBN 978-84-87334-10-8. - item-384 at level 5: list_item: Christidis, Les; Boles, Walter E ... ro Publishing. ISBN 978-0-643-06511-6. - item-385 at level 5: list_item: Donne-Goussé, Carole; Laudet, Vi ... /S1055-7903(02)00019-2. PMID 12099792. - item-386 at level 5: list_item: Elphick, Chris; Dunning, John B. ... istopher Helm. ISBN 978-0-7136-6250-4. - item-387 at level 5: list_item: Erlandson, Jon M. (1994). Early ... usiness Media. ISBN 978-1-4419-3231-0. - item-388 at level 5: list_item: Fieldhouse, Paul (2002). Food, F ... ara: ABC-CLIO. ISBN 978-1-61069-412-4. - item-389 at level 5: list_item: Fitter, Julian; Fitter, Daniel; ... versity Press. ISBN 978-0-691-10295-5. - item-390 at level 5: list_item: Higman, B. W. (2012). How Food M ... Wiley & Sons. ISBN 978-1-4051-8947-7. - item-391 at level 5: list_item: Hume, Julian H. (2012). Extinct ... istopher Helm. ISBN 978-1-4729-3744-5. - item-392 at level 5: list_item: Jeffries, Richard (2008). Holoce ... Alabama Press. ISBN 978-0-8173-1658-7. - item-393 at level 5: list_item: Kear, Janet, ed. (2005). Ducks, ... versity Press. ISBN 978-0-19-861009-0. - item-394 at level 5: list_item: Livezey, Bradley C. (October 198 ... (PDF) from the original on 2022-10-09. - item-395 at level 5: list_item: Madsen, Cort S.; McHugh, Kevin P ... (PDF) from the original on 2022-10-09. - item-396 at level 5: list_item: Maisels, Charles Keith (1999). E ... on: Routledge. ISBN 978-0-415-10975-8. - item-397 at level 5: list_item: Pratt, H. Douglas; Bruner, Phill ... University Press. ISBN 0-691-02399-9. - item-398 at level 5: list_item: Rau, Charles (1876). Early Man i ... ork: Harper & Brothers. LCCN 05040168. - item-399 at level 5: list_item: Shirihai, Hadoram (2008). A Comp ... versity Press. ISBN 978-0-691-13666-0. - item-400 at level 5: list_item: Sued-Badillo, Jalil (2003). Auto ... Paris: UNESCO. ISBN 978-92-3-103832-7. - item-401 at level 5: list_item: Thorpe, I. J. (1996). The Origin ... rk: Routledge. ISBN 978-0-415-08009-5. - item-402 at level 2: section_header: External links - item-403 at level 3: list: group list - item-404 at level 4: list_item: Definitions from Wiktionary - item-405 at level 4: list_item: Media from Commons - item-406 at level 4: list_item: Quotations from Wikiquote - item-407 at level 4: list_item: Recipes from Wikibooks - item-408 at level 4: list_item: Taxa from Wikispecies - item-409 at level 4: list_item: Data from Wikidata - item-410 at level 3: list: group list - item-411 at level 4: list_item: list of books (useful looking abstracts) - item-412 at level 4: list_item: Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine - item-413 at level 4: list_item: Ducks at a Distance, by Rob Hine ... uide to identification of US waterfowl - item-414 at level 3: table with [3x2] - item-415 at level 3: picture - item-416 at level 3: paragraph: Retrieved from "" - item-417 at level 3: paragraph: : - item-418 at level 3: list: group list - item-419 at level 4: list_item: Ducks - item-420 at level 4: list_item: Game birds - item-421 at level 4: list_item: Bird common names - item-422 at level 3: paragraph: Hidden categories: - item-423 at level 3: list: group list - item-424 at level 4: list_item: All accuracy disputes - item-425 at level 4: list_item: Accuracy disputes from February 2020 - item-426 at level 4: list_item: CS1 Finnish-language sources (fi) - item-427 at level 4: list_item: CS1 Latvian-language sources (lv) - item-428 at level 4: list_item: CS1 Swedish-language sources (sv) - item-429 at level 4: list_item: Articles with short description - item-430 at level 4: list_item: Short description is different from Wikidata - item-431 at level 4: list_item: Wikipedia indefinitely move-protected pages - item-432 at level 4: list_item: Wikipedia indefinitely semi-protected pages - item-433 at level 4: list_item: Articles with 'species' microformats - item-434 at level 4: list_item: Articles containing Old English (ca. 450-1100)-language text - item-435 at level 4: list_item: Articles containing Dutch-language text - item-436 at level 4: list_item: Articles containing German-language text - item-437 at level 4: list_item: Articles containing Norwegian-language text - item-438 at level 4: list_item: Articles containing Lithuanian-language text - item-439 at level 4: list_item: Articles containing Ancient Greek (to 1453)-language text - item-440 at level 4: list_item: All articles with self-published sources - item-441 at level 4: list_item: Articles with self-published sources from February 2020 - item-442 at level 4: list_item: All articles with unsourced statements - item-443 at level 4: list_item: Articles with unsourced statements from January 2022 - item-444 at level 4: list_item: CS1: long volume value - item-445 at level 4: list_item: Pages using Sister project links with wikidata mismatch - item-446 at level 4: list_item: Pages using Sister project links with hidden wikidata - item-447 at level 4: list_item: Webarchive template wayback links - item-448 at level 4: list_item: Articles with Project Gutenberg links - item-449 at level 4: list_item: Articles containing video clips - item-450 at level 3: list: group list - item-451 at level 4: list_item: This page was last edited on 21 September 2024, at 12:11 (UTC). - item-452 at level 4: list_item: Text is available under the Crea ... tion, Inc., a non-profit organization. - item-453 at level 3: list: group list - item-454 at level 4: list_item: Privacy policy - item-455 at level 4: list_item: About Wikipedia - item-456 at level 4: list_item: Disclaimers - item-457 at level 4: list_item: Contact Wikipedia - item-458 at level 4: list_item: Code of Conduct - item-459 at level 4: list_item: Developers - item-460 at level 4: list_item: Statistics - item-461 at level 4: list_item: Cookie statement - item-462 at level 4: list_item: Mobile view - item-463 at level 3: list: group list - item-464 at level 3: list: group list - item-465 at level 1: caption: Pacific black duck displaying the characteristic upending "duck" - item-466 at level 1: caption: Male mallard. - item-467 at level 1: caption: Wood ducks. - item-468 at level 1: caption: Mallard landing in approach - item-469 at level 1: caption: Male Mandarin duck - item-470 at level 1: caption: Flying steamer ducks in Ushuaia, Argentina - item-471 at level 1: caption: Female mallard in Cornwall, England - item-472 at level 1: caption: Pecten along the bill - item-473 at level 1: caption: Mallard duckling preening - item-474 at level 1: caption: A Muscovy duckling - item-475 at level 1: caption: Ringed teal - item-476 at level 1: caption: Indian Runner ducks, a common breed of domestic ducks - item-477 at level 1: caption: Three black-colored ducks in the coat of arms of Maaninka[49] \ No newline at end of file + item-1 at level 1: title: Duck + item-2 at level 2: list: group list + item-3 at level 3: list_item: Acèh + item-4 at level 3: list_item: Afrikaans + item-5 at level 3: list_item: Alemannisch + item-6 at level 3: list_item: አማርኛ + item-7 at level 3: list_item: Ænglisc + item-8 at level 3: list_item: العربية + item-9 at level 3: list_item: Aragonés + item-10 at level 3: list_item: ܐܪܡܝܐ + item-11 at level 3: list_item: Armãneashti + item-12 at level 3: list_item: Asturianu + item-13 at level 3: list_item: Atikamekw + item-14 at level 3: list_item: Авар + item-15 at level 3: list_item: Aymar aru + item-16 at level 3: list_item: تۆرکجه + item-17 at level 3: list_item: Basa Bali + item-18 at level 3: list_item: বাংলা + item-19 at level 3: list_item: 閩南語 / Bân-lâm-gú + item-20 at level 3: list_item: Беларуская + item-21 at level 3: list_item: Беларуская (тарашкевіца) + item-22 at level 3: list_item: Bikol Central + item-23 at level 3: list_item: Български + item-24 at level 3: list_item: Brezhoneg + item-25 at level 3: list_item: Буряад + item-26 at level 3: list_item: Català + item-27 at level 3: list_item: Чӑвашла + item-28 at level 3: list_item: Čeština + item-29 at level 3: list_item: ChiShona + item-30 at level 3: list_item: Cymraeg + item-31 at level 3: list_item: Dagbanli + item-32 at level 3: list_item: Dansk + item-33 at level 3: list_item: Deitsch + item-34 at level 3: list_item: Deutsch + item-35 at level 3: list_item: डोटेली + item-36 at level 3: list_item: Ελληνικά + item-37 at level 3: list_item: Emiliàn e rumagnòl + item-38 at level 3: list_item: Español + item-39 at level 3: list_item: Esperanto + item-40 at level 3: list_item: Euskara + item-41 at level 3: list_item: فارسی + item-42 at level 3: list_item: Français + item-43 at level 3: list_item: Gaeilge + item-44 at level 3: list_item: Galego + item-45 at level 3: list_item: ГӀалгӀай + item-46 at level 3: list_item: 贛語 + item-47 at level 3: list_item: گیلکی + item-48 at level 3: list_item: 𐌲𐌿𐍄𐌹𐍃𐌺 + item-49 at level 3: list_item: गोंयची कोंकणी / Gõychi Konknni + item-50 at level 3: list_item: 客家語 / Hak-kâ-ngî + item-51 at level 3: list_item: 한국어 + item-52 at level 3: list_item: Hausa + item-53 at level 3: list_item: Հայերեն + item-54 at level 3: list_item: हिन्दी + item-55 at level 3: list_item: Hrvatski + item-56 at level 3: list_item: Ido + item-57 at level 3: list_item: Bahasa Indonesia + item-58 at level 3: list_item: Iñupiatun + item-59 at level 3: list_item: Íslenska + item-60 at level 3: list_item: Italiano + item-61 at level 3: list_item: עברית + item-62 at level 3: list_item: Jawa + item-63 at level 3: list_item: ಕನ್ನಡ + item-64 at level 3: list_item: Kapampangan + item-65 at level 3: list_item: ქართული + item-66 at level 3: list_item: कॉशुर / کٲشُر + item-67 at level 3: list_item: Қазақша + item-68 at level 3: list_item: Ikirundi + item-69 at level 3: list_item: Kongo + item-70 at level 3: list_item: Kreyòl ayisyen + item-71 at level 3: list_item: Кырык мары + item-72 at level 3: list_item: ລາວ + item-73 at level 3: list_item: Latina + item-74 at level 3: list_item: Latviešu + item-75 at level 3: list_item: Lietuvių + item-76 at level 3: list_item: Li Niha + item-77 at level 3: list_item: Ligure + item-78 at level 3: list_item: Limburgs + item-79 at level 3: list_item: Lingála + item-80 at level 3: list_item: Malagasy + item-81 at level 3: list_item: മലയാളം + item-82 at level 3: list_item: मराठी + item-83 at level 3: list_item: مازِرونی + item-84 at level 3: list_item: Bahasa Melayu + item-85 at level 3: list_item: ꯃꯤꯇꯩ ꯂꯣꯟ + item-86 at level 3: list_item: 閩東語 / Mìng-dĕ̤ng-ngṳ̄ + item-87 at level 3: list_item: Мокшень + item-88 at level 3: list_item: Монгол + item-89 at level 3: list_item: မြန်မာဘာသာ + item-90 at level 3: list_item: Nederlands + item-91 at level 3: list_item: Nedersaksies + item-92 at level 3: list_item: नेपाली + item-93 at level 3: list_item: नेपाल भाषा + item-94 at level 3: list_item: 日本語 + item-95 at level 3: list_item: Нохчийн + item-96 at level 3: list_item: Norsk nynorsk + item-97 at level 3: list_item: Occitan + item-98 at level 3: list_item: Oromoo + item-99 at level 3: list_item: ਪੰਜਾਬੀ + item-100 at level 3: list_item: Picard + item-101 at level 3: list_item: Plattdüütsch + item-102 at level 3: list_item: Polski + item-103 at level 3: list_item: Português + item-104 at level 3: list_item: Qırımtatarca + item-105 at level 3: list_item: Română + item-106 at level 3: list_item: Русский + item-107 at level 3: list_item: Саха тыла + item-108 at level 3: list_item: ᱥᱟᱱᱛᱟᱲᱤ + item-109 at level 3: list_item: Sardu + item-110 at level 3: list_item: Scots + item-111 at level 3: list_item: Seeltersk + item-112 at level 3: list_item: Shqip + item-113 at level 3: list_item: Sicilianu + item-114 at level 3: list_item: සිංහල + item-115 at level 3: list_item: Simple English + item-116 at level 3: list_item: سنڌي + item-117 at level 3: list_item: کوردی + item-118 at level 3: list_item: Српски / srpski + item-119 at level 3: list_item: Srpskohrvatski / српскохрватски + item-120 at level 3: list_item: Sunda + item-121 at level 3: list_item: Svenska + item-122 at level 3: list_item: Tagalog + item-123 at level 3: list_item: தமிழ் + item-124 at level 3: list_item: Taqbaylit + item-125 at level 3: list_item: Татарча / tatarça + item-126 at level 3: list_item: ไทย + item-127 at level 3: list_item: Türkçe + item-128 at level 3: list_item: Українська + item-129 at level 3: list_item: ئۇيغۇرچە / Uyghurche + item-130 at level 3: list_item: Vahcuengh + item-131 at level 3: list_item: Tiếng Việt + item-132 at level 3: list_item: Walon + item-133 at level 3: list_item: 文言 + item-134 at level 3: list_item: Winaray + item-135 at level 3: list_item: 吴语 + item-136 at level 3: list_item: 粵語 + item-137 at level 3: list_item: Žemaitėška + item-138 at level 3: list_item: 中文 + item-139 at level 2: list: group list + item-140 at level 3: list_item: Article + item-141 at level 3: list_item: Talk + item-142 at level 2: list: group list + item-143 at level 2: list: group list + item-144 at level 3: list_item: Read + item-145 at level 3: list_item: View source + item-146 at level 3: list_item: View history + item-147 at level 2: paragraph: Tools + item-148 at level 2: paragraph: Actions + item-149 at level 2: list: group list + item-150 at level 3: list_item: Read + item-151 at level 3: list_item: View source + item-152 at level 3: list_item: View history + item-153 at level 2: paragraph: General + item-154 at level 2: list: group list + item-155 at level 3: list_item: What links here + item-156 at level 3: list_item: Related changes + item-157 at level 3: list_item: Upload file + item-158 at level 3: list_item: Special pages + item-159 at level 3: list_item: Permanent link + item-160 at level 3: list_item: Page information + item-161 at level 3: list_item: Cite this page + item-162 at level 3: list_item: Get shortened URL + item-163 at level 3: list_item: Download QR code + item-164 at level 3: list_item: Wikidata item + item-165 at level 2: paragraph: Print/export + item-166 at level 2: list: group list + item-167 at level 3: list_item: Download as PDF + item-168 at level 3: list_item: Printable version + item-169 at level 2: paragraph: In other projects + item-170 at level 2: list: group list + item-171 at level 3: list_item: Wikimedia Commons + item-172 at level 3: list_item: Wikiquote + item-173 at level 2: paragraph: Appearance + item-174 at level 2: picture + item-175 at level 2: paragraph: From Wikipedia, the free encyclopedia + item-176 at level 2: paragraph: Common name for many species of bird + item-177 at level 2: paragraph: This article is about the bird. ... as a food, see . For other uses, see . + item-178 at level 2: paragraph: "Duckling" redirects here. For other uses, see . + item-179 at level 2: table with [13x2] + item-180 at level 2: paragraph: Duck is the common name for nume ... und in both fresh water and sea water. + item-181 at level 2: paragraph: Ducks are sometimes confused wit ... divers, grebes, gallinules and coots. + item-182 at level 2: section_header: Etymology + item-183 at level 3: paragraph: The word duck comes from Old Eng ... h duiken and German tauchen 'to dive'. + item-184 at level 3: picture + item-184 at level 4: caption: Pacific black duck displaying the characteristic upending "duck" + item-185 at level 3: paragraph: This word replaced Old English e ... nskrit ātí 'water bird', among others. + item-186 at level 3: paragraph: A duckling is a young duck in do ... , is sometimes labelled as a duckling. + item-187 at level 3: paragraph: A male is called a drake and the ... a duck, or in ornithology a hen.[3][4] + item-188 at level 3: picture + item-188 at level 4: caption: Male mallard. + item-189 at level 3: picture + item-189 at level 4: caption: Wood ducks. + item-190 at level 2: section_header: Taxonomy + item-191 at level 3: paragraph: All ducks belong to the biologic ... ationships between various species.[9] + item-192 at level 3: picture + item-192 at level 4: caption: Mallard landing in approach + item-193 at level 3: paragraph: In most modern classifications, ... all size and stiff, upright tails.[14] + item-194 at level 3: paragraph: A number of other species called ... shelducks in the tribe Tadornini.[15] + item-195 at level 2: section_header: Morphology + item-196 at level 3: picture + item-196 at level 4: caption: Male Mandarin duck + item-197 at level 3: paragraph: The overall body plan of ducks i ... is moult typically precedes migration. + item-198 at level 3: paragraph: The drakes of northern species o ... rkscrew shaped vagina to prevent rape. + item-199 at level 2: section_header: Distribution and habitat + item-200 at level 3: picture + item-200 at level 4: caption: Flying steamer ducks in Ushuaia, Argentina + item-201 at level 3: paragraph: Ducks have a cosmopolitan distri ... endemic to such far-flung islands.[21] + item-202 at level 3: picture + item-202 at level 4: caption: Female mallard in Cornwall, England + item-203 at level 3: paragraph: Some duck species, mainly those ... t form after localised heavy rain.[23] + item-204 at level 2: section_header: Behaviour + item-205 at level 3: section_header: Feeding + item-206 at level 4: picture + item-206 at level 5: caption: Pecten along the bill + item-207 at level 4: picture + item-207 at level 5: caption: Mallard duckling preening + item-208 at level 4: paragraph: Ducks eat food sources such as g ... amphibians, worms, and small molluscs. + item-209 at level 4: paragraph: Dabbling ducks feed on the surfa ... thers and to hold slippery food items. + item-210 at level 4: paragraph: Diving ducks and sea ducks forag ... ave more difficulty taking off to fly. + item-211 at level 4: paragraph: A few specialized species such a ... apted to catch and swallow large fish. + item-212 at level 4: paragraph: The others have the characterist ... e nostrils come out through hard horn. + item-213 at level 4: paragraph: The Guardian published an articl ... the ducks and pollutes waterways.[25] + item-214 at level 3: section_header: Breeding + item-215 at level 4: picture + item-215 at level 5: caption: A Muscovy duckling + item-216 at level 4: paragraph: Ducks generally only have one pa ... st and led her ducklings to water.[28] + item-217 at level 3: section_header: Communication + item-218 at level 4: paragraph: Female mallard ducks (as well as ... laying calls or quieter contact calls. + item-219 at level 4: paragraph: A common urban legend claims tha ... annel television show MythBusters.[32] + item-220 at level 3: section_header: Predators + item-221 at level 4: picture + item-221 at level 5: caption: Ringed teal + item-222 at level 4: paragraph: Ducks have many predators. Duckl ... or large birds, such as hawks or owls. + item-223 at level 4: paragraph: Adult ducks are fast fliers, but ... its speed and strength to catch ducks. + item-224 at level 2: section_header: Relationship with humans + item-225 at level 3: section_header: Hunting + item-226 at level 4: paragraph: Humans have hunted ducks since p ... evidence of this is uncommon.[35][42] + item-227 at level 4: paragraph: In many areas, wild ducks (inclu ... inated by pollutants such as PCBs.[44] + item-228 at level 3: section_header: Domestication + item-229 at level 4: picture + item-229 at level 5: caption: Indian Runner ducks, a common breed of domestic ducks + item-230 at level 4: paragraph: Ducks have many economic uses, b ... it weighs less than 1 kg (2.2 lb).[48] + item-231 at level 3: section_header: Heraldry + item-232 at level 4: picture + item-232 at level 5: caption: Three black-colored ducks in the coat of arms of Maaninka[49] + item-233 at level 4: paragraph: Ducks appear on several coats of ... the coat of arms of Föglö (Åland).[51] + item-234 at level 3: section_header: Cultural references + item-235 at level 4: paragraph: In 2002, psychologist Richard Wi ... 54] and was made into a movie in 1986. + item-236 at level 4: paragraph: The 1992 Disney film The Mighty ... Ducks minor league baseball team.[55] + item-237 at level 2: section_header: See also + item-238 at level 3: list: group list + item-239 at level 4: list_item: Birds portal + item-240 at level 3: list: group list + item-241 at level 4: list_item: Domestic duck + item-242 at level 4: list_item: Duck as food + item-243 at level 4: list_item: Duck test + item-244 at level 4: list_item: Duck breeds + item-245 at level 4: list_item: Fictional ducks + item-246 at level 4: list_item: Rubber duck + item-247 at level 2: section_header: Notes + item-248 at level 3: section_header: Citations + item-249 at level 4: ordered_list: group ordered list + item-250 at level 5: list_item: ^ "Duckling". The American Herit ... n Company. 2006. Retrieved 2015-05-22. + item-251 at level 5: list_item: ^ "Duckling". Kernerman English ... Ltd. 2000–2006. Retrieved 2015-05-22. + item-252 at level 5: list_item: ^ Dohner, Janet Vorwald (2001). ... University Press. ISBN 978-0300138139. + item-253 at level 5: list_item: ^ Visca, Curt; Visca, Kelley (20 ... Publishing Group. ISBN 9780823961566. + item-254 at level 5: list_item: ^ a b c d Carboneras 1992, p. 536. + item-255 at level 5: list_item: ^ Livezey 1986, pp. 737–738. + item-256 at level 5: list_item: ^ Madsen, McHugh & de Kloet 1988, p. 452. + item-257 at level 5: list_item: ^ Donne-Goussé, Laudet & Hänni 2002, pp. 353–354. + item-258 at level 5: list_item: ^ a b c d e f Carboneras 1992, p. 540. + item-259 at level 5: list_item: ^ Elphick, Dunning & Sibley 2001, p. 191. + item-260 at level 5: list_item: ^ Kear 2005, p. 448. + item-261 at level 5: list_item: ^ Kear 2005, p. 622–623. + item-262 at level 5: list_item: ^ Kear 2005, p. 686. + item-263 at level 5: list_item: ^ Elphick, Dunning & Sibley 2001, p. 193. + item-264 at level 5: list_item: ^ a b c d e f g Carboneras 1992, p. 537. + item-265 at level 5: list_item: ^ American Ornithologists' Union 1998, p. xix. + item-266 at level 5: list_item: ^ American Ornithologists' Union 1998. + item-267 at level 5: list_item: ^ Carboneras 1992, p. 538. + item-268 at level 5: list_item: ^ Christidis & Boles 2008, p. 62. + item-269 at level 5: list_item: ^ Shirihai 2008, pp. 239, 245. + item-270 at level 5: list_item: ^ a b Pratt, Bruner & Berrett 1987, pp. 98–107. + item-271 at level 5: list_item: ^ Fitter, Fitter & Hosking 2000, pp. 52–3. + item-272 at level 5: list_item: ^ "Pacific Black Duck". www.wiresnr.org. Retrieved 2018-04-27. + item-273 at level 5: list_item: ^ Ogden, Evans. "Dabbling Ducks". CWE. Retrieved 2006-11-02. + item-274 at level 5: list_item: ^ Karl Mathiesen (16 March 2015) ... Guardian. Retrieved 13 November 2016. + item-275 at level 5: list_item: ^ Rohwer, Frank C.; Anderson, Mi ... 4615-6787-5_4. ISBN 978-1-4615-6789-9. + item-276 at level 5: list_item: ^ Smith, Cyndi M.; Cooke, Fred; ... 093/condor/102.1.201. hdl:10315/13797. + item-277 at level 5: list_item: ^ "If You Find An Orphaned Duckl ... l on 2018-09-23. Retrieved 2018-12-22. + item-278 at level 5: list_item: ^ Carver, Heather (2011). The Du ...  9780557901562.[self-published source] + item-279 at level 5: list_item: ^ Titlow, Budd (2013-09-03). Bir ... man & Littlefield. ISBN 9780762797707. + item-280 at level 5: list_item: ^ Amos, Jonathan (2003-09-08). " ... kers". BBC News. Retrieved 2006-11-02. + item-281 at level 5: list_item: ^ "Mythbusters Episode 8". 12 December 2003. + item-282 at level 5: list_item: ^ Erlandson 1994, p. 171. + item-283 at level 5: list_item: ^ Jeffries 2008, pp. 168, 243. + item-284 at level 5: list_item: ^ a b Sued-Badillo 2003, p. 65. + item-285 at level 5: list_item: ^ Thorpe 1996, p. 68. + item-286 at level 5: list_item: ^ Maisels 1999, p. 42. + item-287 at level 5: list_item: ^ Rau 1876, p. 133. + item-288 at level 5: list_item: ^ Higman 2012, p. 23. + item-289 at level 5: list_item: ^ Hume 2012, p. 53. + item-290 at level 5: list_item: ^ Hume 2012, p. 52. + item-291 at level 5: list_item: ^ Fieldhouse 2002, p. 167. + item-292 at level 5: list_item: ^ Livingston, A. D. (1998-01-01) ... Editions, Limited. ISBN 9781853263774. + item-293 at level 5: list_item: ^ "Study plan for waterfowl inju ... on 2022-10-09. Retrieved 2 July 2019. + item-294 at level 5: list_item: ^ "FAOSTAT". www.fao.org. Retrieved 2019-10-25. + item-295 at level 5: list_item: ^ "Anas platyrhynchos, Domestic ... . Digimorph.org. Retrieved 2012-12-23. + item-296 at level 5: list_item: ^ Sy Montgomery. "Mallard; Encyc ... Britannica.com. Retrieved 2012-12-23. + item-297 at level 5: list_item: ^ Glenday, Craig (2014). Guinnes ... ited. pp. 135. ISBN 978-1-908843-15-9. + item-298 at level 5: list_item: ^ Suomen kunnallisvaakunat (in F ... tto. 1982. p. 147. ISBN 951-773-085-3. + item-299 at level 5: list_item: ^ "Lubānas simbolika" (in Latvian). Retrieved September 9, 2021. + item-300 at level 5: list_item: ^ "Föglö" (in Swedish). Retrieved September 9, 2021. + item-301 at level 5: list_item: ^ Young, Emma. "World's funniest ... w Scientist. Retrieved 7 January 2019. + item-302 at level 5: list_item: ^ "Howard the Duck (character)". Grand Comics Database. + item-303 at level 5: list_item: ^ Sanderson, Peter; Gilbert, Lau ... luding this bad-tempered talking duck. + item-304 at level 5: list_item: ^ "The Duck". University of Oregon Athletics. Retrieved 2022-01-20. + item-305 at level 3: section_header: Sources + item-306 at level 4: list: group list + item-307 at level 5: list_item: American Ornithologists' Union ( ... (PDF) from the original on 2022-10-09. + item-308 at level 5: list_item: Carboneras, Carlos (1992). del H ... Lynx Edicions. ISBN 978-84-87334-10-8. + item-309 at level 5: list_item: Christidis, Les; Boles, Walter E ... ro Publishing. ISBN 978-0-643-06511-6. + item-310 at level 5: list_item: Donne-Goussé, Carole; Laudet, Vi ... /S1055-7903(02)00019-2. PMID 12099792. + item-311 at level 5: list_item: Elphick, Chris; Dunning, John B. ... istopher Helm. ISBN 978-0-7136-6250-4. + item-312 at level 5: list_item: Erlandson, Jon M. (1994). Early ... usiness Media. ISBN 978-1-4419-3231-0. + item-313 at level 5: list_item: Fieldhouse, Paul (2002). Food, F ... ara: ABC-CLIO. ISBN 978-1-61069-412-4. + item-314 at level 5: list_item: Fitter, Julian; Fitter, Daniel; ... versity Press. ISBN 978-0-691-10295-5. + item-315 at level 5: list_item: Higman, B. W. (2012). How Food M ... Wiley & Sons. ISBN 978-1-4051-8947-7. + item-316 at level 5: list_item: Hume, Julian H. (2012). Extinct ... istopher Helm. ISBN 978-1-4729-3744-5. + item-317 at level 5: list_item: Jeffries, Richard (2008). Holoce ... Alabama Press. ISBN 978-0-8173-1658-7. + item-318 at level 5: list_item: Kear, Janet, ed. (2005). Ducks, ... versity Press. ISBN 978-0-19-861009-0. + item-319 at level 5: list_item: Livezey, Bradley C. (October 198 ... (PDF) from the original on 2022-10-09. + item-320 at level 5: list_item: Madsen, Cort S.; McHugh, Kevin P ... (PDF) from the original on 2022-10-09. + item-321 at level 5: list_item: Maisels, Charles Keith (1999). E ... on: Routledge. ISBN 978-0-415-10975-8. + item-322 at level 5: list_item: Pratt, H. Douglas; Bruner, Phill ... University Press. ISBN 0-691-02399-9. + item-323 at level 5: list_item: Rau, Charles (1876). Early Man i ... ork: Harper & Brothers. LCCN 05040168. + item-324 at level 5: list_item: Shirihai, Hadoram (2008). A Comp ... versity Press. ISBN 978-0-691-13666-0. + item-325 at level 5: list_item: Sued-Badillo, Jalil (2003). Auto ... Paris: UNESCO. ISBN 978-92-3-103832-7. + item-326 at level 5: list_item: Thorpe, I. J. (1996). The Origin ... rk: Routledge. ISBN 978-0-415-08009-5. + item-327 at level 2: section_header: External links + item-328 at level 3: list: group list + item-329 at level 4: list_item: Definitions from Wiktionary + item-330 at level 4: list_item: Media from Commons + item-331 at level 4: list_item: Quotations from Wikiquote + item-332 at level 4: list_item: Recipes from Wikibooks + item-333 at level 4: list_item: Taxa from Wikispecies + item-334 at level 4: list_item: Data from Wikidata + item-335 at level 3: list: group list + item-336 at level 4: list_item: list of books (useful looking abstracts) + item-337 at level 4: list_item: Ducks on postage stamps Archived 2013-05-13 at the Wayback Machine + item-338 at level 4: list_item: Ducks at a Distance, by Rob Hine ... uide to identification of US waterfowl + item-339 at level 3: table with [3x2] + item-340 at level 3: picture + item-341 at level 3: paragraph: Retrieved from "" + item-342 at level 3: paragraph: : + item-343 at level 3: list: group list + item-344 at level 4: list_item: Ducks + item-345 at level 4: list_item: Game birds + item-346 at level 4: list_item: Bird common names + item-347 at level 3: paragraph: Hidden categories: + item-348 at level 3: list: group list + item-349 at level 4: list_item: All accuracy disputes + item-350 at level 4: list_item: Accuracy disputes from February 2020 + item-351 at level 4: list_item: CS1 Finnish-language sources (fi) + item-352 at level 4: list_item: CS1 Latvian-language sources (lv) + item-353 at level 4: list_item: CS1 Swedish-language sources (sv) + item-354 at level 4: list_item: Articles with short description + item-355 at level 4: list_item: Short description is different from Wikidata + item-356 at level 4: list_item: Wikipedia indefinitely move-protected pages + item-357 at level 4: list_item: Wikipedia indefinitely semi-protected pages + item-358 at level 4: list_item: Articles with 'species' microformats + item-359 at level 4: list_item: Articles containing Old English (ca. 450-1100)-language text + item-360 at level 4: list_item: Articles containing Dutch-language text + item-361 at level 4: list_item: Articles containing German-language text + item-362 at level 4: list_item: Articles containing Norwegian-language text + item-363 at level 4: list_item: Articles containing Lithuanian-language text + item-364 at level 4: list_item: Articles containing Ancient Greek (to 1453)-language text + item-365 at level 4: list_item: All articles with self-published sources + item-366 at level 4: list_item: Articles with self-published sources from February 2020 + item-367 at level 4: list_item: All articles with unsourced statements + item-368 at level 4: list_item: Articles with unsourced statements from January 2022 + item-369 at level 4: list_item: CS1: long volume value + item-370 at level 4: list_item: Pages using Sister project links with wikidata mismatch + item-371 at level 4: list_item: Pages using Sister project links with hidden wikidata + item-372 at level 4: list_item: Webarchive template wayback links + item-373 at level 4: list_item: Articles with Project Gutenberg links + item-374 at level 4: list_item: Articles containing video clips + item-375 at level 3: list: group list + item-376 at level 4: list_item: This page was last edited on 21 September 2024, at 12:11 (UTC). + item-377 at level 4: list_item: Text is available under the Crea ... tion, Inc., a non-profit organization. + item-378 at level 3: list: group list + item-379 at level 4: list_item: Privacy policy + item-380 at level 4: list_item: About Wikipedia + item-381 at level 4: list_item: Disclaimers + item-382 at level 4: list_item: Contact Wikipedia + item-383 at level 4: list_item: Code of Conduct + item-384 at level 4: list_item: Developers + item-385 at level 4: list_item: Statistics + item-386 at level 4: list_item: Cookie statement + item-387 at level 4: list_item: Mobile view + item-388 at level 3: list: group list + item-389 at level 3: list: group list + item-390 at level 1: caption: Pacific black duck displaying the characteristic upending "duck" + item-391 at level 1: caption: Male mallard. + item-392 at level 1: caption: Wood ducks. + item-393 at level 1: caption: Mallard landing in approach + item-394 at level 1: caption: Male Mandarin duck + item-395 at level 1: caption: Flying steamer ducks in Ushuaia, Argentina + item-396 at level 1: caption: Female mallard in Cornwall, England + item-397 at level 1: caption: Pecten along the bill + item-398 at level 1: caption: Mallard duckling preening + item-399 at level 1: caption: A Muscovy duckling + item-400 at level 1: caption: Ringed teal + item-401 at level 1: caption: Indian Runner ducks, a common breed of domestic ducks + item-402 at level 1: caption: Three black-colored ducks in the coat of arms of Maaninka[49] \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/wiki_duck.html.json b/tests/data/groundtruth/docling_v2/wiki_duck.html.json index 3c61c1a5..f662a04e 100644 --- a/tests/data/groundtruth/docling_v2/wiki_duck.html.json +++ b/tests/data/groundtruth/docling_v2/wiki_duck.html.json @@ -138,7 +138,7 @@ "$ref": "#/texts/7" } ], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -164,7 +164,7 @@ "$ref": "#/texts/13" } ], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -174,7 +174,7 @@ "$ref": "#/body" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -188,7 +188,7 @@ "$ref": "#/texts/14" } ], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -198,7 +198,7 @@ "$ref": "#/body" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -215,7 +215,7 @@ "$ref": "#/texts/16" } ], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -232,7 +232,7 @@ "$ref": "#/texts/18" } ], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -249,7 +249,7 @@ "$ref": "#/texts/21" } ], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -263,7 +263,7 @@ "$ref": "#/texts/22" } ], - "content_layer": "body", + "content_layer": "furniture", "name": "header-1", "label": "section" }, @@ -304,7 +304,7 @@ "$ref": "#/texts/42" } ], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -314,7 +314,7 @@ "$ref": "#/texts/24" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -324,7 +324,7 @@ "$ref": "#/texts/25" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -334,7 +334,7 @@ "$ref": "#/texts/26" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -344,7 +344,7 @@ "$ref": "#/texts/27" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -367,7 +367,7 @@ "$ref": "#/texts/32" } ], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -377,7 +377,7 @@ "$ref": "#/texts/29" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -387,7 +387,7 @@ "$ref": "#/texts/30" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -397,7 +397,7 @@ "$ref": "#/texts/31" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -407,7 +407,7 @@ "$ref": "#/texts/32" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -430,7 +430,7 @@ "$ref": "#/texts/37" } ], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -440,7 +440,7 @@ "$ref": "#/texts/34" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -450,7 +450,7 @@ "$ref": "#/texts/35" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -460,7 +460,7 @@ "$ref": "#/texts/36" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -470,7 +470,7 @@ "$ref": "#/texts/37" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -480,7 +480,7 @@ "$ref": "#/texts/38" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -497,7 +497,7 @@ "$ref": "#/texts/41" } ], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -507,7 +507,7 @@ "$ref": "#/texts/40" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -517,7 +517,7 @@ "$ref": "#/texts/41" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -527,7 +527,7 @@ "$ref": "#/texts/42" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "list", "label": "list" }, @@ -1623,7 +1623,7 @@ "$ref": "#/body" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "paragraph", "prov": [], "orig": "Main menu", @@ -1635,7 +1635,7 @@ "$ref": "#/body" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "paragraph", "prov": [], "orig": "Navigation", @@ -1647,7 +1647,7 @@ "$ref": "#/groups/0" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Main page", @@ -1661,7 +1661,7 @@ "$ref": "#/groups/0" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Contents", @@ -1675,7 +1675,7 @@ "$ref": "#/groups/0" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Current events", @@ -1689,7 +1689,7 @@ "$ref": "#/groups/0" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Random article", @@ -1703,7 +1703,7 @@ "$ref": "#/groups/0" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "About Wikipedia", @@ -1717,7 +1717,7 @@ "$ref": "#/groups/0" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Contact us", @@ -1731,7 +1731,7 @@ "$ref": "#/body" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "paragraph", "prov": [], "orig": "Contribute", @@ -1743,7 +1743,7 @@ "$ref": "#/groups/1" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Help", @@ -1757,7 +1757,7 @@ "$ref": "#/groups/1" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Learn to edit", @@ -1771,7 +1771,7 @@ "$ref": "#/groups/1" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Community portal", @@ -1785,7 +1785,7 @@ "$ref": "#/groups/1" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Recent changes", @@ -1799,7 +1799,7 @@ "$ref": "#/groups/1" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Upload file", @@ -1813,7 +1813,7 @@ "$ref": "#/groups/3" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Donate", @@ -1827,7 +1827,7 @@ "$ref": "#/groups/5" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Create account", @@ -1841,7 +1841,7 @@ "$ref": "#/groups/5" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Log in", @@ -1855,7 +1855,7 @@ "$ref": "#/groups/6" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Create account", @@ -1869,7 +1869,7 @@ "$ref": "#/groups/6" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Log in", @@ -1883,7 +1883,7 @@ "$ref": "#/body" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "paragraph", "prov": [], "orig": "Pages for logged out editors", @@ -1895,7 +1895,7 @@ "$ref": "#/groups/7" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Contributions", @@ -1909,7 +1909,7 @@ "$ref": "#/groups/7" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "Talk", @@ -1927,7 +1927,7 @@ "$ref": "#/groups/9" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "section_header", "prov": [], "orig": "Contents", @@ -1940,7 +1940,7 @@ "$ref": "#/groups/9" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "(Top)", @@ -1958,7 +1958,7 @@ "$ref": "#/groups/10" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "1 Etymology", @@ -1976,7 +1976,7 @@ "$ref": "#/groups/11" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "2 Taxonomy", @@ -1994,7 +1994,7 @@ "$ref": "#/groups/12" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "3 Morphology", @@ -2012,7 +2012,7 @@ "$ref": "#/groups/13" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "4 Distribution and habitat", @@ -2030,7 +2030,7 @@ "$ref": "#/groups/14" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "5 Behaviour Toggle Behaviour subsection", @@ -2048,7 +2048,7 @@ "$ref": "#/groups/15" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "5.1 Feeding", @@ -2066,7 +2066,7 @@ "$ref": "#/groups/16" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "5.2 Breeding", @@ -2084,7 +2084,7 @@ "$ref": "#/groups/17" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "5.3 Communication", @@ -2102,7 +2102,7 @@ "$ref": "#/groups/18" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "5.4 Predators", @@ -2120,7 +2120,7 @@ "$ref": "#/groups/19" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "6 Relationship with humans Toggle Relationship with humans subsection", @@ -2138,7 +2138,7 @@ "$ref": "#/groups/20" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "6.1 Hunting", @@ -2156,7 +2156,7 @@ "$ref": "#/groups/21" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "6.2 Domestication", @@ -2174,7 +2174,7 @@ "$ref": "#/groups/22" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "6.3 Heraldry", @@ -2192,7 +2192,7 @@ "$ref": "#/groups/23" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "6.4 Cultural references", @@ -2210,7 +2210,7 @@ "$ref": "#/groups/24" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "7 See also", @@ -2228,7 +2228,7 @@ "$ref": "#/groups/25" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "8 Notes Toggle Notes subsection", @@ -2246,7 +2246,7 @@ "$ref": "#/groups/26" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "8.1 Citations", @@ -2264,7 +2264,7 @@ "$ref": "#/groups/27" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "8.2 Sources", @@ -2282,7 +2282,7 @@ "$ref": "#/groups/28" } ], - "content_layer": "body", + "content_layer": "furniture", "label": "list_item", "prov": [], "orig": "9 External links", @@ -7592,7 +7592,7 @@ "$ref": "#/body" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "picture", "prov": [], "captions": [], @@ -7606,7 +7606,7 @@ "$ref": "#/body" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "picture", "prov": [], "captions": [], @@ -7620,7 +7620,7 @@ "$ref": "#/body" }, "children": [], - "content_layer": "body", + "content_layer": "furniture", "label": "picture", "prov": [], "captions": [], diff --git a/tests/data/groundtruth/docling_v2/wiki_duck.html.md b/tests/data/groundtruth/docling_v2/wiki_duck.html.md index 26c42e26..b08b31c6 100644 --- a/tests/data/groundtruth/docling_v2/wiki_duck.html.md +++ b/tests/data/groundtruth/docling_v2/wiki_duck.html.md @@ -1,62 +1,3 @@ -Main menu - -Navigation - -- Main page -- Contents -- Current events -- Random article -- About Wikipedia -- Contact us - -Contribute - -- Help -- Learn to edit -- Community portal -- Recent changes -- Upload file - - - - - - - - - Donate - - Create account - - Log in - - Create account - - Log in - -Pages for logged out editors - - - Contributions - - Talk - -## Contents - - - (Top) - - 1 Etymology - - 2 Taxonomy - - 3 Morphology - - 4 Distribution and habitat - - 5 Behaviour Toggle Behaviour subsection - - 5.1 Feeding - - 5.2 Breeding - - 5.3 Communication - - 5.4 Predators - - 6 Relationship with humans Toggle Relationship with humans subsection - - 6.1 Hunting - - 6.2 Domestication - - 6.3 Heraldry - - 6.4 Cultural references -- 7 See also -- 8 Notes Toggle Notes subsection - - 8.1 Citations - - 8.2 Sources -- 9 External links - # Duck - Acèh