Merge branch 'main' of github.com:DS4SD/docling into cau/async-pipeline-and-converter

This commit is contained in:
Christoph Auer 2025-07-24 15:07:00 +02:00
commit 4040bd6618
15 changed files with 376 additions and 137 deletions

View File

@ -1,3 +1,16 @@
## [v2.42.2](https://github.com/docling-project/docling/releases/tag/v2.42.2) - 2025-07-24
### Fix
* **HTML:** Concatenation of child strings in table cells and list items ([#1981](https://github.com/docling-project/docling/issues/1981)) ([`5132f06`](https://github.com/docling-project/docling/commit/5132f061a8125332ba10a4a30e0dd4973637a11b))
* **docx:** Adding plain latex equations to table cells ([#1986](https://github.com/docling-project/docling/issues/1986)) ([`0b83609`](https://github.com/docling-project/docling/commit/0b836095319ebf2133c4a3a77602718034915e55))
* Preserve PARTIAL_SUCCESS status when document timeout hits ([#1975](https://github.com/docling-project/docling/issues/1975)) ([`98e2fcf`](https://github.com/docling-project/docling/commit/98e2fcff63660c158bafb9a1b5584c1439d7a533))
* Multi-page image support (tiff) ([#1928](https://github.com/docling-project/docling/issues/1928)) ([`8d50a59`](https://github.com/docling-project/docling/commit/8d50a59d4887caac1c214add8037ed0b5250f68c))
### Documentation
* Add chat with dosu ([#1984](https://github.com/docling-project/docling/issues/1984)) ([`7b5f860`](https://github.com/docling-project/docling/commit/7b5f86098d07b734f2b6aa8c88ae7cafa265246a))
## [v2.42.1](https://github.com/docling-project/docling/releases/tag/v2.42.1) - 2025-07-22
### Fix

View File

@ -21,6 +21,7 @@
[![License MIT](https://img.shields.io/github/license/docling-project/docling)](https://opensource.org/licenses/MIT)
[![PyPI Downloads](https://static.pepy.tech/badge/docling/month)](https://pepy.tech/projects/docling)
[![Docling Actor](https://apify.com/actor-badge?actor=vancura/docling?fpr=docling)](https://apify.com/vancura/docling)
[![Chat with Dosu](https://dosu.dev/dosu-chat-badge.svg)](https://app.dosu.dev/097760a8-135e-4789-8234-90c8837d7f1c/ask?utm_source=github)
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/10101/badge)](https://www.bestpractices.dev/projects/10101)
[![LF AI & Data](https://img.shields.io/badge/LF%20AI%20%26%20Data-003778?logo=linuxfoundation&logoColor=fff&color=0094ff&labelColor=003778)](https://lfaidata.foundation/projects/)

View File

@ -5,7 +5,7 @@ from io import BytesIO
from pathlib import Path
from typing import Final, Optional, Union, cast
from bs4 import BeautifulSoup, NavigableString, Tag
from bs4 import BeautifulSoup, NavigableString, PageElement, Tag
from bs4.element import PreformattedString
from docling_core.types.doc import (
DocItem,
@ -297,7 +297,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
):
parts.append(child)
elif isinstance(child, Tag) and child.name not in ("ul", "ol"):
text_part = child.get_text()
text_part = HTMLDocumentBackend.get_text(child)
if text_part:
parts.append(text_part)
li_text = re.sub(r"\s+|\n+", " ", "".join(parts)).strip()
@ -417,6 +417,36 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
content_layer=self.content_layer,
)
@staticmethod
def get_text(item: PageElement) -> str:
    """Concatenate the visible child strings of a PageElement.

    This method behaves like `PageElement.get_text()` but also considers
    certain tags: the text of every <p> or <li> tag encountered (including the
    element itself) is followed by a single space, while all other text is
    concatenated without separators.

    Like the default behavior of `PageElement.get_text()`, only plain strings
    and CDATA sections contribute to the result. Comments, doctype and
    processing-instruction strings, as well as the specialised string classes
    that bs4 uses for the content of <script>, <style> and <template> tags,
    are ignored so that they do not leak into table cells or list items.

    Args:
        item: The element (tag or string) to extract the text from.

    Returns:
        The concatenated text. No stripping or whitespace normalisation is
        applied; callers are expected to do that themselves.
    """
    # Local import: CData is not part of the module-level bs4 imports.
    from bs4.element import CData

    # Exact types (not subclasses) that count as text. Comment, Doctype,
    # Script, Stylesheet, ... are NavigableString subclasses, so an
    # isinstance() check would wrongly accept them.
    text_types = (NavigableString, CData)

    def _extract_text_recursively(node: PageElement) -> list[str]:
        """Recursively extract text from all child nodes."""
        if isinstance(node, NavigableString):
            return [node] if type(node) in text_types else []
        if not isinstance(node, Tag):
            return []

        chunks: list[str] = []
        for child in node:
            chunks.extend(_extract_text_recursively(child))
        text = "".join(chunks)
        # Block-like tags get a trailing space so that adjacent paragraphs
        # or list items are not glued together.
        return [text + " "] if node.name in {"p", "li"} else [text]

    return "".join(_extract_text_recursively(item))
@staticmethod
def _get_cell_spans(cell: Tag) -> tuple[int, int]:
"""Extract colspan and rowspan values from a table cell tag.
@ -510,9 +540,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
formula.replace_with(NavigableString(math_formula))
# TODO: extract content correctly from table-cells with lists
text = html_cell.text
# label = html_cell.name
text = HTMLDocumentBackend.get_text(html_cell).strip()
col_span, row_span = HTMLDocumentBackend._get_cell_spans(html_cell)
if row_header:
row_span -= 1

View File

@ -1104,8 +1104,17 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
)
_log.debug(f" spanned before row {spanned_idx}")
# Detect equations in cell text
text, equations = self._handle_equations_in_text(
element=cell._element, text=cell.text
)
if len(equations) == 0:
text = cell.text
else:
text = text.replace("<eq>", "$").replace("</eq>", "$")
table_cell = TableCell(
text=cell.text,
text=text,
row_span=spanned_idx - row_idx,
col_span=cell.grid_span,
start_row_offset_idx=row.grid_cols_before + row_idx,

8
docs/index.md vendored
View File

@ -13,6 +13,7 @@
[![License MIT](https://img.shields.io/github/license/docling-project/docling)](https://opensource.org/licenses/MIT)
[![PyPI Downloads](https://static.pepy.tech/badge/docling/month)](https://pepy.tech/projects/docling)
[![Docling Actor](https://apify.com/actor-badge?actor=vancura/docling?fpr=docling)](https://apify.com/vancura/docling)
[![Chat with Dosu](https://dosu.dev/dosu-chat-badge.svg)](https://app.dosu.dev/097760a8-135e-4789-8234-90c8837d7f1c/ask?utm_source=github)
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/10101/badge)](https://www.bestpractices.dev/projects/10101)
[![LF AI & Data](https://img.shields.io/badge/LF%20AI%20%26%20Data-003778?logo=linuxfoundation&logoColor=fff&color=0094ff&labelColor=003778)](https://lfaidata.foundation/projects/)
@ -46,6 +47,13 @@ Docling simplifies document processing, parsing diverse formats — including ad
<a href="reference/document_converter/" class="card"><b>Reference</b><br />See more API details</a>
</div>
## Live assistant
Do you want to leverage the power of AI and get live support for Docling?
Try out the [Chat with Dosu](https://app.dosu.dev/097760a8-135e-4789-8234-90c8837d7f1c/ask?utm_source=github) functionality provided by our friends at [Dosu](https://dosu.dev/).
[![Chat with Dosu](https://dosu.dev/dosu-chat-badge.svg)](https://app.dosu.dev/097760a8-135e-4789-8234-90c8837d7f1c/ask?utm_source=github)
## LF AI & Data
Docling is hosted as a project in the [LF AI & Data Foundation](https://lfaidata.foundation/projects/).

View File

@ -1,6 +1,6 @@
[project]
name = "docling"
version = "2.42.1" # DO NOT EDIT, updated automatically
version = "2.42.2" # DO NOT EDIT, updated automatically
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
license = "MIT"
keywords = [

Binary file not shown.

View File

@ -5839,7 +5839,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": " Number of total districts",
"text": "Number of total districts",
"column_header": false,
"row_header": false,
"row_section": false
@ -6642,7 +6642,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": " Number of total districts",
"text": "Number of total districts",
"column_header": false,
"row_header": false,
"row_section": false

View File

@ -4166,7 +4166,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ground corn",
"text": "Ground corn",
"column_header": false,
"row_header": false,
"row_section": false
@ -4298,7 +4298,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Soybean meal",
"text": "Soybean meal",
"column_header": false,
"row_header": false,
"row_section": false
@ -4430,7 +4430,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Corn silage",
"text": "Corn silage",
"column_header": false,
"row_header": false,
"row_section": false
@ -4562,7 +4562,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ann temperate pasture",
"text": "Ann temperate pasture",
"column_header": false,
"row_header": false,
"row_section": false
@ -4694,7 +4694,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ann tropical pasture",
"text": "Ann tropical pasture",
"column_header": false,
"row_header": false,
"row_section": false
@ -4826,7 +4826,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Perenn tropical pasture",
"text": "Perenn tropical pasture",
"column_header": false,
"row_header": false,
"row_section": false
@ -4970,7 +4970,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Organic matter",
"text": "Organic matter",
"column_header": false,
"row_header": false,
"row_section": false
@ -5102,7 +5102,7 @@
"end_row_offset_idx": 14,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Crude protein",
"text": "Crude protein",
"column_header": false,
"row_header": false,
"row_section": false
@ -5234,7 +5234,7 @@
"end_row_offset_idx": 15,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Neutral detergent fibre",
"text": "Neutral detergent fibre",
"column_header": false,
"row_header": false,
"row_section": false
@ -5366,7 +5366,7 @@
"end_row_offset_idx": 16,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Acid detergent fibre",
"text": "Acid detergent fibre",
"column_header": false,
"row_header": false,
"row_section": false
@ -5498,7 +5498,7 @@
"end_row_offset_idx": 17,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ether extract",
"text": "Ether extract",
"column_header": false,
"row_header": false,
"row_section": false
@ -5642,7 +5642,7 @@
"end_row_offset_idx": 19,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    OM digestibility, %",
"text": "OM digestibility, %",
"column_header": false,
"row_header": false,
"row_section": false
@ -5774,7 +5774,7 @@
"end_row_offset_idx": 20,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    NEL, Mcal (kg DM)-1",
"text": "NEL, Mcal (kg DM)-1",
"column_header": false,
"row_header": false,
"row_section": false
@ -5906,7 +5906,7 @@
"end_row_offset_idx": 21,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    MP, g (kg DM)-1",
"text": "MP, g (kg DM)-1",
"column_header": false,
"row_header": false,
"row_section": false
@ -6713,7 +6713,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ground corn",
"text": "Ground corn",
"column_header": false,
"row_header": false,
"row_section": false
@ -6847,7 +6847,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Soybean meal",
"text": "Soybean meal",
"column_header": false,
"row_header": false,
"row_section": false
@ -6981,7 +6981,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Corn silage",
"text": "Corn silage",
"column_header": false,
"row_header": false,
"row_section": false
@ -7115,7 +7115,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ann temperate pasture",
"text": "Ann temperate pasture",
"column_header": false,
"row_header": false,
"row_section": false
@ -7249,7 +7249,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ann tropical pasture",
"text": "Ann tropical pasture",
"column_header": false,
"row_header": false,
"row_section": false
@ -7383,7 +7383,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Perenn tropical pasture",
"text": "Perenn tropical pasture",
"column_header": false,
"row_header": false,
"row_section": false
@ -7651,7 +7651,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Organic matter",
"text": "Organic matter",
"column_header": false,
"row_header": false,
"row_section": false
@ -7785,7 +7785,7 @@
"end_row_offset_idx": 14,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Crude protein",
"text": "Crude protein",
"column_header": false,
"row_header": false,
"row_section": false
@ -7919,7 +7919,7 @@
"end_row_offset_idx": 15,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Neutral detergent fibre",
"text": "Neutral detergent fibre",
"column_header": false,
"row_header": false,
"row_section": false
@ -8053,7 +8053,7 @@
"end_row_offset_idx": 16,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Acid detergent fibre",
"text": "Acid detergent fibre",
"column_header": false,
"row_header": false,
"row_section": false
@ -8187,7 +8187,7 @@
"end_row_offset_idx": 17,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Ether extract",
"text": "Ether extract",
"column_header": false,
"row_header": false,
"row_section": false
@ -8455,7 +8455,7 @@
"end_row_offset_idx": 19,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    OM digestibility, %",
"text": "OM digestibility, %",
"column_header": false,
"row_header": false,
"row_section": false
@ -8589,7 +8589,7 @@
"end_row_offset_idx": 20,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    NEL, Mcal (kg DM)-1",
"text": "NEL, Mcal (kg DM)-1",
"column_header": false,
"row_header": false,
"row_section": false
@ -8723,7 +8723,7 @@
"end_row_offset_idx": 21,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    MP, g (kg DM)-1",
"text": "MP, g (kg DM)-1",
"column_header": false,
"row_header": false,
"row_section": false
@ -8998,7 +8998,7 @@
"end_row_offset_idx": 3,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Corn grain",
"text": "Corn grain",
"column_header": false,
"row_header": false,
"row_section": false
@ -9058,7 +9058,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Soybean",
"text": "Soybean",
"column_header": false,
"row_header": false,
"row_section": false
@ -9178,7 +9178,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Corn silageb",
"text": "Corn silageb",
"column_header": false,
"row_header": false,
"row_section": false
@ -9238,7 +9238,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Annual ryegrassc",
"text": "Annual ryegrassc",
"column_header": false,
"row_header": false,
"row_section": false
@ -9298,7 +9298,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Pearl milletd",
"text": "Pearl milletd",
"column_header": false,
"row_header": false,
"row_section": false
@ -9358,7 +9358,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Kikuyu grasse",
"text": "Kikuyu grasse",
"column_header": false,
"row_header": false,
"row_section": false
@ -9547,7 +9547,7 @@
"end_row_offset_idx": 3,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Corn grain",
"text": "Corn grain",
"column_header": false,
"row_header": false,
"row_section": false
@ -9609,7 +9609,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Soybean",
"text": "Soybean",
"column_header": false,
"row_header": false,
"row_section": false
@ -9733,7 +9733,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Corn silageb",
"text": "Corn silageb",
"column_header": false,
"row_header": false,
"row_section": false
@ -9795,7 +9795,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Annual ryegrassc",
"text": "Annual ryegrassc",
"column_header": false,
"row_header": false,
"row_section": false
@ -9857,7 +9857,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Pearl milletd",
"text": "Pearl milletd",
"column_header": false,
"row_header": false,
"row_section": false
@ -9919,7 +9919,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Kikuyu grasse",
"text": "Kikuyu grasse",
"column_header": false,
"row_header": false,
"row_section": false
@ -10182,7 +10182,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    N organic fertilizer, kg ha-1a",
"text": "N organic fertilizer, kg ha-1a",
"column_header": false,
"row_header": false,
"row_section": false
@ -10242,7 +10242,7 @@
"end_row_offset_idx": 5,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    N synthetic fertilizer",
"text": "N synthetic fertilizer",
"column_header": false,
"row_header": false,
"row_section": false
@ -10302,7 +10302,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    N from residual DM, kg ha-1b",
"text": "N from residual DM, kg ha-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -10362,7 +10362,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Emission fator, kg N2O-N (kg N)-1c",
"text": "Emission fator, kg N2O-N (kg N)-1c",
"column_header": false,
"row_header": false,
"row_section": false
@ -10422,7 +10422,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O ha-1 from direct emissions",
"text": "kg N2O ha-1 from direct emissions",
"column_header": false,
"row_header": false,
"row_section": false
@ -10542,7 +10542,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg NH3-N+NOx-N (kg organic N)-1b",
"text": "kg NH3-N+NOx-N (kg organic N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -10602,7 +10602,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg NH3-N+NOx-N (kg synthetic N)-1b",
"text": "kg NH3-N+NOx-N (kg synthetic N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -10662,7 +10662,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O-N (kg NH3-N+NOx-N)-1b",
"text": "kg N2O-N (kg NH3-N+NOx-N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -10722,7 +10722,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O ha-1 from NH3+NOx volatilized",
"text": "kg N2O ha-1 from NH3+NOx volatilized",
"column_header": false,
"row_header": false,
"row_section": false
@ -10842,7 +10842,7 @@
"end_row_offset_idx": 15,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N losses by leaching (kg N)-1b",
"text": "kg N losses by leaching (kg N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -10902,7 +10902,7 @@
"end_row_offset_idx": 16,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O-N (kg N leaching)-1",
"text": "kg N2O-N (kg N leaching)-1",
"column_header": false,
"row_header": false,
"row_section": false
@ -10962,7 +10962,7 @@
"end_row_offset_idx": 17,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O ha-1 from N losses by leaching",
"text": "kg N2O ha-1 from N losses by leaching",
"column_header": false,
"row_header": false,
"row_section": false
@ -11873,7 +11873,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    N organic fertilizer, kg ha-1a",
"text": "N organic fertilizer, kg ha-1a",
"column_header": false,
"row_header": false,
"row_section": false
@ -11935,7 +11935,7 @@
"end_row_offset_idx": 5,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    N synthetic fertilizer",
"text": "N synthetic fertilizer",
"column_header": false,
"row_header": false,
"row_section": false
@ -11997,7 +11997,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    N from residual DM, kg ha-1b",
"text": "N from residual DM, kg ha-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -12059,7 +12059,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Emission fator, kg N2O-N (kg N)-1c",
"text": "Emission fator, kg N2O-N (kg N)-1c",
"column_header": false,
"row_header": false,
"row_section": false
@ -12121,7 +12121,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O ha-1 from direct emissions",
"text": "kg N2O ha-1 from direct emissions",
"column_header": false,
"row_header": false,
"row_section": false
@ -12245,7 +12245,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg NH3-N+NOx-N (kg organic N)-1b",
"text": "kg NH3-N+NOx-N (kg organic N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -12307,7 +12307,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg NH3-N+NOx-N (kg synthetic N)-1b",
"text": "kg NH3-N+NOx-N (kg synthetic N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -12369,7 +12369,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O-N (kg NH3-N+NOx-N)-1b",
"text": "kg N2O-N (kg NH3-N+NOx-N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -12431,7 +12431,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O ha-1 from NH3+NOx volatilized",
"text": "kg N2O ha-1 from NH3+NOx volatilized",
"column_header": false,
"row_header": false,
"row_section": false
@ -12555,7 +12555,7 @@
"end_row_offset_idx": 15,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N losses by leaching (kg N)-1b",
"text": "kg N losses by leaching (kg N)-1b",
"column_header": false,
"row_header": false,
"row_section": false
@ -12617,7 +12617,7 @@
"end_row_offset_idx": 16,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O-N (kg N leaching)-1",
"text": "kg N2O-N (kg N leaching)-1",
"column_header": false,
"row_header": false,
"row_section": false
@ -12679,7 +12679,7 @@
"end_row_offset_idx": 17,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    kg N2O ha-1 from N losses by leaching",
"text": "kg N2O ha-1 from N losses by leaching",
"column_header": false,
"row_header": false,
"row_section": false
@ -13780,7 +13780,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Fuel for manure handling",
"text": "Fuel for manure handling",
"column_header": false,
"row_header": false,
"row_section": false
@ -13828,7 +13828,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Machinery for manure handling",
"text": "Machinery for manure handling",
"column_header": false,
"row_header": false,
"row_section": false
@ -13924,7 +13924,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Electricity for milking",
"text": "Electricity for milking",
"column_header": false,
"row_header": false,
"row_section": false
@ -13972,7 +13972,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Electricity for lightingd",
"text": "Electricity for lightingd",
"column_header": false,
"row_header": false,
"row_section": false
@ -14375,7 +14375,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Fuel for manure handling",
"text": "Fuel for manure handling",
"column_header": false,
"row_header": false,
"row_section": false
@ -14425,7 +14425,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Machinery for manure handling",
"text": "Machinery for manure handling",
"column_header": false,
"row_header": false,
"row_section": false
@ -14525,7 +14525,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Electricity for milking",
"text": "Electricity for milking",
"column_header": false,
"row_header": false,
"row_section": false
@ -14575,7 +14575,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "    Electricity for lightingd",
"text": "Electricity for lightingd",
"column_header": false,
"row_header": false,
"row_section": false

View File

@ -0,0 +1,3 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: table with [2x2]
item-2 at level 1: paragraph:

View File

@ -0,0 +1,174 @@
{
"schema_name": "DoclingDocument",
"version": "1.5.0",
"name": "table_with_equations",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"binary_hash": 6528760837820727976,
"filename": "table_with_equations.docx"
},
"furniture": {
"self_ref": "#/furniture",
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
"body": {
"self_ref": "#/body",
"children": [
{
"$ref": "#/tables/0"
},
{
"$ref": "#/texts/0"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
"groups": [],
"texts": [
{
"self_ref": "#/texts/0",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
}
],
"pictures": [],
"tables": [
{
"self_ref": "#/tables/0",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "table",
"prov": [],
"captions": [],
"references": [],
"footnotes": [],
"data": {
"table_cells": [
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "The next cell has an equation",
"column_header": true,
"row_header": false,
"row_section": false
},
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "$A= \\pi r^{2}$",
"column_header": true,
"row_header": false,
"row_section": false
},
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 1,
"end_row_offset_idx": 2,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "The next cell has another equation",
"column_header": false,
"row_header": false,
"row_section": false
},
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 1,
"end_row_offset_idx": 2,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "$x=\\frac{-b \\pm \\sqrt{b^{2}-4ac}}{2a}$",
"column_header": false,
"row_header": false,
"row_section": false
}
],
"num_rows": 2,
"num_cols": 2,
"grid": [
[
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "The next cell has an equation",
"column_header": true,
"row_header": false,
"row_section": false
},
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "$A= \\pi r^{2}$",
"column_header": true,
"row_header": false,
"row_section": false
}
],
[
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 1,
"end_row_offset_idx": 2,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "The next cell has another equation",
"column_header": false,
"row_header": false,
"row_section": false
},
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 1,
"end_row_offset_idx": 2,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "$x=\\frac{-b \\pm \\sqrt{b^{2}-4ac}}{2a}$",
"column_header": false,
"row_header": false,
"row_section": false
}
]
]
},
"annotations": []
}
],
"key_value_items": [],
"form_items": [],
"pages": {}
}

View File

@ -0,0 +1,3 @@
| The next cell has an equation | $A= \pi r^{2}$ |
|------------------------------------|----------------------------------------|
| The next cell has another equation | $x=\frac{-b \pm \sqrt{b^{2}-4ac}}{2a}$ |

View File

@ -8410,7 +8410,7 @@
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Duck\n",
"text": "Duck",
"column_header": true,
"row_header": false,
"row_section": false
@ -8422,7 +8422,7 @@
"end_row_offset_idx": 2,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "\n",
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
@ -8434,7 +8434,7 @@
"end_row_offset_idx": 3,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Bufflehead\n(Bucephala albeola)\n",
"text": "Bufflehead\n(Bucephala albeola)",
"column_header": false,
"row_header": false,
"row_section": false
@ -8446,7 +8446,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Scientific classification \n",
"text": "Scientific classification",
"column_header": true,
"row_header": false,
"row_section": false
@ -8458,7 +8458,7 @@
"end_row_offset_idx": 5,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Domain:\n",
"text": "Domain:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8470,7 +8470,7 @@
"end_row_offset_idx": 5,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Eukaryota\n",
"text": "Eukaryota",
"column_header": false,
"row_header": false,
"row_section": false
@ -8482,7 +8482,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Kingdom:\n",
"text": "Kingdom:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8494,7 +8494,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Animalia\n",
"text": "Animalia",
"column_header": false,
"row_header": false,
"row_section": false
@ -8506,7 +8506,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Phylum:\n",
"text": "Phylum:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8518,7 +8518,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Chordata\n",
"text": "Chordata",
"column_header": false,
"row_header": false,
"row_section": false
@ -8530,7 +8530,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Class:\n",
"text": "Class:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8542,7 +8542,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Aves\n",
"text": "Aves",
"column_header": false,
"row_header": false,
"row_section": false
@ -8554,7 +8554,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Order:\n",
"text": "Order:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8566,7 +8566,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Anseriformes\n",
"text": "Anseriformes",
"column_header": false,
"row_header": false,
"row_section": false
@ -8578,7 +8578,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Superfamily:\n",
"text": "Superfamily:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8590,7 +8590,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Anatoidea\n",
"text": "Anatoidea",
"column_header": false,
"row_header": false,
"row_section": false
@ -8602,7 +8602,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Family:\n",
"text": "Family:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8614,7 +8614,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Anatidae\n",
"text": "Anatidae",
"column_header": false,
"row_header": false,
"row_section": false
@ -8626,7 +8626,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Subfamilies\n",
"text": "Subfamilies",
"column_header": true,
"row_header": false,
"row_section": false
@ -8638,7 +8638,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "\nSee text\n\n",
"text": "See text",
"column_header": false,
"row_header": false,
"row_section": false
@ -8655,7 +8655,7 @@
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Duck\n",
"text": "Duck",
"column_header": true,
"row_header": false,
"row_section": false
@ -8667,7 +8667,7 @@
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Duck\n",
"text": "Duck",
"column_header": true,
"row_header": false,
"row_section": false
@ -8681,7 +8681,7 @@
"end_row_offset_idx": 2,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "\n",
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
@ -8693,7 +8693,7 @@
"end_row_offset_idx": 2,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "\n",
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
@ -8707,7 +8707,7 @@
"end_row_offset_idx": 3,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Bufflehead\n(Bucephala albeola)\n",
"text": "Bufflehead\n(Bucephala albeola)",
"column_header": false,
"row_header": false,
"row_section": false
@ -8719,7 +8719,7 @@
"end_row_offset_idx": 3,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Bufflehead\n(Bucephala albeola)\n",
"text": "Bufflehead\n(Bucephala albeola)",
"column_header": false,
"row_header": false,
"row_section": false
@ -8733,7 +8733,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Scientific classification \n",
"text": "Scientific classification",
"column_header": true,
"row_header": false,
"row_section": false
@ -8745,7 +8745,7 @@
"end_row_offset_idx": 4,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Scientific classification \n",
"text": "Scientific classification",
"column_header": true,
"row_header": false,
"row_section": false
@ -8759,7 +8759,7 @@
"end_row_offset_idx": 5,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Domain:\n",
"text": "Domain:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8771,7 +8771,7 @@
"end_row_offset_idx": 5,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Eukaryota\n",
"text": "Eukaryota",
"column_header": false,
"row_header": false,
"row_section": false
@ -8785,7 +8785,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Kingdom:\n",
"text": "Kingdom:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8797,7 +8797,7 @@
"end_row_offset_idx": 6,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Animalia\n",
"text": "Animalia",
"column_header": false,
"row_header": false,
"row_section": false
@ -8811,7 +8811,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Phylum:\n",
"text": "Phylum:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8823,7 +8823,7 @@
"end_row_offset_idx": 7,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Chordata\n",
"text": "Chordata",
"column_header": false,
"row_header": false,
"row_section": false
@ -8837,7 +8837,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Class:\n",
"text": "Class:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8849,7 +8849,7 @@
"end_row_offset_idx": 8,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Aves\n",
"text": "Aves",
"column_header": false,
"row_header": false,
"row_section": false
@ -8863,7 +8863,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Order:\n",
"text": "Order:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8875,7 +8875,7 @@
"end_row_offset_idx": 9,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Anseriformes\n",
"text": "Anseriformes",
"column_header": false,
"row_header": false,
"row_section": false
@ -8889,7 +8889,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Superfamily:\n",
"text": "Superfamily:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8901,7 +8901,7 @@
"end_row_offset_idx": 10,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Anatoidea\n",
"text": "Anatoidea",
"column_header": false,
"row_header": false,
"row_section": false
@ -8915,7 +8915,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Family:\n",
"text": "Family:",
"column_header": false,
"row_header": false,
"row_section": false
@ -8927,7 +8927,7 @@
"end_row_offset_idx": 11,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Anatidae\n",
"text": "Anatidae",
"column_header": false,
"row_header": false,
"row_section": false
@ -8941,7 +8941,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Subfamilies\n",
"text": "Subfamilies",
"column_header": true,
"row_header": false,
"row_section": false
@ -8953,7 +8953,7 @@
"end_row_offset_idx": 12,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Subfamilies\n",
"text": "Subfamilies",
"column_header": true,
"row_header": false,
"row_section": false
@ -8967,7 +8967,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "\nSee text\n\n",
"text": "See text",
"column_header": false,
"row_header": false,
"row_section": false
@ -8979,7 +8979,7 @@
"end_row_offset_idx": 13,
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "\nSee text\n\n",
"text": "See text",
"column_header": false,
"row_header": false,
"row_section": false

View File

@ -512,7 +512,7 @@ Duck at Wikipedia's sister projects
<!-- image -->
| Authority control databases | Authority control databases |
|--------------------------------|----------------------------------------------|
|-------------------------------|---------------------------------------------------|
| National | United States France BnF data Japan Latvia Israel |
| Other | IdRef |

2
uv.lock generated
View File

@ -806,7 +806,7 @@ wheels = [
[[package]]
name = "docling"
version = "2.42.1"
version = "2.42.2"
source = { editable = "." }
dependencies = [
{ name = "accelerate" },