mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
feat: Rich tables for MSWord backend (#2291)
* Adding support of rich table cells to MSWord backend

  Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Fixes for properly accounting lists, pictures and headers in rich table cells

  Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Cleaned up msword backend, re-generated docx tests

  Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Added detection of simple table cells in word backend

  Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Cleaned up

  Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

---------

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
Co-authored-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
@@ -12,8 +12,11 @@ from docling_core.types.doc import (
|
|||||||
ImageRef,
|
ImageRef,
|
||||||
ListGroup,
|
ListGroup,
|
||||||
NodeItem,
|
NodeItem,
|
||||||
|
RefItem,
|
||||||
|
RichTableCell,
|
||||||
TableCell,
|
TableCell,
|
||||||
TableData,
|
TableData,
|
||||||
|
TextItem,
|
||||||
)
|
)
|
||||||
from docling_core.types.doc.document import Formatting
|
from docling_core.types.doc.document import Formatting
|
||||||
from docx import Document
|
from docx import Document
|
||||||
@@ -128,7 +131,8 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
doc = DoclingDocument(name=self.file.stem or "file", origin=origin)
|
doc = DoclingDocument(name=self.file.stem or "file", origin=origin)
|
||||||
if self.is_valid():
|
if self.is_valid():
|
||||||
assert self.docx_obj is not None
|
assert self.docx_obj is not None
|
||||||
doc = self._walk_linear(self.docx_obj.element.body, self.docx_obj, doc)
|
doc, _ = self._walk_linear(self.docx_obj.element.body, self.docx_obj, doc)
|
||||||
|
# doc, _ = doc_info
|
||||||
return doc
|
return doc
|
||||||
else:
|
else:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
@@ -172,7 +176,9 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
body: BaseOxmlElement,
|
body: BaseOxmlElement,
|
||||||
docx_obj: DocxDocument,
|
docx_obj: DocxDocument,
|
||||||
doc: DoclingDocument,
|
doc: DoclingDocument,
|
||||||
) -> DoclingDocument:
|
# parent:
|
||||||
|
) -> tuple[DoclingDocument, list[RefItem]]:
|
||||||
|
added_elements = []
|
||||||
for element in body:
|
for element in body:
|
||||||
tag_name = etree.QName(element).localname
|
tag_name = etree.QName(element).localname
|
||||||
# Check for Inline Images (blip elements)
|
# Check for Inline Images (blip elements)
|
||||||
@@ -230,8 +236,9 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
parent=self.parents[level - 1],
|
parent=self.parents[level - 1],
|
||||||
name="shape-text",
|
name="shape-text",
|
||||||
)
|
)
|
||||||
|
added_elements.append(shape_group.get_ref())
|
||||||
doc.add_text(
|
doc.add_text(
|
||||||
label=DocItemLabel.PARAGRAPH,
|
label=DocItemLabel.TEXT,
|
||||||
parent=shape_group,
|
parent=shape_group,
|
||||||
text=text_content,
|
text=text_content,
|
||||||
)
|
)
|
||||||
@@ -246,23 +253,27 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
_log.debug(
|
_log.debug(
|
||||||
f"Found textbox content with {len(textbox_elements)} elements"
|
f"Found textbox content with {len(textbox_elements)} elements"
|
||||||
)
|
)
|
||||||
self._handle_textbox_content(textbox_elements, docx_obj, doc)
|
tbc = self._handle_textbox_content(textbox_elements, docx_obj, doc)
|
||||||
|
added_elements.extend(tbc)
|
||||||
|
|
||||||
# Check for Tables
|
# Check for Tables
|
||||||
if element.tag.endswith("tbl"):
|
if element.tag.endswith("tbl"):
|
||||||
try:
|
try:
|
||||||
self._handle_tables(element, docx_obj, doc)
|
t = self._handle_tables(element, docx_obj, doc)
|
||||||
|
added_elements.extend(t)
|
||||||
except Exception:
|
except Exception:
|
||||||
_log.debug("could not parse a table, broken docx table")
|
_log.debug("could not parse a table, broken docx table")
|
||||||
# Check for Image
|
# Check for Image
|
||||||
elif drawing_blip:
|
elif drawing_blip:
|
||||||
self._handle_pictures(docx_obj, drawing_blip, doc)
|
pics = self._handle_pictures(docx_obj, drawing_blip, doc)
|
||||||
|
added_elements.extend(pics)
|
||||||
# Check for Text after the Image
|
# Check for Text after the Image
|
||||||
if (
|
if (
|
||||||
tag_name in ["p"]
|
tag_name in ["p"]
|
||||||
and element.find(".//w:t", namespaces=namespaces) is not None
|
and element.find(".//w:t", namespaces=namespaces) is not None
|
||||||
):
|
):
|
||||||
self._handle_text_elements(element, docx_obj, doc)
|
te1 = self._handle_text_elements(element, docx_obj, doc)
|
||||||
|
added_elements.extend(te1)
|
||||||
# Check for the sdt containers, like table of contents
|
# Check for the sdt containers, like table of contents
|
||||||
elif tag_name in ["sdt"]:
|
elif tag_name in ["sdt"]:
|
||||||
sdt_content = element.find(".//w:sdtContent", namespaces=namespaces)
|
sdt_content = element.find(".//w:sdtContent", namespaces=namespaces)
|
||||||
@@ -270,15 +281,17 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
# Iterate paragraphs, runs, or text inside <w:sdtContent>.
|
# Iterate paragraphs, runs, or text inside <w:sdtContent>.
|
||||||
paragraphs = sdt_content.findall(".//w:p", namespaces=namespaces)
|
paragraphs = sdt_content.findall(".//w:p", namespaces=namespaces)
|
||||||
for p in paragraphs:
|
for p in paragraphs:
|
||||||
self._handle_text_elements(p, docx_obj, doc)
|
te = self._handle_text_elements(p, docx_obj, doc)
|
||||||
|
added_elements.extend(te)
|
||||||
# Check for Text
|
# Check for Text
|
||||||
elif tag_name in ["p"]:
|
elif tag_name in ["p"]:
|
||||||
# "tcPr", "sectPr"
|
# "tcPr", "sectPr"
|
||||||
self._handle_text_elements(element, docx_obj, doc)
|
te = self._handle_text_elements(element, docx_obj, doc)
|
||||||
|
added_elements.extend(te)
|
||||||
else:
|
else:
|
||||||
_log.debug(f"Ignoring element in DOCX with tag: {tag_name}")
|
_log.debug(f"Ignoring element in DOCX with tag: {tag_name}")
|
||||||
|
|
||||||
return doc
|
return doc, added_elements
|
||||||
|
|
||||||
def _str_to_int(
|
def _str_to_int(
|
||||||
self, s: Optional[str], default: Optional[int] = 0
|
self, s: Optional[str], default: Optional[int] = 0
|
||||||
@@ -674,14 +687,15 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
textbox_elements: list,
|
textbox_elements: list,
|
||||||
docx_obj: DocxDocument,
|
docx_obj: DocxDocument,
|
||||||
doc: DoclingDocument,
|
doc: DoclingDocument,
|
||||||
) -> None:
|
) -> List[RefItem]:
|
||||||
|
elem_ref: List[RefItem] = []
|
||||||
"""Process textbox content and add it to the document structure."""
|
"""Process textbox content and add it to the document structure."""
|
||||||
level = self._get_level()
|
level = self._get_level()
|
||||||
# Create a textbox group to contain all text from the textbox
|
# Create a textbox group to contain all text from the textbox
|
||||||
textbox_group = doc.add_group(
|
textbox_group = doc.add_group(
|
||||||
label=GroupLabel.SECTION, parent=self.parents[level - 1], name="textbox"
|
label=GroupLabel.SECTION, parent=self.parents[level - 1], name="textbox"
|
||||||
)
|
)
|
||||||
|
elem_ref.append(textbox_group.get_ref())
|
||||||
# Set this as the current parent to ensure textbox content
|
# Set this as the current parent to ensure textbox content
|
||||||
# is properly nested in document structure
|
# is properly nested in document structure
|
||||||
original_parent = self.parents[level]
|
original_parent = self.parents[level]
|
||||||
@@ -729,11 +743,11 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
# Mark this paragraph as processed
|
# Mark this paragraph as processed
|
||||||
processed_paragraphs.add(paragraph_id)
|
processed_paragraphs.add(paragraph_id)
|
||||||
|
|
||||||
self._handle_text_elements(p, docx_obj, doc)
|
elem_ref.extend(self._handle_text_elements(p, docx_obj, doc))
|
||||||
|
|
||||||
# Restore original parent
|
# Restore original parent
|
||||||
self.parents[level] = original_parent
|
self.parents[level] = original_parent
|
||||||
return
|
return elem_ref
|
||||||
|
|
||||||
def _handle_equations_in_text(self, element, text):
|
def _handle_equations_in_text(self, element, text):
|
||||||
only_texts = []
|
only_texts = []
|
||||||
@@ -803,7 +817,8 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
element: BaseOxmlElement,
|
element: BaseOxmlElement,
|
||||||
docx_obj: DocxDocument,
|
docx_obj: DocxDocument,
|
||||||
doc: DoclingDocument,
|
doc: DoclingDocument,
|
||||||
) -> None:
|
) -> List[RefItem]:
|
||||||
|
elem_ref: List[RefItem] = []
|
||||||
paragraph = Paragraph(element, docx_obj)
|
paragraph = Paragraph(element, docx_obj)
|
||||||
paragraph_elements = self._get_paragraph_elements(paragraph)
|
paragraph_elements = self._get_paragraph_elements(paragraph)
|
||||||
text, equations = self._handle_equations_in_text(
|
text, equations = self._handle_equations_in_text(
|
||||||
@@ -811,7 +826,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if text is None:
|
if text is None:
|
||||||
return
|
return elem_ref
|
||||||
text = text.strip()
|
text = text.strip()
|
||||||
|
|
||||||
# Common styles for bullet and numbered lists.
|
# Common styles for bullet and numbered lists.
|
||||||
@@ -832,15 +847,16 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
# Check if this is actually a numbered list by examining the numFmt
|
# Check if this is actually a numbered list by examining the numFmt
|
||||||
is_numbered = self._is_numbered_list(docx_obj, numid, ilevel)
|
is_numbered = self._is_numbered_list(docx_obj, numid, ilevel)
|
||||||
|
|
||||||
self._add_list_item(
|
li = self._add_list_item(
|
||||||
doc=doc,
|
doc=doc,
|
||||||
numid=numid,
|
numid=numid,
|
||||||
ilevel=ilevel,
|
ilevel=ilevel,
|
||||||
elements=paragraph_elements,
|
elements=paragraph_elements,
|
||||||
is_numbered=is_numbered,
|
is_numbered=is_numbered,
|
||||||
)
|
)
|
||||||
|
elem_ref.extend(li) # MUST BE REF!!!
|
||||||
self._update_history(p_style_id, p_level, numid, ilevel)
|
self._update_history(p_style_id, p_level, numid, ilevel)
|
||||||
return
|
return elem_ref
|
||||||
elif (
|
elif (
|
||||||
numid is None
|
numid is None
|
||||||
and self._prev_numid() is not None
|
and self._prev_numid() is not None
|
||||||
@@ -860,9 +876,9 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
if p_style_id in ["Title"]:
|
if p_style_id in ["Title"]:
|
||||||
for key in range(len(self.parents)):
|
for key in range(len(self.parents)):
|
||||||
self.parents[key] = None
|
self.parents[key] = None
|
||||||
self.parents[0] = doc.add_text(
|
te = doc.add_text(parent=None, label=DocItemLabel.TITLE, text=text)
|
||||||
parent=None, label=DocItemLabel.TITLE, text=text
|
self.parents[0] = te
|
||||||
)
|
elem_ref.append(te.get_ref())
|
||||||
elif "Heading" in p_style_id:
|
elif "Heading" in p_style_id:
|
||||||
style_element = getattr(paragraph.style, "element", None)
|
style_element = getattr(paragraph.style, "element", None)
|
||||||
if style_element is not None:
|
if style_element is not None:
|
||||||
@@ -871,7 +887,8 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
is_numbered_style = False
|
is_numbered_style = False
|
||||||
self._add_header(doc, p_level, text, is_numbered_style)
|
h1 = self._add_header(doc, p_level, text, is_numbered_style)
|
||||||
|
elem_ref.extend(h1)
|
||||||
|
|
||||||
elif len(equations) > 0:
|
elif len(equations) > 0:
|
||||||
if (paragraph.text is None or len(paragraph.text.strip()) == 0) and len(
|
if (paragraph.text is None or len(paragraph.text.strip()) == 0) and len(
|
||||||
@@ -879,15 +896,17 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
) > 0:
|
) > 0:
|
||||||
# Standalone equation
|
# Standalone equation
|
||||||
level = self._get_level()
|
level = self._get_level()
|
||||||
doc.add_text(
|
t1 = doc.add_text(
|
||||||
label=DocItemLabel.FORMULA,
|
label=DocItemLabel.FORMULA,
|
||||||
parent=self.parents[level - 1],
|
parent=self.parents[level - 1],
|
||||||
text=text.replace("<eq>", "").replace("</eq>", ""),
|
text=text.replace("<eq>", "").replace("</eq>", ""),
|
||||||
)
|
)
|
||||||
|
elem_ref.append(t1.get_ref())
|
||||||
else:
|
else:
|
||||||
# Inline equation
|
# Inline equation
|
||||||
level = self._get_level()
|
level = self._get_level()
|
||||||
inline_equation = doc.add_inline_group(parent=self.parents[level - 1])
|
inline_equation = doc.add_inline_group(parent=self.parents[level - 1])
|
||||||
|
elem_ref.append(inline_equation.get_ref())
|
||||||
text_tmp = text
|
text_tmp = text
|
||||||
for eq in equations:
|
for eq in equations:
|
||||||
if len(text_tmp) == 0:
|
if len(text_tmp) == 0:
|
||||||
@@ -899,23 +918,26 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
text_tmp = "" if len(split_text_tmp) == 1 else split_text_tmp[1]
|
text_tmp = "" if len(split_text_tmp) == 1 else split_text_tmp[1]
|
||||||
|
|
||||||
if len(pre_eq_text) > 0:
|
if len(pre_eq_text) > 0:
|
||||||
doc.add_text(
|
e1 = doc.add_text(
|
||||||
label=DocItemLabel.PARAGRAPH,
|
label=DocItemLabel.TEXT,
|
||||||
parent=inline_equation,
|
parent=inline_equation,
|
||||||
text=pre_eq_text,
|
text=pre_eq_text,
|
||||||
)
|
)
|
||||||
doc.add_text(
|
elem_ref.append(e1.get_ref())
|
||||||
|
e2 = doc.add_text(
|
||||||
label=DocItemLabel.FORMULA,
|
label=DocItemLabel.FORMULA,
|
||||||
parent=inline_equation,
|
parent=inline_equation,
|
||||||
text=eq.replace("<eq>", "").replace("</eq>", ""),
|
text=eq.replace("<eq>", "").replace("</eq>", ""),
|
||||||
)
|
)
|
||||||
|
elem_ref.append(e2.get_ref())
|
||||||
|
|
||||||
if len(text_tmp) > 0:
|
if len(text_tmp) > 0:
|
||||||
doc.add_text(
|
e3 = doc.add_text(
|
||||||
label=DocItemLabel.PARAGRAPH,
|
label=DocItemLabel.TEXT,
|
||||||
parent=inline_equation,
|
parent=inline_equation,
|
||||||
text=text_tmp.strip(),
|
text=text_tmp.strip(),
|
||||||
)
|
)
|
||||||
|
elem_ref.append(e3.get_ref())
|
||||||
|
|
||||||
elif p_style_id in [
|
elif p_style_id in [
|
||||||
"Paragraph",
|
"Paragraph",
|
||||||
@@ -934,13 +956,14 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
paragraph_elements=paragraph_elements,
|
paragraph_elements=paragraph_elements,
|
||||||
)
|
)
|
||||||
for text, format, hyperlink in paragraph_elements:
|
for text, format, hyperlink in paragraph_elements:
|
||||||
doc.add_text(
|
t2 = doc.add_text(
|
||||||
label=DocItemLabel.PARAGRAPH,
|
label=DocItemLabel.TEXT,
|
||||||
parent=parent,
|
parent=parent,
|
||||||
text=text,
|
text=text,
|
||||||
formatting=format,
|
formatting=format,
|
||||||
hyperlink=hyperlink,
|
hyperlink=hyperlink,
|
||||||
)
|
)
|
||||||
|
elem_ref.append(t2.get_ref())
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Text style names can, and will have, not only default values but user values too
|
# Text style names can, and will have, not only default values but user values too
|
||||||
@@ -952,16 +975,17 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
paragraph_elements=paragraph_elements,
|
paragraph_elements=paragraph_elements,
|
||||||
)
|
)
|
||||||
for text, format, hyperlink in paragraph_elements:
|
for text, format, hyperlink in paragraph_elements:
|
||||||
doc.add_text(
|
t3 = doc.add_text(
|
||||||
label=DocItemLabel.PARAGRAPH,
|
label=DocItemLabel.TEXT,
|
||||||
parent=parent,
|
parent=parent,
|
||||||
text=text,
|
text=text,
|
||||||
formatting=format,
|
formatting=format,
|
||||||
hyperlink=hyperlink,
|
hyperlink=hyperlink,
|
||||||
)
|
)
|
||||||
|
elem_ref.append(t3.get_ref())
|
||||||
|
|
||||||
self._update_history(p_style_id, p_level, numid, ilevel)
|
self._update_history(p_style_id, p_level, numid, ilevel)
|
||||||
return
|
return elem_ref
|
||||||
|
|
||||||
def _add_header(
|
def _add_header(
|
||||||
self,
|
self,
|
||||||
@@ -969,17 +993,21 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
curr_level: Optional[int],
|
curr_level: Optional[int],
|
||||||
text: str,
|
text: str,
|
||||||
is_numbered_style: bool = False,
|
is_numbered_style: bool = False,
|
||||||
) -> None:
|
) -> List[RefItem]:
|
||||||
|
elem_ref: List[RefItem] = []
|
||||||
level = self._get_level()
|
level = self._get_level()
|
||||||
if isinstance(curr_level, int):
|
if isinstance(curr_level, int):
|
||||||
if curr_level > level:
|
if curr_level > level:
|
||||||
# add invisible group
|
# add invisible group
|
||||||
for i in range(level, curr_level):
|
for i in range(level, curr_level):
|
||||||
self.parents[i] = doc.add_group(
|
gr1 = doc.add_group(
|
||||||
parent=self.parents[i - 1],
|
parent=self.parents[i - 1],
|
||||||
label=GroupLabel.SECTION,
|
label=GroupLabel.SECTION,
|
||||||
name=f"header-{i}",
|
name=f"header-{i}",
|
||||||
)
|
)
|
||||||
|
elem_ref.append(gr1.get_ref())
|
||||||
|
self.parents[i] = gr1
|
||||||
|
|
||||||
elif curr_level < level:
|
elif curr_level < level:
|
||||||
# remove the tail
|
# remove the tail
|
||||||
for key in range(len(self.parents)):
|
for key in range(len(self.parents)):
|
||||||
@@ -1019,12 +1047,14 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
text = f"{self.numbered_headers[previous_level]}.{text}"
|
text = f"{self.numbered_headers[previous_level]}.{text}"
|
||||||
previous_level -= 1
|
previous_level -= 1
|
||||||
|
|
||||||
self.parents[current_level] = doc.add_heading(
|
hd = doc.add_heading(
|
||||||
parent=self.parents[parent_level],
|
parent=self.parents[parent_level],
|
||||||
text=text,
|
text=text,
|
||||||
level=add_level,
|
level=add_level,
|
||||||
)
|
)
|
||||||
return
|
self.parents[current_level] = hd
|
||||||
|
elem_ref.append(hd.get_ref())
|
||||||
|
return elem_ref
|
||||||
|
|
||||||
def _add_formatted_list_item(
|
def _add_formatted_list_item(
|
||||||
self,
|
self,
|
||||||
@@ -1033,12 +1063,13 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
marker: str,
|
marker: str,
|
||||||
enumerated: bool,
|
enumerated: bool,
|
||||||
level: int,
|
level: int,
|
||||||
) -> None:
|
) -> List[RefItem]:
|
||||||
|
elem_ref: List[RefItem] = []
|
||||||
# This should not happen by construction
|
# This should not happen by construction
|
||||||
if not isinstance(self.parents[level], ListGroup):
|
if not isinstance(self.parents[level], ListGroup):
|
||||||
return
|
return elem_ref
|
||||||
if not elements:
|
if not elements:
|
||||||
return
|
return elem_ref
|
||||||
|
|
||||||
if len(elements) == 1:
|
if len(elements) == 1:
|
||||||
text, format, hyperlink = elements[0]
|
text, format, hyperlink = elements[0]
|
||||||
@@ -1068,6 +1099,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
formatting=format,
|
formatting=format,
|
||||||
hyperlink=hyperlink,
|
hyperlink=hyperlink,
|
||||||
)
|
)
|
||||||
|
return elem_ref
|
||||||
|
|
||||||
def _add_list_item(
|
def _add_list_item(
|
||||||
self,
|
self,
|
||||||
@@ -1077,10 +1109,11 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
ilevel: int,
|
ilevel: int,
|
||||||
elements: list,
|
elements: list,
|
||||||
is_numbered: bool = False,
|
is_numbered: bool = False,
|
||||||
) -> None:
|
) -> List[RefItem]:
|
||||||
# TODO: this method is always called with is_numbered. Numbered lists should be properly addressed.
|
elem_ref: List[RefItem] = []
|
||||||
|
# this method is always called with is_numbered. Numbered lists should be properly addressed.
|
||||||
if not elements:
|
if not elements:
|
||||||
return None
|
return elem_ref
|
||||||
enum_marker = ""
|
enum_marker = ""
|
||||||
|
|
||||||
level = self._get_level()
|
level = self._get_level()
|
||||||
@@ -1091,9 +1124,9 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
# Reset counters for the new numbering sequence
|
# Reset counters for the new numbering sequence
|
||||||
self._reset_list_counters_for_new_sequence(numid)
|
self._reset_list_counters_for_new_sequence(numid)
|
||||||
|
|
||||||
self.parents[level] = doc.add_list_group(
|
list_gr = doc.add_list_group(name="list", parent=self.parents[level - 1])
|
||||||
name="list", parent=self.parents[level - 1]
|
self.parents[level] = list_gr
|
||||||
)
|
elem_ref.append(list_gr.get_ref())
|
||||||
|
|
||||||
# Set marker and enumerated arguments if this is an enumeration element.
|
# Set marker and enumerated arguments if this is an enumeration element.
|
||||||
if is_numbered:
|
if is_numbered:
|
||||||
@@ -1114,9 +1147,9 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self.level_at_new_list + prev_indent + 1,
|
self.level_at_new_list + prev_indent + 1,
|
||||||
self.level_at_new_list + ilevel + 1,
|
self.level_at_new_list + ilevel + 1,
|
||||||
):
|
):
|
||||||
self.parents[i] = doc.add_list_group(
|
list_gr1 = doc.add_list_group(name="list", parent=self.parents[i - 1])
|
||||||
name="list", parent=self.parents[i - 1]
|
self.parents[i] = list_gr1
|
||||||
)
|
elem_ref.append(list_gr1.get_ref())
|
||||||
|
|
||||||
# TODO: Set marker and enumerated arguments if this is an enumeration element.
|
# TODO: Set marker and enumerated arguments if this is an enumeration element.
|
||||||
if is_numbered:
|
if is_numbered:
|
||||||
@@ -1156,7 +1189,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
)
|
)
|
||||||
|
|
||||||
elif self._prev_numid() == numid or prev_indent == ilevel:
|
elif self._prev_numid() == numid or prev_indent == ilevel:
|
||||||
# TODO: Set marker and enumerated arguments if this is an enumeration element.
|
# Set marker and enumerated arguments if this is an enumeration element.
|
||||||
if is_numbered:
|
if is_numbered:
|
||||||
counter = self._get_list_counter(numid, ilevel)
|
counter = self._get_list_counter(numid, ilevel)
|
||||||
enum_marker = str(counter) + "."
|
enum_marker = str(counter) + "."
|
||||||
@@ -1165,15 +1198,15 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self._add_formatted_list_item(
|
self._add_formatted_list_item(
|
||||||
doc, elements, enum_marker, is_numbered, level - 1
|
doc, elements, enum_marker, is_numbered, level - 1
|
||||||
)
|
)
|
||||||
|
return elem_ref
|
||||||
return
|
|
||||||
|
|
||||||
def _handle_tables(
|
def _handle_tables(
|
||||||
self,
|
self,
|
||||||
element: BaseOxmlElement,
|
element: BaseOxmlElement,
|
||||||
docx_obj: DocxDocument,
|
docx_obj: DocxDocument,
|
||||||
doc: DoclingDocument,
|
doc: DoclingDocument,
|
||||||
) -> None:
|
) -> List[RefItem]:
|
||||||
|
elem_ref: List[RefItem] = []
|
||||||
table: Table = Table(element, docx_obj)
|
table: Table = Table(element, docx_obj)
|
||||||
num_rows = len(table.rows)
|
num_rows = len(table.rows)
|
||||||
num_cols = len(table.columns)
|
num_cols = len(table.columns)
|
||||||
@@ -1184,9 +1217,13 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
# In case we have a table of only 1 cell, we consider it furniture
|
# In case we have a table of only 1 cell, we consider it furniture
|
||||||
# And proceed processing the content of the cell as though it's in the document body
|
# And proceed processing the content of the cell as though it's in the document body
|
||||||
self._walk_linear(cell_element._element, docx_obj, doc)
|
self._walk_linear(cell_element._element, docx_obj, doc)
|
||||||
return
|
return elem_ref
|
||||||
|
|
||||||
data = TableData(num_rows=num_rows, num_cols=num_cols)
|
data = TableData(num_rows=num_rows, num_cols=num_cols)
|
||||||
|
level = self._get_level()
|
||||||
|
docling_table = doc.add_table(data=data, parent=self.parents[level - 1])
|
||||||
|
elem_ref.append(docling_table.get_ref())
|
||||||
|
|
||||||
cell_set: set[CT_Tc] = set()
|
cell_set: set[CT_Tc] = set()
|
||||||
for row_idx, row in enumerate(table.rows):
|
for row_idx, row in enumerate(table.rows):
|
||||||
_log.debug(f"Row index {row_idx} with {len(row.cells)} populated cells")
|
_log.debug(f"Row index {row_idx} with {len(row.cells)} populated cells")
|
||||||
@@ -1223,7 +1260,70 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
else:
|
else:
|
||||||
text = text.replace("<eq>", "$").replace("</eq>", "$")
|
text = text.replace("<eq>", "$").replace("</eq>", "$")
|
||||||
|
|
||||||
table_cell = TableCell(
|
provs_in_cell: List[RefItem] = []
|
||||||
|
_, provs_in_cell = self._walk_linear(cell._element, docx_obj, doc)
|
||||||
|
ref_for_rich_cell = provs_in_cell[0]
|
||||||
|
rich_table_cell = False
|
||||||
|
|
||||||
|
def group_cell_elements(
|
||||||
|
group_name: str, doc: DoclingDocument, provs_in_cell: List[RefItem]
|
||||||
|
) -> RefItem:
|
||||||
|
group_element = doc.add_group(
|
||||||
|
label=GroupLabel.UNSPECIFIED,
|
||||||
|
name=group_name,
|
||||||
|
parent=docling_table,
|
||||||
|
)
|
||||||
|
for prov in provs_in_cell:
|
||||||
|
group_element.children.append(prov)
|
||||||
|
pr_item = prov.resolve(doc)
|
||||||
|
item_parent = pr_item.parent.resolve(doc)
|
||||||
|
if pr_item.get_ref() in item_parent.children:
|
||||||
|
item_parent.children.remove(pr_item.get_ref())
|
||||||
|
pr_item.parent = group_element.get_ref()
|
||||||
|
ref_for_rich_cell = group_element.get_ref()
|
||||||
|
return ref_for_rich_cell
|
||||||
|
|
||||||
|
if len(provs_in_cell) > 1:
|
||||||
|
# Cell has multiple elements, we need to group them
|
||||||
|
rich_table_cell = True
|
||||||
|
group_name = f"rich_cell_group_{len(doc.tables)}_{col_idx}_{row.grid_cols_before + row_idx}"
|
||||||
|
ref_for_rich_cell = group_cell_elements(
|
||||||
|
group_name, doc, provs_in_cell
|
||||||
|
)
|
||||||
|
|
||||||
|
elif len(provs_in_cell) == 1:
|
||||||
|
item_ref = provs_in_cell[0]
|
||||||
|
pr_item = item_ref.resolve(doc)
|
||||||
|
if isinstance(pr_item, TextItem):
|
||||||
|
# Cell has only one element and it's just a text
|
||||||
|
rich_table_cell = False
|
||||||
|
doc.delete_items(node_items=[pr_item])
|
||||||
|
else:
|
||||||
|
rich_table_cell = True
|
||||||
|
group_name = f"rich_cell_group_{len(doc.tables)}_{col_idx}_{row.grid_cols_before + row_idx}"
|
||||||
|
ref_for_rich_cell = group_cell_elements(
|
||||||
|
group_name, doc, provs_in_cell
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
rich_table_cell = False
|
||||||
|
|
||||||
|
if rich_table_cell:
|
||||||
|
rich_cell = RichTableCell(
|
||||||
|
text=text,
|
||||||
|
row_span=spanned_idx - row_idx,
|
||||||
|
col_span=cell.grid_span,
|
||||||
|
start_row_offset_idx=row.grid_cols_before + row_idx,
|
||||||
|
end_row_offset_idx=row.grid_cols_before + spanned_idx,
|
||||||
|
start_col_offset_idx=col_idx,
|
||||||
|
end_col_offset_idx=col_idx + cell.grid_span,
|
||||||
|
column_header=row.grid_cols_before + row_idx == 0,
|
||||||
|
row_header=False,
|
||||||
|
ref=ref_for_rich_cell, # points to an artificial group around children
|
||||||
|
)
|
||||||
|
doc.add_table_cell(table_item=docling_table, cell=rich_cell)
|
||||||
|
col_idx += cell.grid_span
|
||||||
|
else:
|
||||||
|
simple_cell = TableCell(
|
||||||
text=text,
|
text=text,
|
||||||
row_span=spanned_idx - row_idx,
|
row_span=spanned_idx - row_idx,
|
||||||
col_span=cell.grid_span,
|
col_span=cell.grid_span,
|
||||||
@@ -1234,16 +1334,13 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
column_header=row.grid_cols_before + row_idx == 0,
|
column_header=row.grid_cols_before + row_idx == 0,
|
||||||
row_header=False,
|
row_header=False,
|
||||||
)
|
)
|
||||||
data.table_cells.append(table_cell)
|
doc.add_table_cell(table_item=docling_table, cell=simple_cell)
|
||||||
col_idx += cell.grid_span
|
col_idx += cell.grid_span
|
||||||
|
return elem_ref
|
||||||
level = self._get_level()
|
|
||||||
doc.add_table(data=data, parent=self.parents[level - 1])
|
|
||||||
return
|
|
||||||
|
|
||||||
def _handle_pictures(
|
def _handle_pictures(
|
||||||
self, docx_obj: DocxDocument, drawing_blip: Any, doc: DoclingDocument
|
self, docx_obj: DocxDocument, drawing_blip: Any, doc: DoclingDocument
|
||||||
) -> None:
|
) -> List[RefItem]:
|
||||||
def get_docx_image(drawing_blip: Any) -> Optional[bytes]:
|
def get_docx_image(drawing_blip: Any) -> Optional[bytes]:
|
||||||
image_data: Optional[bytes] = None
|
image_data: Optional[bytes] = None
|
||||||
rId = drawing_blip[0].get(
|
rId = drawing_blip[0].get(
|
||||||
@@ -1255,28 +1352,32 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
image_data = image_part.blob # Get the binary image data
|
image_data = image_part.blob # Get the binary image data
|
||||||
return image_data
|
return image_data
|
||||||
|
|
||||||
|
elem_ref: List[RefItem] = []
|
||||||
level = self._get_level()
|
level = self._get_level()
|
||||||
# Open the BytesIO object with PIL to create an Image
|
# Open the BytesIO object with PIL to create an Image
|
||||||
image_data: Optional[bytes] = get_docx_image(drawing_blip)
|
image_data: Optional[bytes] = get_docx_image(drawing_blip)
|
||||||
if image_data is None:
|
if image_data is None:
|
||||||
_log.warning("Warning: image cannot be found")
|
_log.warning("Warning: image cannot be found")
|
||||||
doc.add_picture(
|
p1 = doc.add_picture(
|
||||||
parent=self.parents[level - 1],
|
parent=self.parents[level - 1],
|
||||||
caption=None,
|
caption=None,
|
||||||
)
|
)
|
||||||
|
elem_ref.append(p1.get_ref())
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
image_bytes = BytesIO(image_data)
|
image_bytes = BytesIO(image_data)
|
||||||
pil_image = Image.open(image_bytes)
|
pil_image = Image.open(image_bytes)
|
||||||
doc.add_picture(
|
p2 = doc.add_picture(
|
||||||
parent=self.parents[level - 1],
|
parent=self.parents[level - 1],
|
||||||
image=ImageRef.from_pil(image=pil_image, dpi=72),
|
image=ImageRef.from_pil(image=pil_image, dpi=72),
|
||||||
caption=None,
|
caption=None,
|
||||||
)
|
)
|
||||||
|
elem_ref.append(p2.get_ref())
|
||||||
except (UnidentifiedImageError, OSError):
|
except (UnidentifiedImageError, OSError):
|
||||||
_log.warning("Warning: image cannot be loaded by Pillow")
|
_log.warning("Warning: image cannot be loaded by Pillow")
|
||||||
doc.add_picture(
|
p3 = doc.add_picture(
|
||||||
parent=self.parents[level - 1],
|
parent=self.parents[level - 1],
|
||||||
caption=None,
|
caption=None,
|
||||||
)
|
)
|
||||||
return
|
elem_ref.append(p3.get_ref())
|
||||||
|
return elem_ref
|
||||||
|
|||||||
@@ -1,40 +1,40 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: inline: group group
|
item-1 at level 1: inline: group group
|
||||||
item-2 at level 2: paragraph: This is a word document and this is an inline equation:
|
item-2 at level 2: text: This is a word document and this is an inline equation:
|
||||||
item-3 at level 2: formula: A= \pi r^{2}
|
item-3 at level 2: formula: A= \pi r^{2}
|
||||||
item-4 at level 2: paragraph: . If instead, I want an equation by line, I can do this:
|
item-4 at level 2: text: . If instead, I want an equation by line, I can do this:
|
||||||
item-5 at level 1: paragraph:
|
item-5 at level 1: text:
|
||||||
item-6 at level 1: formula: a^{2}+b^{2}=c^{2} \text{ \texttimes } 23
|
item-6 at level 1: formula: a^{2}+b^{2}=c^{2} \text{ \texttimes } 23
|
||||||
item-7 at level 1: paragraph: And that is an equation by itself. Cheers!
|
item-7 at level 1: text: And that is an equation by itself. Cheers!
|
||||||
item-8 at level 1: paragraph:
|
item-8 at level 1: text:
|
||||||
item-9 at level 1: paragraph: This is another equation:
|
item-9 at level 1: text: This is another equation:
|
||||||
item-10 at level 1: formula: f\left(x\right)=a_{0}+\sum_{n=1} ... })+b_{n}\sin(\frac{n \pi x}{L})\right)
|
item-10 at level 1: formula: f\left(x\right)=a_{0}+\sum_{n=1} ... })+b_{n}\sin(\frac{n \pi x}{L})\right)
|
||||||
item-11 at level 1: paragraph:
|
item-11 at level 1: text:
|
||||||
item-12 at level 1: paragraph: This is text. This is text. This ... s is text. This is text. This is text.
|
item-12 at level 1: text: This is text. This is text. This ... s is text. This is text. This is text.
|
||||||
item-13 at level 1: paragraph:
|
item-13 at level 1: text:
|
||||||
item-14 at level 1: paragraph:
|
item-14 at level 1: text:
|
||||||
item-15 at level 1: inline: group group
|
item-15 at level 1: inline: group group
|
||||||
item-16 at level 2: paragraph: This is a word document and this is an inline equation:
|
item-16 at level 2: text: This is a word document and this is an inline equation:
|
||||||
item-17 at level 2: formula: A= \pi r^{2}
|
item-17 at level 2: formula: A= \pi r^{2}
|
||||||
item-18 at level 2: paragraph: . If instead, I want an equation by line, I can do this:
|
item-18 at level 2: text: . If instead, I want an equation by line, I can do this:
|
||||||
item-19 at level 1: paragraph:
|
item-19 at level 1: text:
|
||||||
item-20 at level 1: formula: \left(x+a\right)^{n}=\sum_{k=0}^ ... ac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}
|
item-20 at level 1: formula: \left(x+a\right)^{n}=\sum_{k=0}^ ... ac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}
|
||||||
item-21 at level 1: paragraph:
|
item-21 at level 1: text:
|
||||||
item-22 at level 1: paragraph: And that is an equation by itself. Cheers!
|
item-22 at level 1: text: And that is an equation by itself. Cheers!
|
||||||
item-23 at level 1: paragraph:
|
item-23 at level 1: text:
|
||||||
item-24 at level 1: paragraph: This is another equation:
|
item-24 at level 1: text: This is another equation:
|
||||||
item-25 at level 1: paragraph:
|
item-25 at level 1: text:
|
||||||
item-26 at level 1: formula: \left(1+x\right)^{n}=1+\frac{nx} ... ght)x^{2}}{2!}+ \text{ \textellipsis }
|
item-26 at level 1: formula: \left(1+x\right)^{n}=1+\frac{nx} ... ght)x^{2}}{2!}+ \text{ \textellipsis }
|
||||||
item-27 at level 1: paragraph:
|
item-27 at level 1: text:
|
||||||
item-28 at level 1: paragraph: This is text. This is text. This ... s is text. This is text. This is text.
|
item-28 at level 1: text: This is text. This is text. This ... s is text. This is text. This is text.
|
||||||
item-29 at level 1: paragraph:
|
item-29 at level 1: text:
|
||||||
item-30 at level 1: paragraph:
|
item-30 at level 1: text:
|
||||||
item-31 at level 1: inline: group group
|
item-31 at level 1: inline: group group
|
||||||
item-32 at level 2: paragraph: This is a word document and this is an inline equation:
|
item-32 at level 2: text: This is a word document and this is an inline equation:
|
||||||
item-33 at level 2: formula: A= \pi r^{2}
|
item-33 at level 2: formula: A= \pi r^{2}
|
||||||
item-34 at level 2: paragraph: . If instead, I want an equation by line, I can do this:
|
item-34 at level 2: text: . If instead, I want an equation by line, I can do this:
|
||||||
item-35 at level 1: paragraph:
|
item-35 at level 1: text:
|
||||||
item-36 at level 1: formula: e^{x}=1+\frac{x}{1!}+\frac{x^{2} ... xtellipsis } , - \infty < x < \infty
|
item-36 at level 1: formula: e^{x}=1+\frac{x}{1!}+\frac{x^{2} ... xtellipsis } , - \infty < x < \infty
|
||||||
item-37 at level 1: paragraph:
|
item-37 at level 1: text:
|
||||||
item-38 at level 1: paragraph: And that is an equation by itself. Cheers!
|
item-38 at level 1: text: And that is an equation by itself. Cheers!
|
||||||
item-39 at level 1: paragraph:
|
item-39 at level 1: text:
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.6.0",
|
"version": "1.7.0",
|
||||||
"name": "equations",
|
"name": "equations",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
@@ -182,7 +182,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "This is a word document and this is an inline equation: ",
|
"orig": "This is a word document and this is an inline equation: ",
|
||||||
"text": "This is a word document and this is an inline equation: "
|
"text": "This is a word document and this is an inline equation: "
|
||||||
@@ -206,7 +206,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": ". If instead, I want an equation by line, I can do this:",
|
"orig": ". If instead, I want an equation by line, I can do this:",
|
||||||
"text": ". If instead, I want an equation by line, I can do this:"
|
"text": ". If instead, I want an equation by line, I can do this:"
|
||||||
@@ -218,7 +218,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -242,7 +242,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "And that is an equation by itself. Cheers!",
|
"orig": "And that is an equation by itself. Cheers!",
|
||||||
"text": "And that is an equation by itself. Cheers!",
|
"text": "And that is an equation by itself. Cheers!",
|
||||||
@@ -261,7 +261,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -273,7 +273,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "This is another equation:",
|
"orig": "This is another equation:",
|
||||||
"text": "This is another equation:",
|
"text": "This is another equation:",
|
||||||
@@ -304,7 +304,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -316,7 +316,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
||||||
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
||||||
@@ -335,7 +335,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -347,7 +347,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -359,7 +359,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "This is a word document and this is an inline equation: ",
|
"orig": "This is a word document and this is an inline equation: ",
|
||||||
"text": "This is a word document and this is an inline equation: "
|
"text": "This is a word document and this is an inline equation: "
|
||||||
@@ -383,7 +383,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": ". If instead, I want an equation by line, I can do this:",
|
"orig": ". If instead, I want an equation by line, I can do this:",
|
||||||
"text": ". If instead, I want an equation by line, I can do this:"
|
"text": ". If instead, I want an equation by line, I can do this:"
|
||||||
@@ -395,7 +395,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -419,7 +419,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -431,7 +431,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "And that is an equation by itself. Cheers!",
|
"orig": "And that is an equation by itself. Cheers!",
|
||||||
"text": "And that is an equation by itself. Cheers!",
|
"text": "And that is an equation by itself. Cheers!",
|
||||||
@@ -450,7 +450,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -462,7 +462,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "This is another equation:",
|
"orig": "This is another equation:",
|
||||||
"text": "This is another equation:",
|
"text": "This is another equation:",
|
||||||
@@ -481,7 +481,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -505,7 +505,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -517,7 +517,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
||||||
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
||||||
@@ -536,7 +536,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -548,7 +548,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -560,7 +560,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "This is a word document and this is an inline equation: ",
|
"orig": "This is a word document and this is an inline equation: ",
|
||||||
"text": "This is a word document and this is an inline equation: "
|
"text": "This is a word document and this is an inline equation: "
|
||||||
@@ -584,7 +584,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": ". If instead, I want an equation by line, I can do this:",
|
"orig": ". If instead, I want an equation by line, I can do this:",
|
||||||
"text": ". If instead, I want an equation by line, I can do this:"
|
"text": ". If instead, I want an equation by line, I can do this:"
|
||||||
@@ -596,7 +596,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -620,7 +620,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -632,7 +632,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "And that is an equation by itself. Cheers!",
|
"orig": "And that is an equation by itself. Cheers!",
|
||||||
"text": "And that is an equation by itself. Cheers!",
|
"text": "And that is an equation by itself. Cheers!",
|
||||||
@@ -651,7 +651,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: paragraph: Lorem ipsum dolor sit amet, cons ... quam non, sodales sem. Nulla facilisi.
|
item-1 at level 1: text: Lorem ipsum dolor sit amet, cons ... quam non, sodales sem. Nulla facilisi.
|
||||||
item-2 at level 1: paragraph:
|
item-2 at level 1: text:
|
||||||
item-3 at level 1: paragraph: Duis condimentum dui eget ullamc ... cus tempor, et tristique ante aliquet.
|
item-3 at level 1: text: Duis condimentum dui eget ullamc ... cus tempor, et tristique ante aliquet.
|
||||||
item-4 at level 1: paragraph:
|
item-4 at level 1: text:
|
||||||
item-5 at level 1: paragraph: Maecenas id neque pharetra, elei ... ulla faucibus eu. Donec ut nisl metus.
|
item-5 at level 1: text: Maecenas id neque pharetra, elei ... ulla faucibus eu. Donec ut nisl metus.
|
||||||
item-6 at level 1: paragraph:
|
item-6 at level 1: text:
|
||||||
item-7 at level 1: paragraph: Duis ac tellus sed turpis feugia ... pellentesque rhoncus, blandit eu nisl.
|
item-7 at level 1: text: Duis ac tellus sed turpis feugia ... pellentesque rhoncus, blandit eu nisl.
|
||||||
item-8 at level 1: paragraph:
|
item-8 at level 1: text:
|
||||||
item-9 at level 1: paragraph: Nunc vehicula mattis erat ac con ... udin, vehicula turpis eu, tempus nibh.
|
item-9 at level 1: text: Nunc vehicula mattis erat ac con ... udin, vehicula turpis eu, tempus nibh.
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.6.0",
|
"version": "1.7.0",
|
||||||
"name": "lorem_ipsum",
|
"name": "lorem_ipsum",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
@@ -58,7 +58,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin elit mi, fermentum vitae dolor facilisis, porttitor mollis quam. Cras quam massa, venenatis faucibus libero vel, euismod sollicitudin ipsum. Aliquam semper sapien leo, ac ultrices nibh mollis congue. Cras luctus ultrices est, ut scelerisque eros euismod ut. Curabitur ac tincidunt felis, non scelerisque lectus. Praesent sollicitudin vulputate est id consequat. Vestibulum pharetra ligula sit amet varius porttitor. Sed eros diam, gravida non varius at, scelerisque in libero. Ut auctor finibus mauris sit amet ornare. Sed facilisis leo at urna rhoncus, in facilisis arcu eleifend. Sed tincidunt lacinia fermentum. Cras non purus fringilla, semper quam non, sodales sem. Nulla facilisi.",
|
"orig": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin elit mi, fermentum vitae dolor facilisis, porttitor mollis quam. Cras quam massa, venenatis faucibus libero vel, euismod sollicitudin ipsum. Aliquam semper sapien leo, ac ultrices nibh mollis congue. Cras luctus ultrices est, ut scelerisque eros euismod ut. Curabitur ac tincidunt felis, non scelerisque lectus. Praesent sollicitudin vulputate est id consequat. Vestibulum pharetra ligula sit amet varius porttitor. Sed eros diam, gravida non varius at, scelerisque in libero. Ut auctor finibus mauris sit amet ornare. Sed facilisis leo at urna rhoncus, in facilisis arcu eleifend. Sed tincidunt lacinia fermentum. Cras non purus fringilla, semper quam non, sodales sem. Nulla facilisi.",
|
||||||
"text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin elit mi, fermentum vitae dolor facilisis, porttitor mollis quam. Cras quam massa, venenatis faucibus libero vel, euismod sollicitudin ipsum. Aliquam semper sapien leo, ac ultrices nibh mollis congue. Cras luctus ultrices est, ut scelerisque eros euismod ut. Curabitur ac tincidunt felis, non scelerisque lectus. Praesent sollicitudin vulputate est id consequat. Vestibulum pharetra ligula sit amet varius porttitor. Sed eros diam, gravida non varius at, scelerisque in libero. Ut auctor finibus mauris sit amet ornare. Sed facilisis leo at urna rhoncus, in facilisis arcu eleifend. Sed tincidunt lacinia fermentum. Cras non purus fringilla, semper quam non, sodales sem. Nulla facilisi.",
|
"text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin elit mi, fermentum vitae dolor facilisis, porttitor mollis quam. Cras quam massa, venenatis faucibus libero vel, euismod sollicitudin ipsum. Aliquam semper sapien leo, ac ultrices nibh mollis congue. Cras luctus ultrices est, ut scelerisque eros euismod ut. Curabitur ac tincidunt felis, non scelerisque lectus. Praesent sollicitudin vulputate est id consequat. Vestibulum pharetra ligula sit amet varius porttitor. Sed eros diam, gravida non varius at, scelerisque in libero. Ut auctor finibus mauris sit amet ornare. Sed facilisis leo at urna rhoncus, in facilisis arcu eleifend. Sed tincidunt lacinia fermentum. Cras non purus fringilla, semper quam non, sodales sem. Nulla facilisi.",
|
||||||
@@ -77,7 +77,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -89,7 +89,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Duis condimentum dui eget ullamcorper maximus. Nulla tortor lectus, hendrerit at diam fermentum, euismod ornare orci. Integer ac mauris sed augue ultricies pellentesque. Etiam condimentum turpis a risus dictum, sed tempor arcu vestibulum. Quisque at venenatis tellus. Morbi id lobortis elit. In gravida metus at ornare suscipit. Donec euismod nibh sit amet commodo porttitor. Integer commodo sit amet nisi vel accumsan. Donec lacinia posuere porta. Pellentesque vulputate porta risus, vel consectetur nisl gravida sit amet. Nam scelerisque enim sodales lacus tempor, et tristique ante aliquet.",
|
"orig": "Duis condimentum dui eget ullamcorper maximus. Nulla tortor lectus, hendrerit at diam fermentum, euismod ornare orci. Integer ac mauris sed augue ultricies pellentesque. Etiam condimentum turpis a risus dictum, sed tempor arcu vestibulum. Quisque at venenatis tellus. Morbi id lobortis elit. In gravida metus at ornare suscipit. Donec euismod nibh sit amet commodo porttitor. Integer commodo sit amet nisi vel accumsan. Donec lacinia posuere porta. Pellentesque vulputate porta risus, vel consectetur nisl gravida sit amet. Nam scelerisque enim sodales lacus tempor, et tristique ante aliquet.",
|
||||||
"text": "Duis condimentum dui eget ullamcorper maximus. Nulla tortor lectus, hendrerit at diam fermentum, euismod ornare orci. Integer ac mauris sed augue ultricies pellentesque. Etiam condimentum turpis a risus dictum, sed tempor arcu vestibulum. Quisque at venenatis tellus. Morbi id lobortis elit. In gravida metus at ornare suscipit. Donec euismod nibh sit amet commodo porttitor. Integer commodo sit amet nisi vel accumsan. Donec lacinia posuere porta. Pellentesque vulputate porta risus, vel consectetur nisl gravida sit amet. Nam scelerisque enim sodales lacus tempor, et tristique ante aliquet.",
|
"text": "Duis condimentum dui eget ullamcorper maximus. Nulla tortor lectus, hendrerit at diam fermentum, euismod ornare orci. Integer ac mauris sed augue ultricies pellentesque. Etiam condimentum turpis a risus dictum, sed tempor arcu vestibulum. Quisque at venenatis tellus. Morbi id lobortis elit. In gravida metus at ornare suscipit. Donec euismod nibh sit amet commodo porttitor. Integer commodo sit amet nisi vel accumsan. Donec lacinia posuere porta. Pellentesque vulputate porta risus, vel consectetur nisl gravida sit amet. Nam scelerisque enim sodales lacus tempor, et tristique ante aliquet.",
|
||||||
@@ -108,7 +108,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -120,7 +120,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Maecenas id neque pharetra, eleifend lectus a, vehicula sapien. Aliquam erat volutpat. Ut arcu erat, blandit id elementum at, aliquet pretium mauris. Nulla at semper orci. Nunc sed maximus metus. Duis eget tristique arcu. Phasellus fringilla augue est, ut bibendum est bibendum vitae. Nam et urna interdum, egestas velit a, consectetur metus. Pellentesque facilisis vehicula orci, eu posuere justo imperdiet non. Vestibulum tincidunt orci ac lorem consequat semper. Fusce semper sollicitudin orci, id lacinia nulla faucibus eu. Donec ut nisl metus.",
|
"orig": "Maecenas id neque pharetra, eleifend lectus a, vehicula sapien. Aliquam erat volutpat. Ut arcu erat, blandit id elementum at, aliquet pretium mauris. Nulla at semper orci. Nunc sed maximus metus. Duis eget tristique arcu. Phasellus fringilla augue est, ut bibendum est bibendum vitae. Nam et urna interdum, egestas velit a, consectetur metus. Pellentesque facilisis vehicula orci, eu posuere justo imperdiet non. Vestibulum tincidunt orci ac lorem consequat semper. Fusce semper sollicitudin orci, id lacinia nulla faucibus eu. Donec ut nisl metus.",
|
||||||
"text": "Maecenas id neque pharetra, eleifend lectus a, vehicula sapien. Aliquam erat volutpat. Ut arcu erat, blandit id elementum at, aliquet pretium mauris. Nulla at semper orci. Nunc sed maximus metus. Duis eget tristique arcu. Phasellus fringilla augue est, ut bibendum est bibendum vitae. Nam et urna interdum, egestas velit a, consectetur metus. Pellentesque facilisis vehicula orci, eu posuere justo imperdiet non. Vestibulum tincidunt orci ac lorem consequat semper. Fusce semper sollicitudin orci, id lacinia nulla faucibus eu. Donec ut nisl metus.",
|
"text": "Maecenas id neque pharetra, eleifend lectus a, vehicula sapien. Aliquam erat volutpat. Ut arcu erat, blandit id elementum at, aliquet pretium mauris. Nulla at semper orci. Nunc sed maximus metus. Duis eget tristique arcu. Phasellus fringilla augue est, ut bibendum est bibendum vitae. Nam et urna interdum, egestas velit a, consectetur metus. Pellentesque facilisis vehicula orci, eu posuere justo imperdiet non. Vestibulum tincidunt orci ac lorem consequat semper. Fusce semper sollicitudin orci, id lacinia nulla faucibus eu. Donec ut nisl metus.",
|
||||||
@@ -139,7 +139,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -151,7 +151,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Duis ac tellus sed turpis feugiat aliquam sed vel justo. Fusce sit amet volutpat massa. Duis tristique finibus metus quis tincidunt. Etiam dapibus fringilla diam at pharetra. Vivamus dolor est, hendrerit ac ligula nec, pharetra lacinia sapien. Phasellus at malesuada orci. Maecenas est justo, mollis non ultrices ut, sagittis commodo odio. Integer viverra mauris pellentesque bibendum vestibulum. Sed eu felis mattis, efficitur justo non, finibus lorem. Phasellus viverra diam et sapien imperdiet interdum. Cras a convallis libero. Integer maximus dui vel lorem hendrerit, sit amet convallis ligula lobortis. Duis eu lacus elementum, scelerisque nunc eget, dignissim libero. Suspendisse mi quam, vehicula sit amet pellentesque rhoncus, blandit eu nisl.",
|
"orig": "Duis ac tellus sed turpis feugiat aliquam sed vel justo. Fusce sit amet volutpat massa. Duis tristique finibus metus quis tincidunt. Etiam dapibus fringilla diam at pharetra. Vivamus dolor est, hendrerit ac ligula nec, pharetra lacinia sapien. Phasellus at malesuada orci. Maecenas est justo, mollis non ultrices ut, sagittis commodo odio. Integer viverra mauris pellentesque bibendum vestibulum. Sed eu felis mattis, efficitur justo non, finibus lorem. Phasellus viverra diam et sapien imperdiet interdum. Cras a convallis libero. Integer maximus dui vel lorem hendrerit, sit amet convallis ligula lobortis. Duis eu lacus elementum, scelerisque nunc eget, dignissim libero. Suspendisse mi quam, vehicula sit amet pellentesque rhoncus, blandit eu nisl.",
|
||||||
"text": "Duis ac tellus sed turpis feugiat aliquam sed vel justo. Fusce sit amet volutpat massa. Duis tristique finibus metus quis tincidunt. Etiam dapibus fringilla diam at pharetra. Vivamus dolor est, hendrerit ac ligula nec, pharetra lacinia sapien. Phasellus at malesuada orci. Maecenas est justo, mollis non ultrices ut, sagittis commodo odio. Integer viverra mauris pellentesque bibendum vestibulum. Sed eu felis mattis, efficitur justo non, finibus lorem. Phasellus viverra diam et sapien imperdiet interdum. Cras a convallis libero. Integer maximus dui vel lorem hendrerit, sit amet convallis ligula lobortis. Duis eu lacus elementum, scelerisque nunc eget, dignissim libero. Suspendisse mi quam, vehicula sit amet pellentesque rhoncus, blandit eu nisl.",
|
"text": "Duis ac tellus sed turpis feugiat aliquam sed vel justo. Fusce sit amet volutpat massa. Duis tristique finibus metus quis tincidunt. Etiam dapibus fringilla diam at pharetra. Vivamus dolor est, hendrerit ac ligula nec, pharetra lacinia sapien. Phasellus at malesuada orci. Maecenas est justo, mollis non ultrices ut, sagittis commodo odio. Integer viverra mauris pellentesque bibendum vestibulum. Sed eu felis mattis, efficitur justo non, finibus lorem. Phasellus viverra diam et sapien imperdiet interdum. Cras a convallis libero. Integer maximus dui vel lorem hendrerit, sit amet convallis ligula lobortis. Duis eu lacus elementum, scelerisque nunc eget, dignissim libero. Suspendisse mi quam, vehicula sit amet pellentesque rhoncus, blandit eu nisl.",
|
||||||
@@ -170,7 +170,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -182,7 +182,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Nunc vehicula mattis erat ac consectetur. Etiam pharetra mauris ut tempor pellentesque. Sed vel libero vitae ante tempus sagittis vel sit amet dolor. Etiam faucibus viverra sodales. Pellentesque ullamcorper magna libero, non malesuada dui bibendum quis. Donec sed dolor non sem luctus volutpat. Morbi vel diam ut urna euismod gravida a id lectus. Vestibulum vel mauris eu tellus hendrerit dapibus. Etiam scelerisque lacus vel ante ultricies vulputate. In ullamcorper malesuada justo, vel scelerisque nisl lacinia at. Donec sodales interdum ipsum, ac bibendum ipsum pharetra interdum. Vivamus condimentum ac ante vel aliquam. Ut consectetur eu nibh nec gravida. Vestibulum accumsan, purus at mollis rutrum, sapien tortor accumsan purus, vitae fermentum urna mauris ut lacus. Fusce vitae leo sollicitudin, vehicula turpis eu, tempus nibh.",
|
"orig": "Nunc vehicula mattis erat ac consectetur. Etiam pharetra mauris ut tempor pellentesque. Sed vel libero vitae ante tempus sagittis vel sit amet dolor. Etiam faucibus viverra sodales. Pellentesque ullamcorper magna libero, non malesuada dui bibendum quis. Donec sed dolor non sem luctus volutpat. Morbi vel diam ut urna euismod gravida a id lectus. Vestibulum vel mauris eu tellus hendrerit dapibus. Etiam scelerisque lacus vel ante ultricies vulputate. In ullamcorper malesuada justo, vel scelerisque nisl lacinia at. Donec sodales interdum ipsum, ac bibendum ipsum pharetra interdum. Vivamus condimentum ac ante vel aliquam. Ut consectetur eu nibh nec gravida. Vestibulum accumsan, purus at mollis rutrum, sapien tortor accumsan purus, vitae fermentum urna mauris ut lacus. Fusce vitae leo sollicitudin, vehicula turpis eu, tempus nibh.",
|
||||||
"text": "Nunc vehicula mattis erat ac consectetur. Etiam pharetra mauris ut tempor pellentesque. Sed vel libero vitae ante tempus sagittis vel sit amet dolor. Etiam faucibus viverra sodales. Pellentesque ullamcorper magna libero, non malesuada dui bibendum quis. Donec sed dolor non sem luctus volutpat. Morbi vel diam ut urna euismod gravida a id lectus. Vestibulum vel mauris eu tellus hendrerit dapibus. Etiam scelerisque lacus vel ante ultricies vulputate. In ullamcorper malesuada justo, vel scelerisque nisl lacinia at. Donec sodales interdum ipsum, ac bibendum ipsum pharetra interdum. Vivamus condimentum ac ante vel aliquam. Ut consectetur eu nibh nec gravida. Vestibulum accumsan, purus at mollis rutrum, sapien tortor accumsan purus, vitae fermentum urna mauris ut lacus. Fusce vitae leo sollicitudin, vehicula turpis eu, tempus nibh.",
|
"text": "Nunc vehicula mattis erat ac consectetur. Etiam pharetra mauris ut tempor pellentesque. Sed vel libero vitae ante tempus sagittis vel sit amet dolor. Etiam faucibus viverra sodales. Pellentesque ullamcorper magna libero, non malesuada dui bibendum quis. Donec sed dolor non sem luctus volutpat. Morbi vel diam ut urna euismod gravida a id lectus. Vestibulum vel mauris eu tellus hendrerit dapibus. Etiam scelerisque lacus vel ante ultricies vulputate. In ullamcorper malesuada justo, vel scelerisque nisl lacinia at. Donec sodales interdum ipsum, ac bibendum ipsum pharetra interdum. Vivamus condimentum ac ante vel aliquam. Ut consectetur eu nibh nec gravida. Vestibulum accumsan, purus at mollis rutrum, sapien tortor accumsan purus, vitae fermentum urna mauris ut lacus. Fusce vitae leo sollicitudin, vehicula turpis eu, tempus nibh.",
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: table with [2x2]
|
item-1 at level 1: table with [2x2]
|
||||||
item-2 at level 1: paragraph:
|
item-2 at level 1: text:
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.6.0",
|
"version": "1.7.0",
|
||||||
"name": "table_with_equations",
|
"name": "table_with_equations",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
@@ -37,7 +37,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -69,7 +69,8 @@
|
|||||||
"text": "The next cell has an equation",
|
"text": "The next cell has an equation",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -81,7 +82,8 @@
|
|||||||
"text": "$A= \\pi r^{2}$",
|
"text": "$A= \\pi r^{2}$",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -93,7 +95,8 @@
|
|||||||
"text": "The next cell has another equation",
|
"text": "The next cell has another equation",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -105,7 +108,8 @@
|
|||||||
"text": "$x=\\frac{-b \\pm \\sqrt{b^{2}-4ac}}{2a}$",
|
"text": "$x=\\frac{-b \\pm \\sqrt{b^{2}-4ac}}{2a}$",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"num_rows": 2,
|
"num_rows": 2,
|
||||||
@@ -122,7 +126,8 @@
|
|||||||
"text": "The next cell has an equation",
|
"text": "The next cell has an equation",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -134,7 +139,8 @@
|
|||||||
"text": "$A= \\pi r^{2}$",
|
"text": "$A= \\pi r^{2}$",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
@@ -148,7 +154,8 @@
|
|||||||
"text": "The next cell has another equation",
|
"text": "The next cell has another equation",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -160,7 +167,8 @@
|
|||||||
"text": "$x=\\frac{-b \\pm \\sqrt{b^{2}-4ac}}{2a}$",
|
"text": "$x=\\frac{-b \\pm \\sqrt{b^{2}-4ac}}{2a}$",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -2,9 +2,9 @@ item-0 at level 0: unspecified: group _root_
|
|||||||
item-1 at level 1: list: group list
|
item-1 at level 1: list: group list
|
||||||
item-2 at level 2: list_item: Hello world1
|
item-2 at level 2: list_item: Hello world1
|
||||||
item-3 at level 2: list_item: Hello2
|
item-3 at level 2: list_item: Hello2
|
||||||
item-4 at level 1: paragraph:
|
item-4 at level 1: text:
|
||||||
item-5 at level 1: paragraph: Some text before
|
item-5 at level 1: text: Some text before
|
||||||
item-6 at level 1: table with [3x3]
|
item-6 at level 1: table with [3x3]
|
||||||
item-7 at level 1: paragraph:
|
item-7 at level 1: text:
|
||||||
item-8 at level 1: paragraph:
|
item-8 at level 1: text:
|
||||||
item-9 at level 1: paragraph: Some text after
|
item-9 at level 1: text: Some text after
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.6.0",
|
"version": "1.7.0",
|
||||||
"name": "tablecell",
|
"name": "tablecell",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
@@ -112,7 +112,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -124,7 +124,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Some text before",
|
"orig": "Some text before",
|
||||||
"text": "Some text before",
|
"text": "Some text before",
|
||||||
@@ -143,7 +143,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -155,7 +155,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -167,7 +167,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Some text after",
|
"orig": "Some text after",
|
||||||
"text": "Some text after",
|
"text": "Some text after",
|
||||||
@@ -206,7 +206,8 @@
|
|||||||
"text": "Tab1",
|
"text": "Tab1",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -218,7 +219,8 @@
|
|||||||
"text": "Tab2",
|
"text": "Tab2",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -230,7 +232,8 @@
|
|||||||
"text": "Tab3",
|
"text": "Tab3",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -242,7 +245,8 @@
|
|||||||
"text": "A",
|
"text": "A",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -254,7 +258,8 @@
|
|||||||
"text": "B",
|
"text": "B",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -266,7 +271,8 @@
|
|||||||
"text": "C",
|
"text": "C",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -278,7 +284,8 @@
|
|||||||
"text": "D",
|
"text": "D",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -290,7 +297,8 @@
|
|||||||
"text": "E",
|
"text": "E",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -302,7 +310,8 @@
|
|||||||
"text": "F",
|
"text": "F",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"num_rows": 3,
|
"num_rows": 3,
|
||||||
@@ -319,7 +328,8 @@
|
|||||||
"text": "Tab1",
|
"text": "Tab1",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -331,7 +341,8 @@
|
|||||||
"text": "Tab2",
|
"text": "Tab2",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -343,7 +354,8 @@
|
|||||||
"text": "Tab3",
|
"text": "Tab3",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
@@ -357,7 +369,8 @@
|
|||||||
"text": "A",
|
"text": "A",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -369,7 +382,8 @@
|
|||||||
"text": "B",
|
"text": "B",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -381,7 +395,8 @@
|
|||||||
"text": "C",
|
"text": "C",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
@@ -395,7 +410,8 @@
|
|||||||
"text": "D",
|
"text": "D",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -407,7 +423,8 @@
|
|||||||
"text": "E",
|
"text": "E",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -419,7 +436,8 @@
|
|||||||
"text": "F",
|
"text": "F",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: paragraph: Test with three images in unusual formats
|
item-1 at level 1: text: Test with three images in unusual formats
|
||||||
item-2 at level 1: paragraph: Raster in emf:
|
item-2 at level 1: text: Raster in emf:
|
||||||
item-3 at level 1: picture
|
item-3 at level 1: picture
|
||||||
item-4 at level 1: paragraph: Vector in emf:
|
item-4 at level 1: text: Vector in emf:
|
||||||
item-5 at level 1: picture
|
item-5 at level 1: picture
|
||||||
item-6 at level 1: paragraph: Raster in webp:
|
item-6 at level 1: text: Raster in webp:
|
||||||
item-7 at level 1: picture
|
item-7 at level 1: picture
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.6.0",
|
"version": "1.7.0",
|
||||||
"name": "test_emf_docx",
|
"name": "test_emf_docx",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
@@ -52,7 +52,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Test with three images in unusual formats",
|
"orig": "Test with three images in unusual formats",
|
||||||
"text": "Test with three images in unusual formats",
|
"text": "Test with three images in unusual formats",
|
||||||
@@ -71,7 +71,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Raster in emf:",
|
"orig": "Raster in emf:",
|
||||||
"text": "Raster in emf:",
|
"text": "Raster in emf:",
|
||||||
@@ -90,7 +90,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Vector in emf:",
|
"orig": "Vector in emf:",
|
||||||
"text": "Vector in emf:",
|
"text": "Vector in emf:",
|
||||||
@@ -109,7 +109,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Raster in webp:",
|
"orig": "Raster in webp:",
|
||||||
"text": "Raster in webp:",
|
"text": "Raster in webp:",
|
||||||
|
|||||||
120
tests/data/groundtruth/docling_v2/textbox.docx.itxt
vendored
120
tests/data/groundtruth/docling_v2/textbox.docx.itxt
vendored
@@ -1,90 +1,90 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: paragraph: Chiayi County Shuishang Township ... mentary School Affiliated Kindergarten
|
item-1 at level 1: text: Chiayi County Shuishang Township ... mentary School Affiliated Kindergarten
|
||||||
item-2 at level 1: paragraph: Infectious Disease Reporting Pro ... r the 113th Academic Year Kindergarten
|
item-2 at level 1: text: Infectious Disease Reporting Pro ... r the 113th Academic Year Kindergarten
|
||||||
item-3 at level 1: paragraph:
|
item-3 at level 1: text:
|
||||||
item-4 at level 1: section: group textbox
|
item-4 at level 1: section: group textbox
|
||||||
item-5 at level 2: paragraph: Student falls ill
|
item-5 at level 2: text: Student falls ill
|
||||||
item-6 at level 2: paragraph:
|
item-6 at level 2: text:
|
||||||
item-7 at level 2: list: group list
|
item-7 at level 2: list: group list
|
||||||
item-8 at level 3: list_item: Suggested Reportable Symptoms:
|
item-8 at level 3: list_item: Suggested Reportable Symptoms:
|
||||||
* ... sh
|
* ... sh
|
||||||
* Blisters
|
* Blisters
|
||||||
* Headache
|
* Headache
|
||||||
* Sore throat
|
* Sore throat
|
||||||
item-9 at level 1: paragraph:
|
item-9 at level 1: text:
|
||||||
item-10 at level 1: paragraph:
|
item-10 at level 1: text:
|
||||||
item-11 at level 1: section: group textbox
|
item-11 at level 1: section: group textbox
|
||||||
item-12 at level 2: paragraph: If a caregiver suspects that wit ... the same suggested reportable symptoms
|
item-12 at level 2: text: If a caregiver suspects that wit ... the same suggested reportable symptoms
|
||||||
item-13 at level 1: paragraph:
|
item-13 at level 1: text:
|
||||||
item-14 at level 1: paragraph:
|
item-14 at level 1: text:
|
||||||
item-15 at level 1: paragraph:
|
item-15 at level 1: text:
|
||||||
item-16 at level 1: paragraph:
|
item-16 at level 1: text:
|
||||||
item-17 at level 1: section: group textbox
|
item-17 at level 1: section: group textbox
|
||||||
item-18 at level 2: paragraph: Yes
|
item-18 at level 2: text: Yes
|
||||||
item-19 at level 1: paragraph:
|
item-19 at level 1: text:
|
||||||
item-20 at level 1: paragraph:
|
item-20 at level 1: text:
|
||||||
item-21 at level 1: section: group textbox
|
item-21 at level 1: section: group textbox
|
||||||
item-22 at level 2: list: group list
|
item-22 at level 2: list: group list
|
||||||
item-23 at level 3: list_item: A report must be submitted withi ... saster Prevention Information Network.
|
item-23 at level 3: list_item: A report must be submitted withi ... saster Prevention Information Network.
|
||||||
item-24 at level 3: list_item: A report must also be submitted ... d Infectious Disease Reporting System.
|
item-24 at level 3: list_item: A report must also be submitted ... d Infectious Disease Reporting System.
|
||||||
item-25 at level 2: paragraph:
|
item-25 at level 2: text:
|
||||||
item-26 at level 1: list: group list
|
item-26 at level 1: list: group list
|
||||||
item-27 at level 1: paragraph:
|
item-27 at level 1: text:
|
||||||
item-28 at level 1: paragraph:
|
item-28 at level 1: text:
|
||||||
item-29 at level 1: paragraph:
|
item-29 at level 1: text:
|
||||||
item-30 at level 1: paragraph:
|
item-30 at level 1: text:
|
||||||
item-31 at level 1: paragraph:
|
item-31 at level 1: text:
|
||||||
item-32 at level 1: section: group textbox
|
item-32 at level 1: section: group textbox
|
||||||
item-33 at level 2: paragraph: Health Bureau:
|
item-33 at level 2: text: Health Bureau:
|
||||||
item-34 at level 2: paragraph: Upon receiving a report from the ... rt to the Centers for Disease Control.
|
item-34 at level 2: text: Upon receiving a report from the ... rt to the Centers for Disease Control.
|
||||||
item-35 at level 2: list: group list
|
item-35 at level 2: list: group list
|
||||||
item-36 at level 3: list_item: If necessary, provide health edu ... vidual to undergo specimen collection.
|
item-36 at level 3: list_item: If necessary, provide health edu ... vidual to undergo specimen collection.
|
||||||
item-37 at level 3: list_item: Implement appropriate epidemic p ... the Communicable Disease Control Act.
|
item-37 at level 3: list_item: Implement appropriate epidemic p ... the Communicable Disease Control Act.
|
||||||
item-38 at level 2: paragraph:
|
item-38 at level 2: text:
|
||||||
item-39 at level 1: list: group list
|
item-39 at level 1: list: group list
|
||||||
item-40 at level 1: paragraph:
|
item-40 at level 1: text:
|
||||||
item-41 at level 1: section: group textbox
|
item-41 at level 1: section: group textbox
|
||||||
item-42 at level 2: paragraph: Department of Education:
|
item-42 at level 2: text: Department of Education:
|
||||||
Collabo ... vention measures at all school levels.
|
Collabo ... vention measures at all school levels.
|
||||||
item-43 at level 1: paragraph:
|
item-43 at level 1: text:
|
||||||
item-44 at level 1: paragraph:
|
item-44 at level 1: text:
|
||||||
item-45 at level 1: paragraph:
|
item-45 at level 1: text:
|
||||||
item-46 at level 1: paragraph:
|
item-46 at level 1: text:
|
||||||
item-47 at level 1: paragraph:
|
item-47 at level 1: text:
|
||||||
item-48 at level 1: paragraph:
|
item-48 at level 1: text:
|
||||||
item-49 at level 1: paragraph:
|
item-49 at level 1: text:
|
||||||
item-50 at level 1: section: group textbox
|
item-50 at level 1: section: group textbox
|
||||||
item-51 at level 2: inline: group group
|
item-51 at level 2: inline: group group
|
||||||
item-52 at level 3: paragraph: The Health Bureau will handle
|
item-52 at level 3: text: The Health Bureau will handle
|
||||||
item-53 at level 3: paragraph: reporting and specimen collection
|
item-53 at level 3: text: reporting and specimen collection
|
||||||
item-54 at level 3: paragraph: .
|
item-54 at level 3: text: .
|
||||||
item-55 at level 2: paragraph:
|
item-55 at level 2: text:
|
||||||
item-56 at level 1: paragraph:
|
item-56 at level 1: text:
|
||||||
item-57 at level 1: paragraph:
|
item-57 at level 1: text:
|
||||||
item-58 at level 1: paragraph:
|
item-58 at level 1: text:
|
||||||
item-59 at level 1: section: group textbox
|
item-59 at level 1: section: group textbox
|
||||||
item-60 at level 2: paragraph: Whether the epidemic has eased.
|
item-60 at level 2: text: Whether the epidemic has eased.
|
||||||
item-61 at level 2: paragraph:
|
item-61 at level 2: text:
|
||||||
item-62 at level 1: paragraph:
|
item-62 at level 1: text:
|
||||||
item-63 at level 1: section: group textbox
|
item-63 at level 1: section: group textbox
|
||||||
item-64 at level 2: paragraph: Whether the test results are pos ... legally designated infectious disease.
|
item-64 at level 2: text: Whether the test results are pos ... legally designated infectious disease.
|
||||||
item-65 at level 2: paragraph: No
|
item-65 at level 2: text: No
|
||||||
item-66 at level 1: paragraph:
|
item-66 at level 1: text:
|
||||||
item-67 at level 1: paragraph:
|
item-67 at level 1: text:
|
||||||
item-68 at level 1: section: group textbox
|
item-68 at level 1: section: group textbox
|
||||||
item-69 at level 2: paragraph: Yes
|
item-69 at level 2: text: Yes
|
||||||
item-70 at level 1: paragraph:
|
item-70 at level 1: text:
|
||||||
item-71 at level 1: section: group textbox
|
item-71 at level 1: section: group textbox
|
||||||
item-72 at level 2: paragraph: Yes
|
item-72 at level 2: text: Yes
|
||||||
item-73 at level 1: paragraph:
|
item-73 at level 1: text:
|
||||||
item-74 at level 1: paragraph:
|
item-74 at level 1: text:
|
||||||
item-75 at level 1: section: group textbox
|
item-75 at level 1: section: group textbox
|
||||||
item-76 at level 2: paragraph: Case closed.
|
item-76 at level 2: text: Case closed.
|
||||||
item-77 at level 2: paragraph:
|
item-77 at level 2: text:
|
||||||
item-78 at level 2: paragraph: The Health Bureau will carry out ... ters for Disease Control if necessary.
|
item-78 at level 2: text: The Health Bureau will carry out ... ters for Disease Control if necessary.
|
||||||
item-79 at level 1: paragraph:
|
item-79 at level 1: text:
|
||||||
item-80 at level 1: section: group textbox
|
item-80 at level 1: section: group textbox
|
||||||
item-81 at level 2: paragraph: No
|
item-81 at level 2: text: No
|
||||||
item-82 at level 1: paragraph:
|
item-82 at level 1: text:
|
||||||
item-83 at level 1: paragraph:
|
item-83 at level 1: text:
|
||||||
item-84 at level 1: paragraph:
|
item-84 at level 1: text:
|
||||||
122
tests/data/groundtruth/docling_v2/textbox.docx.json
vendored
122
tests/data/groundtruth/docling_v2/textbox.docx.json
vendored
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.6.0",
|
"version": "1.7.0",
|
||||||
"name": "textbox",
|
"name": "textbox",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
@@ -491,7 +491,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Chiayi County Shuishang Township Nanjing Elementary School Affiliated Kindergarten",
|
"orig": "Chiayi County Shuishang Township Nanjing Elementary School Affiliated Kindergarten",
|
||||||
"text": "Chiayi County Shuishang Township Nanjing Elementary School Affiliated Kindergarten",
|
"text": "Chiayi County Shuishang Township Nanjing Elementary School Affiliated Kindergarten",
|
||||||
@@ -510,7 +510,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Infectious Disease Reporting Procedure for the 113th Academic Year Kindergarten",
|
"orig": "Infectious Disease Reporting Procedure for the 113th Academic Year Kindergarten",
|
||||||
"text": "Infectious Disease Reporting Procedure for the 113th Academic Year Kindergarten",
|
"text": "Infectious Disease Reporting Procedure for the 113th Academic Year Kindergarten",
|
||||||
@@ -529,7 +529,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -541,7 +541,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Student falls ill",
|
"orig": "Student falls ill",
|
||||||
"text": "Student falls ill",
|
"text": "Student falls ill",
|
||||||
@@ -560,7 +560,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -593,7 +593,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -605,7 +605,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -617,7 +617,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "If a caregiver suspects that within one week, a fifth of the class (for classes with more than 15 students) or more than three students (for classes with 15 or fewer students)\nshow the same suggested reportable symptoms",
|
"orig": "If a caregiver suspects that within one week, a fifth of the class (for classes with more than 15 students) or more than three students (for classes with 15 or fewer students)\nshow the same suggested reportable symptoms",
|
||||||
"text": "If a caregiver suspects that within one week, a fifth of the class (for classes with more than 15 students) or more than three students (for classes with 15 or fewer students)\nshow the same suggested reportable symptoms",
|
"text": "If a caregiver suspects that within one week, a fifth of the class (for classes with more than 15 students) or more than three students (for classes with 15 or fewer students)\nshow the same suggested reportable symptoms",
|
||||||
@@ -636,7 +636,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -648,7 +648,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -660,7 +660,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -672,7 +672,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -684,7 +684,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Yes",
|
"orig": "Yes",
|
||||||
"text": "Yes",
|
"text": "Yes",
|
||||||
@@ -703,7 +703,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -715,7 +715,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -769,7 +769,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -781,7 +781,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -793,7 +793,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -805,7 +805,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -817,7 +817,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -829,7 +829,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -841,7 +841,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Health Bureau:",
|
"orig": "Health Bureau:",
|
||||||
"text": "Health Bureau:",
|
"text": "Health Bureau:",
|
||||||
@@ -860,7 +860,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Upon receiving a report from the kindergarten, conduct a preliminary assessment of the case, and depending on the situation and type of illness, carry out an epidemiological investigation and report to the Centers for Disease Control.",
|
"orig": "Upon receiving a report from the kindergarten, conduct a preliminary assessment of the case, and depending on the situation and type of illness, carry out an epidemiological investigation and report to the Centers for Disease Control.",
|
||||||
"text": "Upon receiving a report from the kindergarten, conduct a preliminary assessment of the case, and depending on the situation and type of illness, carry out an epidemiological investigation and report to the Centers for Disease Control.",
|
"text": "Upon receiving a report from the kindergarten, conduct a preliminary assessment of the case, and depending on the situation and type of illness, carry out an epidemiological investigation and report to the Centers for Disease Control.",
|
||||||
@@ -921,7 +921,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -933,7 +933,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -945,7 +945,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.",
|
"orig": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.",
|
||||||
"text": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.",
|
"text": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.",
|
||||||
@@ -964,7 +964,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -976,7 +976,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -988,7 +988,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1000,7 +1000,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1012,7 +1012,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1024,7 +1024,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1036,7 +1036,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1048,7 +1048,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "The Health Bureau will handle",
|
"orig": "The Health Bureau will handle",
|
||||||
"text": "The Health Bureau will handle",
|
"text": "The Health Bureau will handle",
|
||||||
@@ -1067,7 +1067,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "reporting and specimen collection",
|
"orig": "reporting and specimen collection",
|
||||||
"text": "reporting and specimen collection",
|
"text": "reporting and specimen collection",
|
||||||
@@ -1086,7 +1086,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": ".",
|
"orig": ".",
|
||||||
"text": ".",
|
"text": ".",
|
||||||
@@ -1105,7 +1105,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1117,7 +1117,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1129,7 +1129,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1141,7 +1141,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1153,7 +1153,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Whether the epidemic has eased.",
|
"orig": "Whether the epidemic has eased.",
|
||||||
"text": "Whether the epidemic has eased.",
|
"text": "Whether the epidemic has eased.",
|
||||||
@@ -1172,7 +1172,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1184,7 +1184,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1196,7 +1196,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Whether the test results are positive for a legally designated infectious disease.",
|
"orig": "Whether the test results are positive for a legally designated infectious disease.",
|
||||||
"text": "Whether the test results are positive for a legally designated infectious disease.",
|
"text": "Whether the test results are positive for a legally designated infectious disease.",
|
||||||
@@ -1215,7 +1215,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "No",
|
"orig": "No",
|
||||||
"text": "No",
|
"text": "No",
|
||||||
@@ -1234,7 +1234,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1246,7 +1246,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1258,7 +1258,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Yes",
|
"orig": "Yes",
|
||||||
"text": "Yes",
|
"text": "Yes",
|
||||||
@@ -1277,7 +1277,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1289,7 +1289,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Yes",
|
"orig": "Yes",
|
||||||
"text": "Yes",
|
"text": "Yes",
|
||||||
@@ -1308,7 +1308,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1320,7 +1320,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1332,7 +1332,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Case closed.",
|
"orig": "Case closed.",
|
||||||
"text": "Case closed.",
|
"text": "Case closed.",
|
||||||
@@ -1351,7 +1351,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1363,7 +1363,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.",
|
"orig": "The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.",
|
||||||
"text": "The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.",
|
"text": "The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.",
|
||||||
@@ -1382,7 +1382,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1394,7 +1394,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "No",
|
"orig": "No",
|
||||||
"text": "No",
|
"text": "No",
|
||||||
@@ -1413,7 +1413,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1425,7 +1425,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1437,7 +1437,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
|
|||||||
@@ -1,18 +1,18 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: paragraph: italic
|
item-1 at level 1: text: italic
|
||||||
item-2 at level 1: paragraph: bold
|
item-2 at level 1: text: bold
|
||||||
item-3 at level 1: paragraph: underline
|
item-3 at level 1: text: underline
|
||||||
item-4 at level 1: paragraph: hyperlink
|
item-4 at level 1: text: hyperlink
|
||||||
item-5 at level 1: paragraph: italic and bold hyperlink
|
item-5 at level 1: text: italic and bold hyperlink
|
||||||
item-6 at level 1: inline: group group
|
item-6 at level 1: inline: group group
|
||||||
item-7 at level 2: paragraph: Normal
|
item-7 at level 2: text: Normal
|
||||||
item-8 at level 2: paragraph: italic
|
item-8 at level 2: text: italic
|
||||||
item-9 at level 2: paragraph: bold
|
item-9 at level 2: text: bold
|
||||||
item-10 at level 2: paragraph: underline
|
item-10 at level 2: text: underline
|
||||||
item-11 at level 2: paragraph: and
|
item-11 at level 2: text: and
|
||||||
item-12 at level 2: paragraph: hyperlink
|
item-12 at level 2: text: hyperlink
|
||||||
item-13 at level 2: paragraph: on the same line
|
item-13 at level 2: text: on the same line
|
||||||
item-14 at level 1: paragraph:
|
item-14 at level 1: text:
|
||||||
item-15 at level 1: list: group list
|
item-15 at level 1: list: group list
|
||||||
item-16 at level 2: list_item: Italic bullet 1
|
item-16 at level 2: list_item: Italic bullet 1
|
||||||
item-17 at level 2: list_item: Bold bullet 2
|
item-17 at level 2: list_item: Bold bullet 2
|
||||||
@@ -29,4 +29,4 @@ item-0 at level 0: unspecified: group _root_
|
|||||||
item-28 at level 5: text: Nested
|
item-28 at level 5: text: Nested
|
||||||
item-29 at level 5: text: italic
|
item-29 at level 5: text: italic
|
||||||
item-30 at level 5: text: bold
|
item-30 at level 5: text: bold
|
||||||
item-31 at level 1: paragraph:
|
item-31 at level 1: text:
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.6.0",
|
"version": "1.7.0",
|
||||||
"name": "unit_test_formatting",
|
"name": "unit_test_formatting",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
@@ -174,7 +174,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "italic",
|
"orig": "italic",
|
||||||
"text": "italic",
|
"text": "italic",
|
||||||
@@ -193,7 +193,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "bold",
|
"orig": "bold",
|
||||||
"text": "bold",
|
"text": "bold",
|
||||||
@@ -212,7 +212,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "underline",
|
"orig": "underline",
|
||||||
"text": "underline",
|
"text": "underline",
|
||||||
@@ -231,7 +231,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "hyperlink",
|
"orig": "hyperlink",
|
||||||
"text": "hyperlink",
|
"text": "hyperlink",
|
||||||
@@ -251,7 +251,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "italic and bold hyperlink",
|
"orig": "italic and bold hyperlink",
|
||||||
"text": "italic and bold hyperlink",
|
"text": "italic and bold hyperlink",
|
||||||
@@ -271,7 +271,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Normal",
|
"orig": "Normal",
|
||||||
"text": "Normal",
|
"text": "Normal",
|
||||||
@@ -290,7 +290,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "italic",
|
"orig": "italic",
|
||||||
"text": "italic",
|
"text": "italic",
|
||||||
@@ -309,7 +309,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "bold",
|
"orig": "bold",
|
||||||
"text": "bold",
|
"text": "bold",
|
||||||
@@ -328,7 +328,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "underline",
|
"orig": "underline",
|
||||||
"text": "underline",
|
"text": "underline",
|
||||||
@@ -347,7 +347,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "and",
|
"orig": "and",
|
||||||
"text": "and",
|
"text": "and",
|
||||||
@@ -366,7 +366,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "hyperlink",
|
"orig": "hyperlink",
|
||||||
"text": "hyperlink",
|
"text": "hyperlink",
|
||||||
@@ -386,7 +386,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "on the same line",
|
"orig": "on the same line",
|
||||||
"text": "on the same line",
|
"text": "on the same line",
|
||||||
@@ -405,7 +405,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -649,7 +649,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
|
|||||||
@@ -1,48 +1,48 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: title: Test Document
|
item-1 at level 1: title: Test Document
|
||||||
item-2 at level 2: paragraph:
|
item-2 at level 2: text:
|
||||||
item-3 at level 2: section_header: Section 1
|
item-3 at level 2: section_header: Section 1
|
||||||
item-4 at level 3: paragraph:
|
item-4 at level 3: text:
|
||||||
item-5 at level 3: paragraph: Paragraph 1.1
|
item-5 at level 3: text: Paragraph 1.1
|
||||||
item-6 at level 3: paragraph:
|
item-6 at level 3: text:
|
||||||
item-7 at level 3: paragraph: Paragraph 1.2
|
item-7 at level 3: text: Paragraph 1.2
|
||||||
item-8 at level 3: paragraph:
|
item-8 at level 3: text:
|
||||||
item-9 at level 3: section_header: Section 1.1
|
item-9 at level 3: section_header: Section 1.1
|
||||||
item-10 at level 4: paragraph:
|
item-10 at level 4: text:
|
||||||
item-11 at level 4: paragraph: Paragraph 1.1.1
|
item-11 at level 4: text: Paragraph 1.1.1
|
||||||
item-12 at level 4: paragraph:
|
item-12 at level 4: text:
|
||||||
item-13 at level 4: paragraph: Paragraph 1.1.2
|
item-13 at level 4: text: Paragraph 1.1.2
|
||||||
item-14 at level 4: paragraph:
|
item-14 at level 4: text:
|
||||||
item-15 at level 3: section_header: Section 1.2
|
item-15 at level 3: section_header: Section 1.2
|
||||||
item-16 at level 4: paragraph:
|
item-16 at level 4: text:
|
||||||
item-17 at level 4: paragraph: Paragraph 1.1.1
|
item-17 at level 4: text: Paragraph 1.1.1
|
||||||
item-18 at level 4: paragraph:
|
item-18 at level 4: text:
|
||||||
item-19 at level 4: paragraph: Paragraph 1.1.2
|
item-19 at level 4: text: Paragraph 1.1.2
|
||||||
item-20 at level 4: paragraph:
|
item-20 at level 4: text:
|
||||||
item-21 at level 4: section_header: Section 1.2.3
|
item-21 at level 4: section_header: Section 1.2.3
|
||||||
item-22 at level 5: paragraph:
|
item-22 at level 5: text:
|
||||||
item-23 at level 5: paragraph: Paragraph 1.2.3.1
|
item-23 at level 5: text: Paragraph 1.2.3.1
|
||||||
item-24 at level 5: paragraph:
|
item-24 at level 5: text:
|
||||||
item-25 at level 5: paragraph: Paragraph 1.2.3.1
|
item-25 at level 5: text: Paragraph 1.2.3.1
|
||||||
item-26 at level 5: paragraph:
|
item-26 at level 5: text:
|
||||||
item-27 at level 5: paragraph:
|
item-27 at level 5: text:
|
||||||
item-28 at level 2: section_header: Section 2
|
item-28 at level 2: section_header: Section 2
|
||||||
item-29 at level 3: paragraph:
|
item-29 at level 3: text:
|
||||||
item-30 at level 3: paragraph: Paragraph 2.1
|
item-30 at level 3: text: Paragraph 2.1
|
||||||
item-31 at level 3: paragraph:
|
item-31 at level 3: text:
|
||||||
item-32 at level 3: paragraph: Paragraph 2.2
|
item-32 at level 3: text: Paragraph 2.2
|
||||||
item-33 at level 3: paragraph:
|
item-33 at level 3: text:
|
||||||
item-34 at level 3: section: group header-2
|
item-34 at level 3: section: group header-2
|
||||||
item-35 at level 4: section_header: Section 2.1.1
|
item-35 at level 4: section_header: Section 2.1.1
|
||||||
item-36 at level 5: paragraph:
|
item-36 at level 5: text:
|
||||||
item-37 at level 5: paragraph: Paragraph 2.1.1.1
|
item-37 at level 5: text: Paragraph 2.1.1.1
|
||||||
item-38 at level 5: paragraph:
|
item-38 at level 5: text:
|
||||||
item-39 at level 5: paragraph: Paragraph 2.1.1.1
|
item-39 at level 5: text: Paragraph 2.1.1.1
|
||||||
item-40 at level 5: paragraph:
|
item-40 at level 5: text:
|
||||||
item-41 at level 3: section_header: Section 2.1
|
item-41 at level 3: section_header: Section 2.1
|
||||||
item-42 at level 4: paragraph:
|
item-42 at level 4: text:
|
||||||
item-43 at level 4: paragraph: Paragraph 2.1.1
|
item-43 at level 4: text: Paragraph 2.1.1
|
||||||
item-44 at level 4: paragraph:
|
item-44 at level 4: text:
|
||||||
item-45 at level 4: paragraph: Paragraph 2.1.2
|
item-45 at level 4: text: Paragraph 2.1.2
|
||||||
item-46 at level 4: paragraph:
|
item-46 at level 4: text:
|
||||||
item-47 at level 4: paragraph:
|
item-47 at level 4: text:
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.6.0",
|
"version": "1.7.0",
|
||||||
"name": "unit_test_headers",
|
"name": "unit_test_headers",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
@@ -71,7 +71,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -118,7 +118,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -130,7 +130,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.1",
|
"orig": "Paragraph 1.1",
|
||||||
"text": "Paragraph 1.1",
|
"text": "Paragraph 1.1",
|
||||||
@@ -149,7 +149,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -161,7 +161,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.2",
|
"orig": "Paragraph 1.2",
|
||||||
"text": "Paragraph 1.2",
|
"text": "Paragraph 1.2",
|
||||||
@@ -180,7 +180,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -221,7 +221,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -233,7 +233,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.1.1",
|
"orig": "Paragraph 1.1.1",
|
||||||
"text": "Paragraph 1.1.1",
|
"text": "Paragraph 1.1.1",
|
||||||
@@ -252,7 +252,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -264,7 +264,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.1.2",
|
"orig": "Paragraph 1.1.2",
|
||||||
"text": "Paragraph 1.1.2",
|
"text": "Paragraph 1.1.2",
|
||||||
@@ -283,7 +283,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -327,7 +327,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -339,7 +339,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.1.1",
|
"orig": "Paragraph 1.1.1",
|
||||||
"text": "Paragraph 1.1.1",
|
"text": "Paragraph 1.1.1",
|
||||||
@@ -358,7 +358,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -370,7 +370,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.1.2",
|
"orig": "Paragraph 1.1.2",
|
||||||
"text": "Paragraph 1.1.2",
|
"text": "Paragraph 1.1.2",
|
||||||
@@ -389,7 +389,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -433,7 +433,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -445,7 +445,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.2.3.1",
|
"orig": "Paragraph 1.2.3.1",
|
||||||
"text": "Paragraph 1.2.3.1",
|
"text": "Paragraph 1.2.3.1",
|
||||||
@@ -464,7 +464,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -476,7 +476,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.2.3.1",
|
"orig": "Paragraph 1.2.3.1",
|
||||||
"text": "Paragraph 1.2.3.1",
|
"text": "Paragraph 1.2.3.1",
|
||||||
@@ -495,7 +495,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -507,7 +507,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -554,7 +554,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -566,7 +566,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.1",
|
"orig": "Paragraph 2.1",
|
||||||
"text": "Paragraph 2.1",
|
"text": "Paragraph 2.1",
|
||||||
@@ -585,7 +585,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -597,7 +597,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.2",
|
"orig": "Paragraph 2.2",
|
||||||
"text": "Paragraph 2.2",
|
"text": "Paragraph 2.2",
|
||||||
@@ -616,7 +616,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -657,7 +657,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -669,7 +669,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.1.1.1",
|
"orig": "Paragraph 2.1.1.1",
|
||||||
"text": "Paragraph 2.1.1.1",
|
"text": "Paragraph 2.1.1.1",
|
||||||
@@ -688,7 +688,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -700,7 +700,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.1.1.1",
|
"orig": "Paragraph 2.1.1.1",
|
||||||
"text": "Paragraph 2.1.1.1",
|
"text": "Paragraph 2.1.1.1",
|
||||||
@@ -719,7 +719,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -763,7 +763,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -775,7 +775,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.1.1",
|
"orig": "Paragraph 2.1.1",
|
||||||
"text": "Paragraph 2.1.1",
|
"text": "Paragraph 2.1.1",
|
||||||
@@ -794,7 +794,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -806,7 +806,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.1.2",
|
"orig": "Paragraph 2.1.2",
|
||||||
"text": "Paragraph 2.1.2",
|
"text": "Paragraph 2.1.2",
|
||||||
@@ -825,7 +825,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -837,7 +837,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
|
|||||||
@@ -1,52 +1,52 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: title: Test Document
|
item-1 at level 1: title: Test Document
|
||||||
item-2 at level 2: paragraph:
|
item-2 at level 2: text:
|
||||||
item-3 at level 2: section_header: 1 Section 1
|
item-3 at level 2: section_header: 1 Section 1
|
||||||
item-4 at level 1: paragraph:
|
item-4 at level 1: text:
|
||||||
item-5 at level 1: paragraph: Paragraph 1.1
|
item-5 at level 1: text: Paragraph 1.1
|
||||||
item-6 at level 1: paragraph:
|
item-6 at level 1: text:
|
||||||
item-7 at level 1: paragraph: Paragraph 1.2
|
item-7 at level 1: text: Paragraph 1.2
|
||||||
item-8 at level 1: paragraph:
|
item-8 at level 1: text:
|
||||||
item-9 at level 1: section: group header-0
|
item-9 at level 1: section: group header-0
|
||||||
item-10 at level 2: section: group header-1
|
item-10 at level 2: section: group header-1
|
||||||
item-11 at level 3: section_header: 1.1 Section 1.1
|
item-11 at level 3: section_header: 1.1 Section 1.1
|
||||||
item-12 at level 4: paragraph:
|
item-12 at level 4: text:
|
||||||
item-13 at level 4: paragraph: Paragraph 1.1.1
|
item-13 at level 4: text: Paragraph 1.1.1
|
||||||
item-14 at level 4: paragraph:
|
item-14 at level 4: text:
|
||||||
item-15 at level 4: paragraph: Paragraph 1.1.2
|
item-15 at level 4: text: Paragraph 1.1.2
|
||||||
item-16 at level 4: paragraph:
|
item-16 at level 4: text:
|
||||||
item-17 at level 3: section_header: 1.2 Section 1.2
|
item-17 at level 3: section_header: 1.2 Section 1.2
|
||||||
item-18 at level 4: paragraph:
|
item-18 at level 4: text:
|
||||||
item-19 at level 4: paragraph: Paragraph 1.1.1
|
item-19 at level 4: text: Paragraph 1.1.1
|
||||||
item-20 at level 4: paragraph:
|
item-20 at level 4: text:
|
||||||
item-21 at level 4: paragraph: Paragraph 1.1.2
|
item-21 at level 4: text: Paragraph 1.1.2
|
||||||
item-22 at level 4: paragraph:
|
item-22 at level 4: text:
|
||||||
item-23 at level 4: section_header: 1.2.1 Section 1.2.3
|
item-23 at level 4: section_header: 1.2.1 Section 1.2.3
|
||||||
item-24 at level 5: paragraph:
|
item-24 at level 5: text:
|
||||||
item-25 at level 5: paragraph: Paragraph 1.2.3.1
|
item-25 at level 5: text: Paragraph 1.2.3.1
|
||||||
item-26 at level 5: paragraph:
|
item-26 at level 5: text:
|
||||||
item-27 at level 5: paragraph: Paragraph 1.2.3.1
|
item-27 at level 5: text: Paragraph 1.2.3.1
|
||||||
item-28 at level 5: paragraph:
|
item-28 at level 5: text:
|
||||||
item-29 at level 5: paragraph:
|
item-29 at level 5: text:
|
||||||
item-30 at level 2: section_header: 2 Section 2
|
item-30 at level 2: section_header: 2 Section 2
|
||||||
item-31 at level 1: paragraph:
|
item-31 at level 1: text:
|
||||||
item-32 at level 1: paragraph: Paragraph 2.1
|
item-32 at level 1: text: Paragraph 2.1
|
||||||
item-33 at level 1: paragraph:
|
item-33 at level 1: text:
|
||||||
item-34 at level 1: paragraph: Paragraph 2.2
|
item-34 at level 1: text: Paragraph 2.2
|
||||||
item-35 at level 1: paragraph:
|
item-35 at level 1: text:
|
||||||
item-36 at level 1: section: group header-0
|
item-36 at level 1: section: group header-0
|
||||||
item-37 at level 2: section: group header-1
|
item-37 at level 2: section: group header-1
|
||||||
item-38 at level 3: section: group header-2
|
item-38 at level 3: section: group header-2
|
||||||
item-39 at level 4: section_header: 2.1.1 Section 2.1.1
|
item-39 at level 4: section_header: 2.1.1 Section 2.1.1
|
||||||
item-40 at level 5: paragraph:
|
item-40 at level 5: text:
|
||||||
item-41 at level 5: paragraph: Paragraph 2.1.1.1
|
item-41 at level 5: text: Paragraph 2.1.1.1
|
||||||
item-42 at level 5: paragraph:
|
item-42 at level 5: text:
|
||||||
item-43 at level 5: paragraph: Paragraph 2.1.1.1
|
item-43 at level 5: text: Paragraph 2.1.1.1
|
||||||
item-44 at level 5: paragraph:
|
item-44 at level 5: text:
|
||||||
item-45 at level 3: section_header: 2.2 Section 2.1
|
item-45 at level 3: section_header: 2.2 Section 2.1
|
||||||
item-46 at level 4: paragraph:
|
item-46 at level 4: text:
|
||||||
item-47 at level 4: paragraph: Paragraph 2.1.1
|
item-47 at level 4: text: Paragraph 2.1.1
|
||||||
item-48 at level 4: paragraph:
|
item-48 at level 4: text:
|
||||||
item-49 at level 4: paragraph: Paragraph 2.1.2
|
item-49 at level 4: text: Paragraph 2.1.2
|
||||||
item-50 at level 4: paragraph:
|
item-50 at level 4: text:
|
||||||
item-51 at level 4: paragraph:
|
item-51 at level 4: text:
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.6.0",
|
"version": "1.7.0",
|
||||||
"name": "unit_test_headers_numbered",
|
"name": "unit_test_headers_numbered",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
@@ -169,7 +169,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -194,7 +194,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -206,7 +206,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.1",
|
"orig": "Paragraph 1.1",
|
||||||
"text": "Paragraph 1.1",
|
"text": "Paragraph 1.1",
|
||||||
@@ -225,7 +225,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -237,7 +237,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.2",
|
"orig": "Paragraph 1.2",
|
||||||
"text": "Paragraph 1.2",
|
"text": "Paragraph 1.2",
|
||||||
@@ -256,7 +256,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -297,7 +297,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -309,7 +309,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.1.1",
|
"orig": "Paragraph 1.1.1",
|
||||||
"text": "Paragraph 1.1.1",
|
"text": "Paragraph 1.1.1",
|
||||||
@@ -328,7 +328,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -340,7 +340,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.1.2",
|
"orig": "Paragraph 1.1.2",
|
||||||
"text": "Paragraph 1.1.2",
|
"text": "Paragraph 1.1.2",
|
||||||
@@ -359,7 +359,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -403,7 +403,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -415,7 +415,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.1.1",
|
"orig": "Paragraph 1.1.1",
|
||||||
"text": "Paragraph 1.1.1",
|
"text": "Paragraph 1.1.1",
|
||||||
@@ -434,7 +434,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -446,7 +446,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.1.2",
|
"orig": "Paragraph 1.1.2",
|
||||||
"text": "Paragraph 1.1.2",
|
"text": "Paragraph 1.1.2",
|
||||||
@@ -465,7 +465,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -509,7 +509,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -521,7 +521,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.2.3.1",
|
"orig": "Paragraph 1.2.3.1",
|
||||||
"text": "Paragraph 1.2.3.1",
|
"text": "Paragraph 1.2.3.1",
|
||||||
@@ -540,7 +540,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -552,7 +552,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 1.2.3.1",
|
"orig": "Paragraph 1.2.3.1",
|
||||||
"text": "Paragraph 1.2.3.1",
|
"text": "Paragraph 1.2.3.1",
|
||||||
@@ -571,7 +571,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -583,7 +583,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -608,7 +608,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -620,7 +620,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.1",
|
"orig": "Paragraph 2.1",
|
||||||
"text": "Paragraph 2.1",
|
"text": "Paragraph 2.1",
|
||||||
@@ -639,7 +639,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -651,7 +651,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.2",
|
"orig": "Paragraph 2.2",
|
||||||
"text": "Paragraph 2.2",
|
"text": "Paragraph 2.2",
|
||||||
@@ -670,7 +670,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -711,7 +711,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -723,7 +723,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.1.1.1",
|
"orig": "Paragraph 2.1.1.1",
|
||||||
"text": "Paragraph 2.1.1.1",
|
"text": "Paragraph 2.1.1.1",
|
||||||
@@ -742,7 +742,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -754,7 +754,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.1.1.1",
|
"orig": "Paragraph 2.1.1.1",
|
||||||
"text": "Paragraph 2.1.1.1",
|
"text": "Paragraph 2.1.1.1",
|
||||||
@@ -773,7 +773,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -817,7 +817,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -829,7 +829,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.1.1",
|
"orig": "Paragraph 2.1.1",
|
||||||
"text": "Paragraph 2.1.1",
|
"text": "Paragraph 2.1.1",
|
||||||
@@ -848,7 +848,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -860,7 +860,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.1.2",
|
"orig": "Paragraph 2.1.2",
|
||||||
"text": "Paragraph 2.1.2",
|
"text": "Paragraph 2.1.2",
|
||||||
@@ -879,7 +879,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -891,7 +891,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
|
|||||||
@@ -1,25 +1,25 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: section: group header-0
|
item-1 at level 1: section: group header-0
|
||||||
item-2 at level 2: section_header: Test Document
|
item-2 at level 2: section_header: Test Document
|
||||||
item-3 at level 3: paragraph:
|
item-3 at level 3: text:
|
||||||
item-4 at level 3: paragraph:
|
item-4 at level 3: text:
|
||||||
item-5 at level 3: paragraph: Paragraph 2.1.1
|
item-5 at level 3: text: Paragraph 2.1.1
|
||||||
item-6 at level 3: paragraph:
|
item-6 at level 3: text:
|
||||||
item-7 at level 3: paragraph: Paragraph 2.1.2
|
item-7 at level 3: text: Paragraph 2.1.2
|
||||||
item-8 at level 3: paragraph:
|
item-8 at level 3: text:
|
||||||
item-9 at level 3: section: group header-2
|
item-9 at level 3: section: group header-2
|
||||||
item-10 at level 4: section_header: Test 1:
|
item-10 at level 4: section_header: Test 1:
|
||||||
item-11 at level 5: list: group list
|
item-11 at level 5: list: group list
|
||||||
item-12 at level 6: list_item: List item 1
|
item-12 at level 6: list_item: List item 1
|
||||||
item-13 at level 6: list_item: List item 2
|
item-13 at level 6: list_item: List item 2
|
||||||
item-14 at level 6: list_item: List item 3
|
item-14 at level 6: list_item: List item 3
|
||||||
item-15 at level 5: paragraph:
|
item-15 at level 5: text:
|
||||||
item-16 at level 4: section_header: Test 2:
|
item-16 at level 4: section_header: Test 2:
|
||||||
item-17 at level 5: list: group list
|
item-17 at level 5: list: group list
|
||||||
item-18 at level 6: list_item: List item a
|
item-18 at level 6: list_item: List item a
|
||||||
item-19 at level 6: list_item: List item b
|
item-19 at level 6: list_item: List item b
|
||||||
item-20 at level 6: list_item: List item c
|
item-20 at level 6: list_item: List item c
|
||||||
item-21 at level 5: paragraph:
|
item-21 at level 5: text:
|
||||||
item-22 at level 4: section_header: Test 3:
|
item-22 at level 4: section_header: Test 3:
|
||||||
item-23 at level 5: list: group list
|
item-23 at level 5: list: group list
|
||||||
item-24 at level 6: list_item: List item 1
|
item-24 at level 6: list_item: List item 1
|
||||||
@@ -29,14 +29,14 @@ item-0 at level 0: unspecified: group _root_
|
|||||||
item-28 at level 7: list_item: List item 1.2
|
item-28 at level 7: list_item: List item 1.2
|
||||||
item-29 at level 7: list_item: List item 1.3
|
item-29 at level 7: list_item: List item 1.3
|
||||||
item-30 at level 6: list_item: List item 3
|
item-30 at level 6: list_item: List item 3
|
||||||
item-31 at level 5: paragraph:
|
item-31 at level 5: text:
|
||||||
item-32 at level 4: section_header: Test 4:
|
item-32 at level 4: section_header: Test 4:
|
||||||
item-33 at level 5: list: group list
|
item-33 at level 5: list: group list
|
||||||
item-34 at level 6: list_item: List item 1
|
item-34 at level 6: list_item: List item 1
|
||||||
item-35 at level 6: list: group list
|
item-35 at level 6: list: group list
|
||||||
item-36 at level 7: list_item: List item 1.1
|
item-36 at level 7: list_item: List item 1.1
|
||||||
item-37 at level 6: list_item: List item 2
|
item-37 at level 6: list_item: List item 2
|
||||||
item-38 at level 5: paragraph:
|
item-38 at level 5: text:
|
||||||
item-39 at level 4: section_header: Test 5:
|
item-39 at level 4: section_header: Test 5:
|
||||||
item-40 at level 5: list: group list
|
item-40 at level 5: list: group list
|
||||||
item-41 at level 6: list_item: List item 1
|
item-41 at level 6: list_item: List item 1
|
||||||
@@ -45,7 +45,7 @@ item-0 at level 0: unspecified: group _root_
|
|||||||
item-44 at level 7: list: group list
|
item-44 at level 7: list: group list
|
||||||
item-45 at level 8: list_item: List item 1.1.1
|
item-45 at level 8: list_item: List item 1.1.1
|
||||||
item-46 at level 6: list_item: List item 3
|
item-46 at level 6: list_item: List item 3
|
||||||
item-47 at level 5: paragraph:
|
item-47 at level 5: text:
|
||||||
item-48 at level 4: section_header: Test 6:
|
item-48 at level 4: section_header: Test 6:
|
||||||
item-49 at level 5: list: group list
|
item-49 at level 5: list: group list
|
||||||
item-50 at level 6: list_item: List item 1
|
item-50 at level 6: list_item: List item 1
|
||||||
@@ -56,6 +56,6 @@ item-0 at level 0: unspecified: group _root_
|
|||||||
item-55 at level 7: list: group list
|
item-55 at level 7: list: group list
|
||||||
item-56 at level 8: list_item: List item 1.2.1
|
item-56 at level 8: list_item: List item 1.2.1
|
||||||
item-57 at level 6: list_item: List item 3
|
item-57 at level 6: list_item: List item 3
|
||||||
item-58 at level 5: paragraph:
|
item-58 at level 5: text:
|
||||||
item-59 at level 5: paragraph:
|
item-59 at level 5: text:
|
||||||
item-60 at level 5: paragraph:
|
item-60 at level 5: text:
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.6.0",
|
"version": "1.7.0",
|
||||||
"name": "unit_test_lists",
|
"name": "unit_test_lists",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
@@ -338,7 +338,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -350,7 +350,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -362,7 +362,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.1.1",
|
"orig": "Paragraph 2.1.1",
|
||||||
"text": "Paragraph 2.1.1",
|
"text": "Paragraph 2.1.1",
|
||||||
@@ -381,7 +381,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -393,7 +393,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Paragraph 2.1.2",
|
"orig": "Paragraph 2.1.2",
|
||||||
"text": "Paragraph 2.1.2",
|
"text": "Paragraph 2.1.2",
|
||||||
@@ -412,7 +412,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -507,7 +507,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -602,7 +602,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -760,7 +760,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -855,7 +855,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -971,7 +971,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1135,7 +1135,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1147,7 +1147,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -1159,7 +1159,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
|
|||||||
@@ -1,16 +1,16 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: paragraph: Transcript
|
item-1 at level 1: text: Transcript
|
||||||
item-2 at level 1: paragraph: February 20, 2025, 8:32PM
|
item-2 at level 1: text: February 20, 2025, 8:32PM
|
||||||
item-3 at level 1: picture
|
item-3 at level 1: picture
|
||||||
item-4 at level 1: inline: group group
|
item-4 at level 1: inline: group group
|
||||||
item-5 at level 2: paragraph: This is test 1
|
item-5 at level 2: text: This is test 1
|
||||||
item-6 at level 2: paragraph: 0:08
|
item-6 at level 2: text: 0:08
|
||||||
Correct, he is not.
|
Correct, he is not.
|
||||||
item-7 at level 1: paragraph:
|
item-7 at level 1: text:
|
||||||
item-8 at level 1: picture
|
item-8 at level 1: picture
|
||||||
item-9 at level 1: inline: group group
|
item-9 at level 1: inline: group group
|
||||||
item-10 at level 2: paragraph: This is test 2
|
item-10 at level 2: text: This is test 2
|
||||||
item-11 at level 2: paragraph: 0:16
|
item-11 at level 2: text: 0:16
|
||||||
Yeah, exactly.
|
Yeah, exactly.
|
||||||
item-12 at level 1: paragraph:
|
item-12 at level 1: text:
|
||||||
item-13 at level 1: paragraph:
|
item-13 at level 1: text:
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.6.0",
|
"version": "1.7.0",
|
||||||
"name": "word_image_anchors",
|
"name": "word_image_anchors",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
@@ -93,7 +93,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Transcript",
|
"orig": "Transcript",
|
||||||
"text": "Transcript",
|
"text": "Transcript",
|
||||||
@@ -112,7 +112,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "February 20, 2025, 8:32PM",
|
"orig": "February 20, 2025, 8:32PM",
|
||||||
"text": "February 20, 2025, 8:32PM",
|
"text": "February 20, 2025, 8:32PM",
|
||||||
@@ -131,7 +131,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "This is test 1",
|
"orig": "This is test 1",
|
||||||
"text": "This is test 1",
|
"text": "This is test 1",
|
||||||
@@ -150,7 +150,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "0:08\nCorrect, he is not.",
|
"orig": "0:08\nCorrect, he is not.",
|
||||||
"text": "0:08\nCorrect, he is not.",
|
"text": "0:08\nCorrect, he is not.",
|
||||||
@@ -169,7 +169,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -181,7 +181,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "This is test 2",
|
"orig": "This is test 2",
|
||||||
"text": "This is test 2",
|
"text": "This is test 2",
|
||||||
@@ -200,7 +200,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "0:16\nYeah, exactly.",
|
"orig": "0:16\nYeah, exactly.",
|
||||||
"text": "0:16\nYeah, exactly.",
|
"text": "0:16\nYeah, exactly.",
|
||||||
@@ -219,7 +219,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -231,7 +231,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
|
|||||||
@@ -1,28 +1,28 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: paragraph: Summer activities
|
item-1 at level 1: text: Summer activities
|
||||||
item-2 at level 1: title: Swimming in the lake
|
item-2 at level 1: title: Swimming in the lake
|
||||||
item-3 at level 2: paragraph: Duck
|
item-3 at level 2: text: Duck
|
||||||
item-4 at level 2: picture
|
item-4 at level 2: picture
|
||||||
item-5 at level 2: paragraph: Figure 1: This is a cute duckling
|
item-5 at level 2: text: Figure 1: This is a cute duckling
|
||||||
item-6 at level 2: section_header: Let’s swim!
|
item-6 at level 2: section_header: Let’s swim!
|
||||||
item-7 at level 3: paragraph: To get started with swimming, fi ... down in a water and try not to drown:
|
item-7 at level 3: text: To get started with swimming, fi ... down in a water and try not to drown:
|
||||||
item-8 at level 3: list: group list
|
item-8 at level 3: list: group list
|
||||||
item-9 at level 4: list_item: You can relax and look around
|
item-9 at level 4: list_item: You can relax and look around
|
||||||
item-10 at level 4: list_item: Paddle about
|
item-10 at level 4: list_item: Paddle about
|
||||||
item-11 at level 4: list_item: Enjoy summer warmth
|
item-11 at level 4: list_item: Enjoy summer warmth
|
||||||
item-12 at level 3: paragraph: Also, don’t forget:
|
item-12 at level 3: text: Also, don’t forget:
|
||||||
item-13 at level 3: list: group list
|
item-13 at level 3: list: group list
|
||||||
item-14 at level 4: list_item: Wear sunglasses
|
item-14 at level 4: list_item: Wear sunglasses
|
||||||
item-15 at level 4: list_item: Don’t forget to drink water
|
item-15 at level 4: list_item: Don’t forget to drink water
|
||||||
item-16 at level 4: list_item: Use sun cream
|
item-16 at level 4: list_item: Use sun cream
|
||||||
item-17 at level 3: paragraph: Hmm, what else…
|
item-17 at level 3: text: Hmm, what else…
|
||||||
item-18 at level 3: section_header: Let’s eat
|
item-18 at level 3: section_header: Let’s eat
|
||||||
item-19 at level 4: paragraph: After we had a good day of swimm ... , it’s important to eat something nice
|
item-19 at level 4: text: After we had a good day of swimm ... , it’s important to eat something nice
|
||||||
item-20 at level 4: paragraph: I like to eat leaves
|
item-20 at level 4: text: I like to eat leaves
|
||||||
item-21 at level 4: paragraph: Here are some interesting things a respectful duck could eat:
|
item-21 at level 4: text: Here are some interesting things a respectful duck could eat:
|
||||||
item-22 at level 4: table with [4x3]
|
item-22 at level 4: table with [4x3]
|
||||||
item-23 at level 4: paragraph:
|
item-23 at level 4: text:
|
||||||
item-24 at level 4: paragraph: And let’s add another list in the end:
|
item-24 at level 4: text: And let’s add another list in the end:
|
||||||
item-25 at level 4: list: group list
|
item-25 at level 4: list: group list
|
||||||
item-26 at level 5: list_item: Leaves
|
item-26 at level 5: list_item: Leaves
|
||||||
item-27 at level 5: list_item: Berries
|
item-27 at level 5: list_item: Berries
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.6.0",
|
"version": "1.7.0",
|
||||||
"name": "word_sample",
|
"name": "word_sample",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
@@ -98,7 +98,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Summer activities",
|
"orig": "Summer activities",
|
||||||
"text": "Summer activities",
|
"text": "Summer activities",
|
||||||
@@ -142,7 +142,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Duck",
|
"orig": "Duck",
|
||||||
"text": "Duck",
|
"text": "Duck",
|
||||||
@@ -161,7 +161,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Figure 1: This is a cute duckling",
|
"orig": "Figure 1: This is a cute duckling",
|
||||||
"text": "Figure 1: This is a cute duckling",
|
"text": "Figure 1: This is a cute duckling",
|
||||||
@@ -212,7 +212,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "To get started with swimming, first lay down in a water and try not to drown:",
|
"orig": "To get started with swimming, first lay down in a water and try not to drown:",
|
||||||
"text": "To get started with swimming, first lay down in a water and try not to drown:",
|
"text": "To get started with swimming, first lay down in a water and try not to drown:",
|
||||||
@@ -294,7 +294,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Also, don’t forget:",
|
"orig": "Also, don’t forget:",
|
||||||
"text": "Also, don’t forget:",
|
"text": "Also, don’t forget:",
|
||||||
@@ -376,7 +376,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Hmm, what else…",
|
"orig": "Hmm, what else…",
|
||||||
"text": "Hmm, what else…",
|
"text": "Hmm, what else…",
|
||||||
@@ -430,7 +430,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "After we had a good day of swimming in the lake, it’s important to eat something nice",
|
"orig": "After we had a good day of swimming in the lake, it’s important to eat something nice",
|
||||||
"text": "After we had a good day of swimming in the lake, it’s important to eat something nice",
|
"text": "After we had a good day of swimming in the lake, it’s important to eat something nice",
|
||||||
@@ -449,7 +449,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "I like to eat leaves",
|
"orig": "I like to eat leaves",
|
||||||
"text": "I like to eat leaves",
|
"text": "I like to eat leaves",
|
||||||
@@ -468,7 +468,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "Here are some interesting things a respectful duck could eat:",
|
"orig": "Here are some interesting things a respectful duck could eat:",
|
||||||
"text": "Here are some interesting things a respectful duck could eat:",
|
"text": "Here are some interesting things a respectful duck could eat:",
|
||||||
@@ -487,7 +487,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
@@ -499,7 +499,7 @@
|
|||||||
},
|
},
|
||||||
"children": [],
|
"children": [],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "paragraph",
|
"label": "text",
|
||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "And let’s add another list in the end:",
|
"orig": "And let’s add another list in the end:",
|
||||||
"text": "And let’s add another list in the end:",
|
"text": "And let’s add another list in the end:",
|
||||||
@@ -625,7 +625,8 @@
|
|||||||
"text": "",
|
"text": "",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -637,7 +638,8 @@
|
|||||||
"text": "Food",
|
"text": "Food",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -649,7 +651,8 @@
|
|||||||
"text": "Calories per portion",
|
"text": "Calories per portion",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -661,7 +664,8 @@
|
|||||||
"text": "Leaves",
|
"text": "Leaves",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -673,7 +677,8 @@
|
|||||||
"text": "Ash, Elm, Maple",
|
"text": "Ash, Elm, Maple",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -685,7 +690,8 @@
|
|||||||
"text": "50",
|
"text": "50",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -697,7 +703,8 @@
|
|||||||
"text": "Berries",
|
"text": "Berries",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -709,7 +716,8 @@
|
|||||||
"text": "Blueberry, Strawberry, Cranberry",
|
"text": "Blueberry, Strawberry, Cranberry",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -721,7 +729,8 @@
|
|||||||
"text": "150",
|
"text": "150",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -733,7 +742,8 @@
|
|||||||
"text": "Grain",
|
"text": "Grain",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -745,7 +755,8 @@
|
|||||||
"text": "Corn, Buckwheat, Barley",
|
"text": "Corn, Buckwheat, Barley",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -757,7 +768,8 @@
|
|||||||
"text": "200",
|
"text": "200",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"num_rows": 4,
|
"num_rows": 4,
|
||||||
@@ -774,7 +786,8 @@
|
|||||||
"text": "",
|
"text": "",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -786,7 +799,8 @@
|
|||||||
"text": "Food",
|
"text": "Food",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -798,7 +812,8 @@
|
|||||||
"text": "Calories per portion",
|
"text": "Calories per portion",
|
||||||
"column_header": true,
|
"column_header": true,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
@@ -812,7 +827,8 @@
|
|||||||
"text": "Leaves",
|
"text": "Leaves",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -824,7 +840,8 @@
|
|||||||
"text": "Ash, Elm, Maple",
|
"text": "Ash, Elm, Maple",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -836,7 +853,8 @@
|
|||||||
"text": "50",
|
"text": "50",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
@@ -850,7 +868,8 @@
|
|||||||
"text": "Berries",
|
"text": "Berries",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -862,7 +881,8 @@
|
|||||||
"text": "Blueberry, Strawberry, Cranberry",
|
"text": "Blueberry, Strawberry, Cranberry",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -874,7 +894,8 @@
|
|||||||
"text": "150",
|
"text": "150",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
@@ -888,7 +909,8 @@
|
|||||||
"text": "Grain",
|
"text": "Grain",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -900,7 +922,8 @@
|
|||||||
"text": "Corn, Buckwheat, Barley",
|
"text": "Corn, Buckwheat, Barley",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"row_span": 1,
|
"row_span": 1,
|
||||||
@@ -912,7 +935,8 @@
|
|||||||
"text": "200",
|
"text": "200",
|
||||||
"column_header": false,
|
"column_header": false,
|
||||||
"row_header": false,
|
"row_header": false,
|
||||||
"row_section": false
|
"row_section": false,
|
||||||
|
"fillable": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1,19 +1,19 @@
|
|||||||
item-0 at level 0: unspecified: group _root_
|
item-0 at level 0: unspecified: group _root_
|
||||||
item-1 at level 1: section: group header-0
|
item-1 at level 1: section: group header-0
|
||||||
item-2 at level 2: section_header: Test with tables
|
item-2 at level 2: section_header: Test with tables
|
||||||
item-3 at level 3: paragraph: A uniform table
|
item-3 at level 3: text: A uniform table
|
||||||
item-4 at level 3: table with [3x3]
|
item-4 at level 3: table with [3x3]
|
||||||
item-5 at level 3: paragraph:
|
item-5 at level 3: text:
|
||||||
item-6 at level 3: paragraph: A non-uniform table with horizontal spans
|
item-6 at level 3: text: A non-uniform table with horizontal spans
|
||||||
item-7 at level 3: table with [3x3]
|
item-7 at level 3: table with [3x3]
|
||||||
item-8 at level 3: paragraph:
|
item-8 at level 3: text:
|
||||||
item-9 at level 3: paragraph: A non-uniform table with horizontal spans in inner columns
|
item-9 at level 3: text: A non-uniform table with horizontal spans in inner columns
|
||||||
item-10 at level 3: table with [3x4]
|
item-10 at level 3: table with [3x4]
|
||||||
item-11 at level 3: paragraph:
|
item-11 at level 3: text:
|
||||||
item-12 at level 3: paragraph: A non-uniform table with vertical spans
|
item-12 at level 3: text: A non-uniform table with vertical spans
|
||||||
item-13 at level 3: table with [5x3]
|
item-13 at level 3: table with [5x3]
|
||||||
item-14 at level 3: paragraph:
|
item-14 at level 3: text:
|
||||||
item-15 at level 3: paragraph: A non-uniform table with all kinds of spans and empty cells
|
item-15 at level 3: text: A non-uniform table with all kinds of spans and empty cells
|
||||||
item-16 at level 3: table with [9x5]
|
item-16 at level 3: table with [9x5]
|
||||||
item-17 at level 3: paragraph:
|
item-17 at level 3: text:
|
||||||
item-18 at level 3: paragraph:
|
item-18 at level 3: text:
|
||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user