Merge pull request #556 from DS4SD/cau/layout-processing-improvement

feat: layout processing improvements and bugfixes Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-07-27 12:34:22 +00:00 · 2024-12-10 16:29:07 +01:00 · 2024-12-10 16:29:07 +01:00 · b66fb830c9
commit b66fb830c9
parent 184eed4095
52 changed files with 401 additions and 739 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,3 +1,14 @@
 ## [v2.10.0](https://github.com/DS4SD/docling/releases/tag/v2.10.0) - 2024-12-09
 ### Feature
 * Docling-parse v2 as default PDF backend ([#549](https://github.com/DS4SD/docling/issues/549)) ([`aca57f0`](https://github.com/DS4SD/docling/commit/aca57f0527dddcc027dc1ee840e2e492ab997170))
 ### Fix
 * Call into docling-core for legacy document transform ([#551](https://github.com/DS4SD/docling/issues/551)) ([`7972d47`](https://github.com/DS4SD/docling/commit/7972d47f88604f02d6a32527116c4d78eb1005e2))
 * Introduce Image format options in CLI. Silence the tqdm downloading messages. ([#544](https://github.com/DS4SD/docling/issues/544)) ([`78f61a8`](https://github.com/DS4SD/docling/commit/78f61a8522d3a19ecc1d605e8441fb543ca0fa96))
 ## [v2.9.0](https://github.com/DS4SD/docling/releases/tag/v2.9.0) - 2024-12-09
 ### Feature
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@ -29,8 +29,10 @@ from docling.datamodel.pipeline_options import (
    AcceleratorDevice,
    AcceleratorOptions,
    EasyOcrOptions,
    OcrEngine,
    OcrMacOptions,
    OcrOptions,
    PdfBackend,
    PdfPipelineOptions,
    RapidOcrOptions,
    TableFormerMode,
@ -70,22 +72,6 @@ def version_callback(value: bool):
        raise typer.Exit()
 # Define an enum for the backend options
 class PdfBackend(str, Enum):
    # String-valued enum of PDF backend identifiers; because of the `str`
    # mix-in each member compares equal to its literal value.
    PYPDFIUM2 = "pypdfium2"
    DLPARSE_V1 = "dlparse_v1"
    DLPARSE_V2 = "dlparse_v2"
 # Define an enum for the ocr engines
 class OcrEngine(str, Enum):
    # String-valued enum of OCR engine identifiers; because of the `str`
    # mix-in each member compares equal to its literal value.
    EASYOCR = "easyocr"
    TESSERACT_CLI = "tesseract_cli"
    TESSERACT = "tesseract"
    OCRMAC = "ocrmac"
    RAPIDOCR = "rapidocr"
 def export_documents(
    conv_results: Iterable[ConversionResult],
    output_dir: Path,
--- a/docling/datamodel/base_models.py
+++ b/docling/datamodel/base_models.py
@ -19,12 +19,12 @@ if TYPE_CHECKING:
 class ConversionStatus(str, Enum):
-    PENDING = auto()
+    PENDING = "pending"
-    STARTED = auto()
+    STARTED = "started"
-    FAILURE = auto()
+    FAILURE = "failure"
-    SUCCESS = auto()
+    SUCCESS = "success"
-    PARTIAL_SUCCESS = auto()
+    PARTIAL_SUCCESS = "partial_success"
-    SKIPPED = auto()
+    SKIPPED = "skipped"
 class InputFormat(str, Enum):
@ -89,15 +89,15 @@ MimeTypeToFormat = {
 class DocInputType(str, Enum):
-    PATH = auto()
+    PATH = "path"
-    STREAM = auto()
+    STREAM = "stream"
 class DoclingComponentType(str, Enum):
-    DOCUMENT_BACKEND = auto()
+    DOCUMENT_BACKEND = "document_backend"
-    MODEL = auto()
+    MODEL = "model"
-    DOC_ASSEMBLER = auto()
+    DOC_ASSEMBLER = "doc_assembler"
-    USER_INPUT = auto()
+    USER_INPUT = "user_input"
 class ErrorItem(BaseModel):
--- a/docling/datamodel/document.py
+++ b/docling/datamodel/document.py
@ -33,6 +33,7 @@ from docling_core.types.legacy_doc.document import (
 from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileInfoObject
 from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument
 from docling_core.utils.file import resolve_source_to_stream
 from docling_core.utils.legacy import docling_document_to_legacy
 from pydantic import BaseModel
 from typing_extensions import deprecated
@ -191,259 +192,7 @@ class ConversionResult(BaseModel):
    @property
    @deprecated("Use document instead.")
    def legacy_document(self):
-        reverse_label_mapping = {
+        return docling_document_to_legacy(self.document)
            DocItemLabel.CAPTION.value: "Caption",
            DocItemLabel.FOOTNOTE.value: "Footnote",
            DocItemLabel.FORMULA.value: "Formula",
            DocItemLabel.LIST_ITEM.value: "List-item",
            DocItemLabel.PAGE_FOOTER.value: "Page-footer",
            DocItemLabel.PAGE_HEADER.value: "Page-header",
            DocItemLabel.PICTURE.value: "Picture",  # low threshold adjust to capture chemical structures for examples.
            DocItemLabel.SECTION_HEADER.value: "Section-header",
            DocItemLabel.TABLE.value: "Table",
            DocItemLabel.TEXT.value: "Text",
            DocItemLabel.TITLE.value: "Title",
            DocItemLabel.DOCUMENT_INDEX.value: "Document Index",
            DocItemLabel.CODE.value: "Code",
            DocItemLabel.CHECKBOX_SELECTED.value: "Checkbox-Selected",
            DocItemLabel.CHECKBOX_UNSELECTED.value: "Checkbox-Unselected",
            DocItemLabel.FORM.value: "Form",
            DocItemLabel.KEY_VALUE_REGION.value: "Key-Value Region",
            DocItemLabel.PARAGRAPH.value: "paragraph",
        }
        title = ""
        desc = DsDocumentDescription(logs=[])
        page_hashes = [
            PageReference(
                hash=create_hash(self.input.document_hash + ":" + str(p.page_no - 1)),
                page=p.page_no,
                model="default",
            )
            for p in self.document.pages.values()
        ]
        file_info = DsFileInfoObject(
            filename=self.input.file.name,
            document_hash=self.input.document_hash,
            num_pages=self.input.page_count,
            page_hashes=page_hashes,
        )
        main_text = []
        tables = []
        figures = []
        equations = []
        footnotes = []
        page_headers = []
        page_footers = []
        embedded_captions = set()
        for ix, (item, level) in enumerate(
            self.document.iterate_items(self.document.body)
        ):
            if isinstance(item, (TableItem, PictureItem)) and len(item.captions) > 0:
                caption = item.caption_text(self.document)
                if caption:
                    embedded_captions.add(caption)
        for item, level in self.document.iterate_items():
            if isinstance(item, DocItem):
                item_type = item.label
                if isinstance(item, (TextItem, ListItem, SectionHeaderItem)):
                    if isinstance(item, ListItem) and item.marker:
                        text = f"{item.marker} {item.text}"
                    else:
                        text = item.text
                    # Can be empty.
                    prov = [
                        Prov(
                            bbox=p.bbox.as_tuple(),
                            page=p.page_no,
                            span=[0, len(item.text)],
                        )
                        for p in item.prov
                    ]
                    main_text.append(
                        BaseText(
                            text=text,
                            obj_type=layout_label_to_ds_type.get(item.label),
                            name=reverse_label_mapping[item.label],
                            prov=prov,
                        )
                    )
                    # skip captions if they are embedded in the actual
                    # floating object
                    if item_type == DocItemLabel.CAPTION and text in embedded_captions:
                        continue
                elif isinstance(item, TableItem) and item.data:
                    index = len(tables)
                    ref_str = f"#/tables/{index}"
                    main_text.append(
                        Ref(
                            name=reverse_label_mapping[item.label],
                            obj_type=layout_label_to_ds_type.get(item.label),
                            ref=ref_str,
                        ),
                    )
                    # Initialise empty table data grid (only empty cells)
                    table_data = [
                        [
                            TableCell(
                                text="",
                                # bbox=[0,0,0,0],
                                spans=[[i, j]],
                                obj_type="body",
                            )
                            for j in range(item.data.num_cols)
                        ]
                        for i in range(item.data.num_rows)
                    ]
                    # Overwrite cells in table data for which there is actual cell content.
                    for cell in item.data.table_cells:
                        for i in range(
                            min(cell.start_row_offset_idx, item.data.num_rows),
                            min(cell.end_row_offset_idx, item.data.num_rows),
                        ):
                            for j in range(
                                min(cell.start_col_offset_idx, item.data.num_cols),
                                min(cell.end_col_offset_idx, item.data.num_cols),
                            ):
                                celltype = "body"
                                if cell.column_header:
                                    celltype = "col_header"
                                elif cell.row_header:
                                    celltype = "row_header"
                                elif cell.row_section:
                                    celltype = "row_section"
                                def make_spans(cell):
                                    for rspan in range(
                                        min(
                                            cell.start_row_offset_idx,
                                            item.data.num_rows,
                                        ),
                                        min(
                                            cell.end_row_offset_idx, item.data.num_rows
                                        ),
                                    ):
                                        for cspan in range(
                                            min(
                                                cell.start_col_offset_idx,
                                                item.data.num_cols,
                                            ),
                                            min(
                                                cell.end_col_offset_idx,
                                                item.data.num_cols,
                                            ),
                                        ):
                                            yield [rspan, cspan]
                                spans = list(make_spans(cell))
                                table_data[i][j] = GlmTableCell(
                                    text=cell.text,
                                    bbox=(
                                        cell.bbox.as_tuple()
                                        if cell.bbox is not None
                                        else None
                                    ),  # check if this is bottom-left
                                    spans=spans,
                                    obj_type=celltype,
                                    col=j,
                                    row=i,
                                    row_header=cell.row_header,
                                    row_section=cell.row_section,
                                    col_header=cell.column_header,
                                    row_span=[
                                        cell.start_row_offset_idx,
                                        cell.end_row_offset_idx,
                                    ],
                                    col_span=[
                                        cell.start_col_offset_idx,
                                        cell.end_col_offset_idx,
                                    ],
                                )
                    # Compute the caption
                    caption = item.caption_text(self.document)
                    tables.append(
                        DsSchemaTable(
                            text=caption,
                            num_cols=item.data.num_cols,
                            num_rows=item.data.num_rows,
                            obj_type=layout_label_to_ds_type.get(item.label),
                            data=table_data,
                            prov=[
                                Prov(
                                    bbox=p.bbox.as_tuple(),
                                    page=p.page_no,
                                    span=[0, 0],
                                )
                                for p in item.prov
                            ],
                        )
                    )
                elif isinstance(item, PictureItem):
                    index = len(figures)
                    ref_str = f"#/figures/{index}"
                    main_text.append(
                        Ref(
                            name=reverse_label_mapping[item.label],
                            obj_type=layout_label_to_ds_type.get(item.label),
                            ref=ref_str,
                        ),
                    )
                    # Compute the caption
                    caption = item.caption_text(self.document)
                    figures.append(
                        Figure(
                            prov=[
                                Prov(
                                    bbox=p.bbox.as_tuple(),
                                    page=p.page_no,
                                    span=[0, len(caption)],
                                )
                                for p in item.prov
                            ],
                            obj_type=layout_label_to_ds_type.get(item.label),
                            text=caption,
                            # data=[[]],
                        )
                    )
        page_dimensions = [
            PageDimensions(page=p.page_no, height=p.size.height, width=p.size.width)
            for p in self.document.pages.values()
        ]
        ds_doc = DsDocument(
            name=title,
            description=desc,
            file_info=file_info,
            main_text=main_text,
            equations=equations,
            footnotes=footnotes,
            page_headers=page_headers,
            page_footers=page_footers,
            tables=tables,
            figures=figures,
            page_dimensions=page_dimensions,
        )
        return ds_doc
 class _DummyBackend(AbstractDocumentBackend):
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@ -190,6 +190,26 @@ class OcrMacOptions(OcrOptions):
    )
 # Define an enum for the backend options
 class PdfBackend(str, Enum):
    """Enum of valid PDF backends."""
    # The `str` mix-in makes every member usable wherever its plain string
    # value is expected (each member compares equal to that value).
    PYPDFIUM2 = "pypdfium2"
    DLPARSE_V1 = "dlparse_v1"
    DLPARSE_V2 = "dlparse_v2"
 # Define an enum for the ocr engines
 class OcrEngine(str, Enum):
    """Enum of valid OCR engines."""
    # The `str` mix-in makes every member usable wherever its plain string
    # value is expected (each member compares equal to that value).
    EASYOCR = "easyocr"
    TESSERACT_CLI = "tesseract_cli"
    TESSERACT = "tesseract"
    OCRMAC = "ocrmac"
    RAPIDOCR = "rapidocr"
 class PipelineOptions(BaseModel):
    """Base pipeline options."""
--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@ -80,7 +80,7 @@ class LayoutModel(BasePageModel):
            DocItemLabel.TITLE: (255, 153, 153),  # Light Red (same as Section-Header)
            DocItemLabel.FOOTNOTE: (200, 200, 255),  # Light Blue
            DocItemLabel.DOCUMENT_INDEX: (220, 220, 220),  # Light Gray
-            DocItemLabel.CODE: (255, 223, 186),  # Peach
+            DocItemLabel.CODE: (125, 125, 125),  # Gray
            DocItemLabel.CHECKBOX_SELECTED: (255, 182, 193),  # Light Pink
            DocItemLabel.CHECKBOX_UNSELECTED: (255, 182, 193),  # Light Pink
            DocItemLabel.FORM: (200, 255, 255),  # Light Cyan
--- a/docling/models/rapid_ocr_model.py
+++ b/docling/models/rapid_ocr_model.py
@ -97,24 +97,25 @@ class RapidOcrModel(BaseOcrModel):
                        del high_res_image
                        del im
-                        cells = [
+                        if result is not None:
-                            OcrCell(
+                            cells = [
-                                id=ix,
+                                OcrCell(
-                                text=line[1],
+                                    id=ix,
-                                confidence=line[2],
+                                    text=line[1],
-                                bbox=BoundingBox.from_tuple(
+                                    confidence=line[2],
-                                    coord=(
+                                    bbox=BoundingBox.from_tuple(
-                                        (line[0][0][0] / self.scale) + ocr_rect.l,
+                                        coord=(
-                                        (line[0][0][1] / self.scale) + ocr_rect.t,
+                                            (line[0][0][0] / self.scale) + ocr_rect.l,
-                                        (line[0][2][0] / self.scale) + ocr_rect.l,
+                                            (line[0][0][1] / self.scale) + ocr_rect.t,
-                                        (line[0][2][1] / self.scale) + ocr_rect.t,
+                                            (line[0][2][0] / self.scale) + ocr_rect.l,
                                            (line[0][2][1] / self.scale) + ocr_rect.t,
                                        ),
                                        origin=CoordOrigin.TOPLEFT,
                                    ),
-                                    origin=CoordOrigin.TOPLEFT,
+                                )
-                                ),
+                                for ix, line in enumerate(result)
-                            )
+                            ]
-                            for ix, line in enumerate(result)
+                            all_ocr_cells.extend(cells)
                        ]
                        all_ocr_cells.extend(cells)
                    # Post-process the cells
                    page.cells = self.post_process_cells(all_ocr_cells, page.cells)
--- a/docling/models/table_structure_model.py
+++ b/docling/models/table_structure_model.py
@ -71,6 +71,10 @@ class TableStructureModel(BasePageModel):
            x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
            draw.rectangle([(x0, y0), (x1, y1)], outline="red")
            for cell in table_element.cluster.cells:
                x0, y0, x1, y1 = cell.bbox.as_tuple()
                draw.rectangle([(x0, y0), (x1, y1)], outline="green")
            for tc in table_element.table_cells:
                if tc.bbox is not None:
                    x0, y0, x1, y1 = tc.bbox.as_tuple()
@ -84,7 +88,6 @@ class TableStructureModel(BasePageModel):
                        text=f"{tc.start_row_offset_idx}, {tc.start_col_offset_idx}",
                        fill="black",
                    )
        if show:
            image.show()
        else:
@ -136,41 +139,33 @@ class TableStructureModel(BasePageModel):
                        yield page
                        continue
                    tokens = []
                    for c in page.cells:
                        for cluster, _ in in_tables:
                            if c.bbox.area() > 0:
                                if (
                                    c.bbox.intersection_area_with(cluster.bbox)
                                    / c.bbox.area()
                                    > 0.2
                                ):
                                    # Only allow non empty strings (spaces) into the cells of a table
                                    if len(c.text.strip()) > 0:
                                        new_cell = copy.deepcopy(c)
                                        new_cell.bbox = new_cell.bbox.scaled(
                                            scale=self.scale
                                        )
                                        tokens.append(new_cell.model_dump())
                    page_input = {
                        "tokens": tokens,
                        "width": page.size.width * self.scale,
                        "height": page.size.height * self.scale,
                        "image": numpy.asarray(page.get_image(scale=self.scale)),
                    }
                    page_input["image"] = numpy.asarray(
                        page.get_image(scale=self.scale)
                    )
                    table_clusters, table_bboxes = zip(*in_tables)
                    if len(table_bboxes):
-                        tf_output = self.tf_predictor.multi_table_predict(
+                        for table_cluster, tbl_box in in_tables:
                            page_input, table_bboxes, do_matching=self.do_cell_matching
                        )
-                        for table_cluster, table_out in zip(table_clusters, tf_output):
+                            tokens = []
                            for c in table_cluster.cells:
                                # Only allow non empty strings (spaces) into the cells of a table
                                if len(c.text.strip()) > 0:
                                    new_cell = copy.deepcopy(c)
                                    new_cell.bbox = new_cell.bbox.scaled(
                                        scale=self.scale
                                    )
                                    tokens.append(new_cell.model_dump())
                            page_input["tokens"] = tokens
                            tf_output = self.tf_predictor.multi_table_predict(
                                page_input, [tbl_box], do_matching=self.do_cell_matching
                            )
                            table_out = tf_output[0]
                            table_cells = []
                            for element in table_out["tf_responses"]:
--- a/docling/utils/layout_postprocessor.py
+++ b/docling/utils/layout_postprocessor.py
@ -156,16 +156,16 @@ class LayoutPostprocessor:
    SPECIAL_TYPES = WRAPPER_TYPES | {DocItemLabel.PICTURE}
    CONFIDENCE_THRESHOLDS = {
-        DocItemLabel.CAPTION: 0.35,
+        DocItemLabel.CAPTION: 0.5,
-        DocItemLabel.FOOTNOTE: 0.35,
+        DocItemLabel.FOOTNOTE: 0.5,
-        DocItemLabel.FORMULA: 0.35,
+        DocItemLabel.FORMULA: 0.5,
-        DocItemLabel.LIST_ITEM: 0.35,
+        DocItemLabel.LIST_ITEM: 0.5,
-        DocItemLabel.PAGE_FOOTER: 0.35,
+        DocItemLabel.PAGE_FOOTER: 0.5,
-        DocItemLabel.PAGE_HEADER: 0.35,
+        DocItemLabel.PAGE_HEADER: 0.5,
-        DocItemLabel.PICTURE: 0.1,
+        DocItemLabel.PICTURE: 0.5,
        DocItemLabel.SECTION_HEADER: 0.45,
        DocItemLabel.TABLE: 0.35,
-        DocItemLabel.TEXT: 0.45,
+        DocItemLabel.TEXT: 0.55,  # 0.45,
        DocItemLabel.TITLE: 0.45,
        DocItemLabel.CODE: 0.45,
        DocItemLabel.CHECKBOX_SELECTED: 0.45,
@ -218,6 +218,12 @@ class LayoutPostprocessor:
        final_clusters = self._sort_clusters(
            self.regular_clusters + self.special_clusters
        )
        for cluster in final_clusters:
            cluster.cells = self._sort_cells(cluster.cells)
            # Also sort cells in children if any
            for child in cluster.children:
                child.cells = self._sort_cells(child.cells)
        return final_clusters, self.cells
    def _process_regular_clusters(self) -> List[Cluster]:
@ -273,6 +279,8 @@ class LayoutPostprocessor:
            if c.confidence >= self.CONFIDENCE_THRESHOLDS[c.label]
        ]
        special_clusters = self._handle_cross_type_overlaps(special_clusters)
        for special in special_clusters:
            contained = []
            for cluster in self.regular_clusters:
@ -283,14 +291,17 @@ class LayoutPostprocessor:
                        contained.append(cluster)
            if contained:
-                # Sort contained clusters by minimum cell ID
+                # # Sort contained clusters by minimum cell ID:
-                contained.sort(
+                # contained.sort(
-                    key=lambda cluster: (
+                #     key=lambda cluster: (
-                        min(cell.id for cell in cluster.cells)
+                #         min(cell.id for cell in cluster.cells)
-                        if cluster.cells
+                #         if cluster.cells
-                        else sys.maxsize
+                #         else sys.maxsize
-                    )
+                #     )
-                )
+                # )
                # Sort contained clusters left-to-right, top-to-bottom
                contained = self._sort_clusters(contained)
                special.children = contained
                # Adjust bbox only for wrapper types
@ -318,6 +329,109 @@ class LayoutPostprocessor:
        return picture_clusters + wrapper_clusters
    def _handle_cross_type_overlaps(self, special_clusters) -> List[Cluster]:
        """Handle overlaps between regular and wrapper clusters before child assignment.
        In particular, KEY_VALUE_REGION proposals that are almost identical to a TABLE
        should be removed.
        """
        wrappers_to_remove = set()
        for wrapper in special_clusters:
            if wrapper.label != DocItemLabel.KEY_VALUE_REGION:
                continue  # only treat KEY_VALUE_REGION for now.
            for regular in self.regular_clusters:
                if regular.label == DocItemLabel.TABLE:
                    # Calculate overlap
                    overlap = regular.bbox.intersection_area_with(wrapper.bbox)
                    wrapper_area = wrapper.bbox.area()
                    overlap_ratio = overlap / wrapper_area
                    # If wrapper is mostly overlapping with a TABLE, remove the wrapper
                    if overlap_ratio > 0.8:  # 80% overlap threshold
                        wrappers_to_remove.add(wrapper.id)
                        break
        # Filter out the identified wrappers
        special_clusters = [
            cluster
            for cluster in special_clusters
            if cluster.id not in wrappers_to_remove
        ]
        return special_clusters
    def _should_prefer_cluster(
        self, candidate: Cluster, other: Cluster, params: dict
    ) -> bool:
        """Decide whether ``candidate`` should be preferred over ``other``.

        Label-based rules are checked first; if none of them fires, the
        decision falls back to the area/confidence thresholds in ``params``.

        Args:
            candidate: Cluster being evaluated.
            other: Competing cluster from the same overlap group.
            params: Mapping that provides ``"area_threshold"`` and
                ``"conf_threshold"``.

        Returns:
            True if ``candidate`` should be preferred, False if ``other``
            rejects it.
        """
        other_area = other.bbox.area()
        other_has_area = other_area > 0
        # A zero-area `other` makes every ratio below undefined. Treat the
        # candidate as infinitely larger instead of raising ZeroDivisionError;
        # with that value no rule can reject the candidate on area grounds.
        area_ratio = (
            candidate.bbox.area() / other_area if other_has_area else float("inf")
        )

        # Rule 1: LIST_ITEM vs TEXT
        if (
            candidate.label == DocItemLabel.LIST_ITEM
            and other.label == DocItemLabel.TEXT
            # Areas are similar (within 20% of each other)
            and abs(1 - area_ratio) < 0.2
        ):
            return True

        # Rule 2: CODE vs others
        if candidate.label == DocItemLabel.CODE and other_has_area:
            # How much of the other cluster lies within the CODE cluster
            overlap = other.bbox.intersection_area_with(candidate.bbox)
            if overlap / other_area > 0.8:  # other is 80% contained within CODE
                return True

        # If no label-based rules matched, fall back to area/confidence thresholds
        conf_diff = other.confidence - candidate.confidence
        rejected = (
            area_ratio <= params["area_threshold"]
            and conf_diff > params["conf_threshold"]
        )
        # Default to keeping candidate if no rules triggered rejection
        return not rejected
    def _select_best_cluster_from_group(
        self,
        group_clusters: List[Cluster],
        params: dict,
    ) -> Cluster:
        """Select best cluster from a group of overlapping clusters based on all rules."""
        current_best = None
        for candidate in group_clusters:
            should_select = True
            for other in group_clusters:
                if other == candidate:
                    continue
                if not self._should_prefer_cluster(candidate, other, params):
                    should_select = False
                    break
            if should_select:
                if current_best is None:
                    current_best = candidate
                else:
                    # If both clusters pass rules, prefer the larger one unless confidence differs significantly
                    if (
                        candidate.bbox.area() > current_best.bbox.area()
                        and current_best.confidence - candidate.confidence
                        <= params["conf_threshold"]
                    ):
                        current_best = candidate
        return current_best if current_best else group_clusters[0]
    def _remove_overlapping_clusters(
        self,
        clusters: List[Cluster],
@ -360,36 +474,15 @@ class LayoutPostprocessor:
                continue
            group_clusters = [valid_clusters[cid] for cid in group]
-            current_best = None
+            best = self._select_best_cluster_from_group(group_clusters, params)
-            for candidate in group_clusters:
+            # Simple cell merging - no special cases
                should_select = True
                for other in group_clusters:
                    if other == candidate:
                        continue
                    area_ratio = candidate.bbox.area() / other.bbox.area()
                    conf_diff = other.confidence - candidate.confidence
                    if (
                        area_ratio <= params["area_threshold"]
                        and conf_diff > params["conf_threshold"]
                    ):
                        should_select = False
                        break
                if should_select:
                    if current_best is None or (
                        candidate.bbox.area() > current_best.bbox.area()
                        and current_best.confidence - candidate.confidence
                        <= params["conf_threshold"]
                    ):
                        current_best = candidate
            best = current_best if current_best else group_clusters[0]
            for cluster in group_clusters:
                if cluster != best:
                    best.cells.extend(cluster.cells)
            best.cells = self._deduplicate_cells(best.cells)
            best.cells = self._sort_cells(best.cells)
            result.append(best)
        return result
@ -424,6 +517,16 @@ class LayoutPostprocessor:
        return current_best if current_best else clusters[0]
    def _deduplicate_cells(self, cells: List[Cell]) -> List[Cell]:
        """Ensure each cell appears only once, maintaining order of first appearance."""
        seen_ids = set()
        unique_cells = []
        for cell in cells:
            if cell.id not in seen_ids:
                seen_ids.add(cell.id)
                unique_cells.append(cell)
        return unique_cells
    def _assign_cells_to_clusters(
        self, clusters: List[Cluster], min_overlap: float = 0.2
    ) -> List[Cluster]:
@ -452,6 +555,10 @@ class LayoutPostprocessor:
            if best_cluster is not None:
                best_cluster.cells.append(cell)
        # Deduplicate cells in each cluster after assignment
        for cluster in clusters:
            cluster.cells = self._deduplicate_cells(cluster.cells)
        return clusters
    def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[Cell]:
@ -487,13 +594,10 @@ class LayoutPostprocessor:
        return clusters
    def _sort_cells(self, cells: List[Cell]) -> List[Cell]:
        """Sort cells in native reading order."""
        return sorted(cells, key=lambda c: (c.id))
    def _sort_clusters(self, clusters: List[Cluster]) -> List[Cluster]:
        """Sort clusters in reading order (top-to-bottom, left-to-right)."""
-
+        return sorted(clusters, key=lambda cluster: (cluster.bbox.t, cluster.bbox.l))
        def reading_order_key(cluster: Cluster) -> Tuple[float, float]:
            if cluster.cells and cluster.label != DocItemLabel.PICTURE:
                first_cell = min(cluster.cells, key=lambda c: (c.bbox.t, c.bbox.l))
                return (first_cell.bbox.t, first_cell.bbox.l)
            return (cluster.bbox.t, cluster.bbox.l)
        return sorted(clusters, key=reading_order_key)
--- a/docs/concepts/architecture.md
+++ b/docs/concepts/architecture.md
@ -10,7 +10,7 @@ For each document format, the *document converter* knows which format-specific *
 The *conversion result* contains the [*Docling document*](./docling_document.md), Docling's fundamental document representation.
-Some typical scenarios for using a Docling document include directly calling its *export methods*, such as for markdown, dictionary etc., or having it chunked by a *chunker*.
+Some typical scenarios for using a Docling document include directly calling its *export methods*, such as for markdown, dictionary etc., or having it chunked by a [*chunker*](./chunking.md).
 For more details on Docling's architecture, check out the [Docling Technical Report](https://arxiv.org/abs/2408.09869).
--- a/docs/reference/cli.md
+++ b/docs/reference/cli.md
@ -1,4 +1,4 @@
-# CLI Reference
+# CLI reference
 This page provides documentation for our command line tools.
@ -6,4 +6,4 @@ This page provides documentation for our command line tools.
    :module: docling.cli.main
    :command: click_app
    :prog_name: docling
-    :style: table
+    :style: table
--- a/docs/api_reference/docling_document.md
+++ b/docs/api_reference/docling_document.md
--- a/docs/api_reference/document_converter.md
+++ b/docs/api_reference/document_converter.md
--- a/docs/api_reference/pipeline_options.md
+++ b/docs/api_reference/pipeline_options.md
--- a/docs/usage.md
+++ b/docs/usage.md
@ -22,9 +22,7 @@ A simple example would look like this:
 docling https://arxiv.org/pdf/2206.01062
 ```
-To see all available options (export formats etc.) run `docling --help`. More details in the [CLI reference page](./cli.md).
+To see all available options (export formats etc.) run `docling --help`. More details in the [CLI reference page](./reference/cli.md).
 ### Advanced options
@ -130,29 +128,37 @@ You can limit the CPU threads used by Docling by setting the environment variabl
 ## Chunking
-You can perform a hierarchy-aware chunking of a Docling document as follows:
+You can chunk a Docling document using a [chunker](concepts/chunking.md), such as a
 `HybridChunker`, as shown below (for more details check out
 [this example](examples/hybrid_chunking.ipynb)):
 ```python
 from docling.document_converter import DocumentConverter
-from docling_core.transforms.chunker import HierarchicalChunker
+from docling.chunking import HybridChunker
 conv_res = DocumentConverter().convert("https://arxiv.org/pdf/2206.01062")
 doc = conv_res.document
 chunks = list(HierarchicalChunker().chunk(doc))
-print(chunks[30])
+chunker = HybridChunker(tokenizer="BAAI/bge-small-en-v1.5")  # set tokenizer as needed
 chunk_iter = chunker.chunk(doc)
 ```
 An example chunk would look like this:
 ```python
 print(list(chunk_iter)[11])
 # {
-#   "text": "Lately, new types of ML models for document-layout analysis have emerged [...]",
+#   "text": "In this paper, we present the DocLayNet dataset. [...]",
 #   "meta": {
 #     "doc_items": [{
-#       "self_ref": "#/texts/40",
+#       "self_ref": "#/texts/28",
 #       "label": "text",
 #       "prov": [{
 #         "page_no": 2,
-#         "bbox": {"l": 317.06, "t": 325.81, "r": 559.18, "b": 239.97, ...},
+#         "bbox": {"l": 53.29, "t": 287.14, "r": 295.56, "b": 212.37, ...},
-#       }]
+#       }], ...,
-#     }],
+#     }, ...],
-#     "headings": ["2 RELATED WORK"],
+#     "headings": ["1 INTRODUCTION"],
 #   }
 # }
 ```
--- a/mkdocs.yml
+++ b/mkdocs.yml
@ -56,7 +56,6 @@ nav:
    - "Docling": index.md
    - Installation: installation.md
    - Usage: usage.md
    - CLI: cli.md
    - FAQ: faq.md
    - Docling v2: v2.md
  - Concepts:
@ -77,15 +76,12 @@ nav:
      - "Multimodal export": examples/export_multimodal.py
      - "Force full page OCR": examples/full_page_ocr.py
      - "Accelerator options": examples/run_with_acclerators.py
    - Chunking:
      - "Hybrid chunking": examples/hybrid_chunking.ipynb
    - RAG / QA:
      - "RAG with LlamaIndex 🦙": examples/rag_llamaindex.ipynb
      - "RAG with LangChain 🦜🔗": examples/rag_langchain.ipynb
      - "Hybrid RAG with Qdrant": examples/hybrid_rag_qdrant.ipynb
    - Chunking:
      - "Hybrid chunking": examples/hybrid_chunking.ipynb
    #   - Chunking: examples/chunking.md
    # - CLI:
    #   - CLI: examples/cli.md
  - Integrations:
    - Integrations: integrations/index.md
    - "🐝 Bee": integrations/bee.md
@ -100,10 +96,13 @@ nav:
    - "spaCy": integrations/spacy.md
    - "txtai": integrations/txtai.md
    # - "LangChain 🦜🔗": integrations/langchain.md
-  - API reference:
+  - Reference:
-    - Document Converter: api_reference/document_converter.md
+    - Python API:
-    - Pipeline options: api_reference/pipeline_options.md
+      - Document Converter: reference/document_converter.md
-    - Docling Document: api_reference/docling_document.md
+      - Pipeline options: reference/pipeline_options.md
      - Docling Document: reference/docling_document.md
    - CLI:
      - CLI reference: reference/cli.md
 markdown_extensions:
  - pymdownx.superfences
--- a/poetry.lock
+++ b/poetry.lock
@ -922,27 +922,29 @@ name = "docling-core"
 version = "2.9.0"
 description = "A python library to define and validate data types in Docling."
 optional = false
-python-versions = "<4.0,>=3.9"
+python-versions = "^3.9"
-files = [
+files = []
-    {file = "docling_core-2.9.0-py3-none-any.whl", hash = "sha256:b44b077db5d2ac8a900f30a15abe329c165b1f2eb7f1c90d1275c423c1c3d668"},
+develop = false
    {file = "docling_core-2.9.0.tar.gz", hash = "sha256:1bf12fe67ee4852330e9bac33fe62b45598ff885481e03a88fa8e1bf48252424"},
 ]
 [package.dependencies]
-jsonref = ">=1.1.0,<2.0.0"
+jsonref = "^1.1.0"
-jsonschema = ">=4.16.0,<5.0.0"
+jsonschema = "^4.16.0"
-pandas = ">=2.1.4,<3.0.0"
+pandas = "^2.1.4"
-pillow = ">=10.3.0,<11.0.0"
+pillow = "^10.3.0"
-pydantic = ">=2.6.0,<2.10.0 || >2.10.0,<2.10.1 || >2.10.1,<2.10.2 || >2.10.2,<3.0.0"
+pydantic = ">=2.6.0,<3.0.0,!=2.10.0,!=2.10.1,!=2.10.2"
 pyyaml = ">=5.1,<7.0.0"
-semchunk = {version = ">=2.2.0,<3.0.0", optional = true, markers = "extra == \"chunking\""}
+tabulate = "^0.9.0"
-tabulate = ">=0.9.0,<0.10.0"
+typing-extensions = "^4.12.2"
 transformers = {version = ">=4.34.0,<5.0.0", optional = true, markers = "extra == \"chunking\""}
 typing-extensions = ">=4.12.2,<5.0.0"
 [package.extras]
 chunking = ["semchunk (>=2.2.0,<3.0.0)", "transformers (>=4.34.0,<5.0.0)"]
 [package.source]
 type = "git"
 url = "ssh://git@github.com/DS4SD/docling-core.git"
 reference = "cau/include-picture-contents"
 resolved_reference = "012f8ac38a2ba7e77110b3f7ad57af2a984232e5"
 [[package]]
 name = "docling-ibm-models"
 version = "2.0.7"
@ -2855,32 +2857,6 @@ files = [
    {file = "more_itertools-10.5.0-py3-none-any.whl", hash = "sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef"},
 ]
 [[package]]
 name = "mpire"
 version = "2.10.2"
 description = "A Python package for easy multiprocessing, but faster than multiprocessing"
 optional = false
 python-versions = "*"
 files = [
    {file = "mpire-2.10.2-py3-none-any.whl", hash = "sha256:d627707f7a8d02aa4c7f7d59de399dec5290945ddf7fbd36cbb1d6ebb37a51fb"},
    {file = "mpire-2.10.2.tar.gz", hash = "sha256:f66a321e93fadff34585a4bfa05e95bd946cf714b442f51c529038eb45773d97"},
 ]
 [package.dependencies]
 multiprocess = [
    {version = "*", optional = true, markers = "python_version < \"3.11\" and extra == \"dill\""},
    {version = ">=0.70.15", optional = true, markers = "python_version >= \"3.11\" and extra == \"dill\""},
 ]
 pygments = ">=2.0"
 pywin32 = {version = ">=301", markers = "platform_system == \"Windows\""}
 tqdm = ">=4.27"
 [package.extras]
 dashboard = ["flask"]
 dill = ["multiprocess", "multiprocess (>=0.70.15)"]
 docs = ["docutils (==0.17.1)", "sphinx (==3.2.1)", "sphinx-autodoc-typehints (==1.11.0)", "sphinx-rtd-theme (==0.5.0)", "sphinx-versions (==1.0.1)", "sphinxcontrib-images (==0.9.2)"]
 testing = ["ipywidgets", "multiprocess", "multiprocess (>=0.70.15)", "numpy", "pywin32 (>=301)", "rich"]
 [[package]]
 name = "mpmath"
 version = "1.3.0"
@ -6170,21 +6146,6 @@ files = [
 cryptography = ">=2.0"
 jeepney = ">=0.6"
 [[package]]
 name = "semchunk"
 version = "2.2.0"
 description = "A fast and lightweight Python library for splitting text into semantically meaningful chunks."
 optional = false
 python-versions = ">=3.9"
 files = [
    {file = "semchunk-2.2.0-py3-none-any.whl", hash = "sha256:7db19ca90ddb48f99265e789e07a7bb111ae25185f9cc3d44b94e1e61b9067fc"},
    {file = "semchunk-2.2.0.tar.gz", hash = "sha256:4de761ce614036fa3bea61adbe47e3ade7c96ac9b062f223b3ac353dbfd26743"},
 ]
 [package.dependencies]
 mpire = {version = "*", extras = ["dill"]}
 tqdm = "*"
 [[package]]
 name = "semver"
 version = "2.13.0"
@ -7723,4 +7684,4 @@ tesserocr = ["tesserocr"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "6917af8d76aa1f85a159f0ab9546478b4bef194ae726c79196bac087c7368fef"
+content-hash = "c991515ef231d9eeead33cc876e8cb93fe31e949a5ab92918a4b77257d2700a3"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling"
-version = "2.9.0"  # DO NOT EDIT, updated automatically
+version = "2.10.0"  # DO NOT EDIT, updated automatically
 description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
 authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Panos Vagenas <pva@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
 license = "MIT"
@ -28,7 +28,8 @@ python = "^3.9"
 docling-ibm-models = { git = "ssh://git@github.com/DS4SD/docling-ibm-models.git", branch = "nli/performance" }
 deepsearch-glm = "^1.0.0"
 docling-parse = "^3.0.0"
-docling-core = { version = "^2.9.0", extras = ["chunking"] }
+#docling-core = { version = "^2.9.0", extras = ["chunking"] }
 docling-core = { git = "ssh://git@github.com/DS4SD/docling-core.git", branch = "cau/include-picture-contents" }
 pydantic = "^2.0.0"
 filetype = "^1.2.0"
 pypdfium2 = "^4.30.0"
--- a/tests/data/groundtruth/docling_v1/2203.01017v2.doctags.txt
+++ b/tests/data/groundtruth/docling_v1/2203.01017v2.doctags.txt
@ -153,41 +153,20 @@
 </table>
 <paragraph><location><page_8><loc_9><loc_89><loc_10><loc_90></location>- a.</paragraph>
 <paragraph><location><page_8><loc_11><loc_89><loc_82><loc_90></location>- Red - PDF cells, Green - predicted bounding boxes, Blue - post-processed predictions matched to PDF cells</paragraph>
-<paragraph><location><page_8><loc_9><loc_87><loc_46><loc_88></location>Japanese language (previously unseen by TableFormer):</paragraph>
+<caption><location><page_8><loc_9><loc_87><loc_70><loc_88></location>Japanese language (previously unseen by TableFormer): Example table from FinTabNet:</caption>
 <paragraph><location><page_8><loc_50><loc_87><loc_70><loc_88></location>Example table from FinTabNet:</paragraph>
 <figure>
 <location><page_8><loc_8><loc_76><loc_49><loc_87></location>
 </figure>
 <caption><location><page_8><loc_9><loc_73><loc_63><loc_74></location>b. Structure predicted by TableFormer, with superimposed matched PDF cell text:</caption>
 <figure>
-<location><page_8><loc_50><loc_77><loc_91><loc_88></location>
+<location><page_8><loc_8><loc_76><loc_49><loc_87></location>
-<caption>b. Structure predicted by TableFormer, with superimposed matched PDF cell text:</caption>
+<caption>Japanese language (previously unseen by TableFormer): Example table from FinTabNet:b. Structure predicted by TableFormer, with superimposed matched PDF cell text:</caption>
 </figure>
-<table>
+<figure>
 <location><page_8><loc_9><loc_63><loc_49><loc_72></location>
-<row_0><col_0><body></col_0><col_1><body></col_1><col_2><col_header>論文ファイル</col_2><col_3><col_header>論文ファイル</col_3><col_4><col_header>参考文献</col_4><col_5><col_header>参考文献</col_5></row_0>
+</figure>
 <row_1><col_0><col_header>出典</col_0><col_1><col_header>ファイル 数</col_1><col_2><col_header>英語</col_2><col_3><col_header>日本語</col_3><col_4><col_header>英語</col_4><col_5><col_header>日本語</col_5></row_1>
 <row_2><col_0><row_header>Association for Computational Linguistics(ACL2003)</col_0><col_1><body>65</col_1><col_2><body>65</col_2><col_3><body>0</col_3><col_4><body>150</col_4><col_5><body>0</col_5></row_2>
 <row_3><col_0><row_header>Computational Linguistics(COLING2002)</col_0><col_1><body>140</col_1><col_2><body>140</col_2><col_3><body>0</col_3><col_4><body>150</col_4><col_5><body>0</col_5></row_3>
 <row_4><col_0><row_header>電気情報通信学会 2003 年総合大会</col_0><col_1><body>150</col_1><col_2><body>8</col_2><col_3><body>142</col_3><col_4><body>223</col_4><col_5><body>147</col_5></row_4>
 <row_5><col_0><row_header>情報処理学会第 65 回全国大会 (2003)</col_0><col_1><body>177</col_1><col_2><body>1</col_2><col_3><body>176</col_3><col_4><body>150</col_4><col_5><body>236</col_5></row_5>
 <row_6><col_0><row_header>第 17 回人工知能学会全国大会 (2003)</col_0><col_1><body>208</col_1><col_2><body>5</col_2><col_3><body>203</col_3><col_4><body>152</col_4><col_5><body>244</col_5></row_6>
 <row_7><col_0><row_header>自然言語処理研究会第 146 〜 155 回</col_0><col_1><body>98</col_1><col_2><body>2</col_2><col_3><body>96</col_3><col_4><body>150</col_4><col_5><body>232</col_5></row_7>
 <row_8><col_0><row_header>WWW から収集した論文</col_0><col_1><body>107</col_1><col_2><body>73</col_2><col_3><body>34</col_3><col_4><body>147</col_4><col_5><body>96</col_5></row_8>
 <row_9><col_0><body></col_0><col_1><body>945</col_1><col_2><body>294</col_2><col_3><body>651</col_3><col_4><body>1122</col_4><col_5><body>955</col_5></row_9>
 </table>
 <caption><location><page_8><loc_62><loc_62><loc_90><loc_63></location>Text is aligned to match original for ease of viewing</caption>
-<table>
+<figure>
 <location><page_8><loc_50><loc_64><loc_90><loc_72></location>
 <caption>Text is aligned to match original for ease of viewing</caption>
-<row_0><col_0><body></col_0><col_1><col_header>Shares (in millions)</col_1><col_2><col_header>Shares (in millions)</col_2><col_3><col_header>Weighted Average Grant Date Fair Value</col_3><col_4><col_header>Weighted Average Grant Date Fair Value</col_4></row_0>
+</figure>
 <row_1><col_0><body></col_0><col_1><col_header>RS U s</col_1><col_2><col_header>PSUs</col_2><col_3><col_header>RSUs</col_3><col_4><col_header>PSUs</col_4></row_1>
 <row_2><col_0><row_header>Nonvested on Janua ry 1</col_0><col_1><body>1. 1</col_1><col_2><body>0.3</col_2><col_3><body>90.10 $</col_3><col_4><body>$ 91.19</col_4></row_2>
 <row_3><col_0><row_header>Granted</col_0><col_1><body>0. 5</col_1><col_2><body>0.1</col_2><col_3><body>117.44</col_3><col_4><body>122.41</col_4></row_3>
 <row_4><col_0><row_header>Vested</col_0><col_1><body>(0. 5 )</col_1><col_2><body>(0.1)</col_2><col_3><body>87.08</col_3><col_4><body>81.14</col_4></row_4>
 <row_5><col_0><row_header>Canceled or forfeited</col_0><col_1><body>(0. 1 )</col_1><col_2><body>-</col_2><col_3><body>102.01</col_3><col_4><body>92.18</col_4></row_5>
 <row_6><col_0><row_header>Nonvested on December 31</col_0><col_1><body>1.0</col_1><col_2><body>0.3</col_2><col_3><body>104.85 $</col_3><col_4><body>$ 104.51</col_4></row_6>
 </table>
 <caption><location><page_8><loc_8><loc_54><loc_89><loc_59></location>Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset.</caption>
 <figure>
 <location><page_8><loc_8><loc_44><loc_35><loc_52></location>
@ -296,7 +275,7 @@
 <paragraph><location><page_13><loc_10><loc_35><loc_45><loc_37></location>Figure 8: Example of a table with multi-line header.</paragraph>
 <caption><location><page_13><loc_50><loc_59><loc_89><loc_61></location>Figure 9: Example of a table with big empty distance between cells.</caption>
 <figure>
-<location><page_13><loc_51><loc_63><loc_70><loc_68></location>
+<location><page_13><loc_51><loc_63><loc_91><loc_87></location>
 <caption>Figure 9: Example of a table with big empty distance between cells.</caption>
 </figure>
 <caption><location><page_13><loc_51><loc_13><loc_89><loc_14></location>Figure 10: Example of a complex table with empty cells.</caption>
@ -319,7 +298,11 @@
 <location><page_14><loc_52><loc_55><loc_87><loc_89></location>
 <caption>Figure 13: Table predictions example on colorful table.</caption>
 </figure>
-<paragraph><location><page_14><loc_56><loc_13><loc_83><loc_14></location>Figure 14: Example with multi-line text.</paragraph>
+<caption><location><page_14><loc_56><loc_13><loc_83><loc_14></location>Figure 14: Example with multi-line text.</caption>
 <figure>
 <location><page_14><loc_52><loc_25><loc_85><loc_31></location>
 <caption>Figure 14: Example with multi-line text.</caption>
 </figure>
 <figure>
 <location><page_15><loc_9><loc_69><loc_46><loc_83></location>
 </figure>
@ -335,6 +318,9 @@
 <caption>Figure 15: Example with triangular table.</caption>
 </figure>
 <figure>
 <location><page_15><loc_53><loc_72><loc_86><loc_85></location>
 </figure>
 <figure>
 <location><page_15><loc_53><loc_41><loc_86><loc_54></location>
 </figure>
 <caption><location><page_15><loc_50><loc_15><loc_89><loc_18></location>Figure 16: Example of how post-processing helps to restore mis-aligned bounding boxes prediction artifact.</caption>
--- a/tests/data/groundtruth/docling_v1/2203.01017v2.json
+++ b/tests/data/groundtruth/docling_v1/2203.01017v2.json
--- a/tests/data/groundtruth/docling_v1/2203.01017v2.md
+++ b/tests/data/groundtruth/docling_v1/2203.01017v2.md
@ -219,40 +219,18 @@ Table 4: Results of structure with content retrieved using cell detection on Pub
 - Red - PDF cells, Green - predicted bounding boxes, Blue - post-processed predictions matched to PDF cells
-Japanese language (previously unseen by TableFormer):
+Japanese language (previously unseen by TableFormer): Example table from FinTabNet:
 Example table from FinTabNet:
 <!-- image -->
 b. Structure predicted by TableFormer, with superimposed matched PDF cell text:
 Japanese language (previously unseen by TableFormer): Example table from FinTabNet:b. Structure predicted by TableFormer, with superimposed matched PDF cell text:
 <!-- image -->
-
+<!-- image -->
 |                                                    |             | 論文ファイル   | 論文ファイル   | 参考文献   | 参考文献   |
 |----------------------------------------------------|-------------|----------------|----------------|------------|------------|
 | 出典                                               | ファイル 数 | 英語           | 日本語         | 英語       | 日本語     |
 | Association for Computational Linguistics(ACL2003) | 65          | 65             | 0              | 150        | 0          |
 | Computational Linguistics(COLING2002)              | 140         | 140            | 0              | 150        | 0          |
 | 電気情報通信学会 2003 年総合大会                   | 150         | 8              | 142            | 223        | 147        |
 | 情報処理学会第 65 回全国大会 (2003)                | 177         | 1              | 176            | 150        | 236        |
 | 第 17 回人工知能学会全国大会 (2003)                | 208         | 5              | 203            | 152        | 244        |
 | 自然言語処理研究会第 146 〜 155 回                 | 98          | 2              | 96             | 150        | 232        |
 | WWW から収集した論文                               | 107         | 73             | 34             | 147        | 96         |
 |                                                    | 945         | 294            | 651            | 1122       | 955        |
 Text is aligned to match original for ease of viewing
-
+<!-- image -->
 |                          | Shares (in millions)   | Shares (in millions)   | Weighted Average Grant Date Fair Value   | Weighted Average Grant Date Fair Value   |
 |--------------------------|------------------------|------------------------|------------------------------------------|------------------------------------------|
 |                          | RS U s                 | PSUs                   | RSUs                                     | PSUs                                     |
 | Nonvested on Janua ry 1  | 1. 1                   | 0.3                    | 90.10 $                                  | $ 91.19                                  |
 | Granted                  | 0. 5                   | 0.1                    | 117.44                                   | 122.41                                   |
 | Vested                   | (0. 5 )                | (0.1)                  | 87.08                                    | 81.14                                    |
 | Canceled or forfeited    | (0. 1 )                | -                      | 102.01                                   | 92.18                                    |
 | Nonvested on December 31 | 1.0                    | 0.3                    | 104.85 $                                 | $ 104.51                                 |
 Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset.
 <!-- image -->
@ -458,6 +436,7 @@ Figure 13: Table predictions example on colorful table.
 <!-- image -->
 Figure 14: Example with multi-line text.
 <!-- image -->
 <!-- image -->
@ -472,6 +451,9 @@ Figure 15: Example with triangular table.
 <!-- image -->
 <!-- image -->
 <!-- image -->
 Figure 16: Example of how post-processing helps to restore mis-aligned bounding boxes prediction artifact.
--- a/tests/data/groundtruth/docling_v1/2203.01017v2.pages.json
+++ b/tests/data/groundtruth/docling_v1/2203.01017v2.pages.json
--- a/tests/data/groundtruth/docling_v1/2206.01062.doctags.txt
+++ b/tests/data/groundtruth/docling_v1/2206.01062.doctags.txt
@ -3,17 +3,16 @@
 <paragraph><location><page_1><loc_15><loc_77><loc_32><loc_83></location>Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com</paragraph>
 <paragraph><location><page_1><loc_42><loc_77><loc_58><loc_83></location>Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com</paragraph>
 <paragraph><location><page_1><loc_69><loc_77><loc_85><loc_83></location>Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com</paragraph>
-<paragraph><location><page_1><loc_28><loc_70><loc_45><loc_76></location>Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com</paragraph>
+<paragraph><location><page_1><loc_28><loc_71><loc_45><loc_76></location>Ahmed S. Nassar IBM Research Rueschlikon, Switzerland</paragraph>
-<paragraph><location><page_1><loc_55><loc_70><loc_72><loc_76></location>Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com</paragraph>
+<paragraph><location><page_1><loc_29><loc_70><loc_44><loc_71></location>ahn@zurich.ibm.com</paragraph>
 <subtitle-level-1><location><page_1><loc_9><loc_67><loc_18><loc_69></location>ABSTRACT</subtitle-level-1>
 <paragraph><location><page_1><loc_9><loc_33><loc_48><loc_67></location>Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.</paragraph>
 <subtitle-level-1><location><page_1><loc_9><loc_29><loc_22><loc_30></location>CCS CONCEPTS</subtitle-level-1>
 <paragraph><location><page_1><loc_9><loc_25><loc_49><loc_29></location>· Information systems → Document structure ; · Applied computing → Document analysis ; · Computing methodologies → Machine learning ; Computer vision ; Object detection ;</paragraph>
 <paragraph><location><page_1><loc_9><loc_15><loc_48><loc_20></location>Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).</paragraph>
-<paragraph><location><page_1><loc_9><loc_14><loc_32><loc_15></location>KDD '22, August 14-18, 2022, Washington, DC, USA</paragraph>
+<paragraph><location><page_1><loc_9><loc_12><loc_32><loc_15></location>KDD '22, August 14-18, 2022, Washington, DC, USA © 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08.</paragraph>
 <paragraph><location><page_1><loc_9><loc_13><loc_31><loc_14></location>© 2022 Copyright held by the owner/author(s).</paragraph>
 <paragraph><location><page_1><loc_9><loc_12><loc_26><loc_13></location>ACM ISBN 978-1-4503-9385-0/22/08.</paragraph>
 <paragraph><location><page_1><loc_9><loc_11><loc_27><loc_12></location>https://doi.org/10.1145/3534678.3539043</paragraph>
 <paragraph><location><page_1><loc_55><loc_70><loc_72><loc_76></location>Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com</paragraph>
 <caption><location><page_1><loc_52><loc_29><loc_91><loc_32></location>Figure 1: Four examples of complex page layouts across different document categories</caption>
 <figure>
 <location><page_1><loc_53><loc_34><loc_90><loc_68></location>
--- a/tests/data/groundtruth/docling_v1/2206.01062.json
+++ b/tests/data/groundtruth/docling_v1/2206.01062.json
--- a/tests/data/groundtruth/docling_v1/2206.01062.md
+++ b/tests/data/groundtruth/docling_v1/2206.01062.md
@ -6,9 +6,9 @@ Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com
 Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com
-Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com
+Ahmed S. Nassar IBM Research Rueschlikon, Switzerland
-Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com
+ahn@zurich.ibm.com
 ## ABSTRACT
@ -20,14 +20,12 @@ Accurate document layout analysis is a key requirement for highquality PDF docum
 Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).
-KDD '22, August 14-18, 2022, Washington, DC, USA
+KDD '22, August 14-18, 2022, Washington, DC, USA © 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08.
 © 2022 Copyright held by the owner/author(s).
 ACM ISBN 978-1-4503-9385-0/22/08.
 https://doi.org/10.1145/3534678.3539043
 Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com
 Figure 1: Four examples of complex page layouts across different document categories
 <!-- image -->
--- a/tests/data/groundtruth/docling_v1/2206.01062.pages.json
+++ b/tests/data/groundtruth/docling_v1/2206.01062.pages.json
--- a/tests/data/groundtruth/docling_v1/2305.03393v1.doctags.txt
+++ b/tests/data/groundtruth/docling_v1/2305.03393v1.doctags.txt
@ -1,6 +1,6 @@
 <document>
 <subtitle-level-1><location><page_1><loc_22><loc_82><loc_79><loc_85></location>Optimized Table Tokenization for Table Structure Recognition</subtitle-level-1>
-<paragraph><location><page_1><loc_23><loc_75><loc_78><loc_79></location>Maksym Lysak [0000 - 0002 - 3723 - $^{6960]}$, Ahmed Nassar[0000 - 0002 - 9468 - $^{0822]}$, Nikolaos Livathinos [0000 - 0001 - 8513 - $^{3491]}$, Christoph Auer[0000 - 0001 - 5761 - $^{0422]}$, [0000 - 0002 - 8088 - 0823]</paragraph>
+<paragraph><location><page_1><loc_23><loc_75><loc_78><loc_79></location>Maksym Lysak [0000 − 0002 − 3723 − $^{6960]}$, Ahmed Nassar[0000 − 0002 − 9468 − $^{0822]}$, Nikolaos Livathinos [0000 − 0001 − 8513 − $^{3491]}$, Christoph Auer[0000 − 0001 − 5761 − $^{0422]}$, [0000 − 0002 − 8088 − 0823]</paragraph>
 <paragraph><location><page_1><loc_38><loc_74><loc_49><loc_75></location>and Peter Staar</paragraph>
 <paragraph><location><page_1><loc_46><loc_72><loc_55><loc_73></location>IBM Research</paragraph>
 <paragraph><location><page_1><loc_36><loc_70><loc_64><loc_71></location>{mly,ahn,nli,cau,taa}@zurich.ibm.com</paragraph>
--- a/tests/data/groundtruth/docling_v1/2305.03393v1.json
+++ b/tests/data/groundtruth/docling_v1/2305.03393v1.json
--- a/tests/data/groundtruth/docling_v1/2305.03393v1.md
+++ b/tests/data/groundtruth/docling_v1/2305.03393v1.md
@ -1,6 +1,6 @@
 ## Optimized Table Tokenization for Table Structure Recognition
-Maksym Lysak [0000 - 0002 - 3723 - $^{6960]}$, Ahmed Nassar[0000 - 0002 - 9468 - $^{0822]}$, Nikolaos Livathinos [0000 - 0001 - 8513 - $^{3491]}$, Christoph Auer[0000 - 0001 - 5761 - $^{0422]}$, [0000 - 0002 - 8088 - 0823]
+Maksym Lysak [0000 − 0002 − 3723 − $^{6960]}$, Ahmed Nassar[0000 − 0002 − 9468 − $^{0822]}$, Nikolaos Livathinos [0000 − 0001 − 8513 − $^{3491]}$, Christoph Auer[0000 − 0001 − 5761 − $^{0422]}$, [0000 − 0002 − 8088 − 0823]
 and Peter Staar
--- a/tests/data/groundtruth/docling_v1/2305.03393v1.pages.json
+++ b/tests/data/groundtruth/docling_v1/2305.03393v1.pages.json
--- a/tests/data/groundtruth/docling_v1/redp5110_sampled.doctags.txt
+++ b/tests/data/groundtruth/docling_v1/redp5110_sampled.doctags.txt
@ -5,10 +5,7 @@
 </figure>
 <subtitle-level-1><location><page_1><loc_6><loc_79><loc_96><loc_89></location>Row and Column Access Control Support in IBM DB2 for i</subtitle-level-1>
 <figure>
-<location><page_1><loc_5><loc_11><loc_96><loc_63></location>
+<location><page_1><loc_3><loc_1><loc_96><loc_64></location>
 </figure>
 <figure>
 <location><page_1><loc_52><loc_2><loc_95><loc_10></location>
 </figure>
 <subtitle-level-1><location><page_2><loc_11><loc_88><loc_28><loc_91></location>Contents</subtitle-level-1>
 <table>
@ -105,7 +102,9 @@
 <location><page_5><loc_5><loc_70><loc_39><loc_91></location>
 </figure>
 <paragraph><location><page_5><loc_13><loc_65><loc_19><loc_66></location>Chapter 1.</paragraph>
-<paragraph><location><page_5><loc_82><loc_84><loc_85><loc_88></location>1</paragraph>
+<figure>
 <location><page_5><loc_78><loc_82><loc_89><loc_91></location>
 </figure>
 <subtitle-level-1><location><page_5><loc_22><loc_61><loc_89><loc_68></location>Securing and protecting IBM DB2 data</subtitle-level-1>
 <paragraph><location><page_5><loc_22><loc_46><loc_89><loc_56></location>Recent news headlines are filled with reports of data breaches and cyber-attacks impacting global businesses of all sizes. The Identity Theft Resource Center$^{1}$ reports that almost 5000 data breaches have occurred since 2005, exposing over 600 million records of data. The financial cost of these data breaches is skyrocketing. Studies from the Ponemon Institute$^{2}$ revealed that the average cost of a data breach increased in 2013 by 15% globally and resulted in a brand equity loss of $9.4 million per attack. The average cost that is incurred for each lost record containing sensitive information increased more than 9% to $145 per record.</paragraph>
 <paragraph><location><page_5><loc_22><loc_38><loc_86><loc_44></location>Businesses must make a serious effort to secure their data and recognize that securing information assets is a cost of doing business. In many parts of the world and in many industries, securing the data is required by law and subject to audits. Data security is no longer an option; it is a requirement.</paragraph>
@ -155,17 +154,7 @@
 </table>
 <paragraph><location><page_8><loc_22><loc_40><loc_89><loc_43></location>To discover who has authorization to define and manage RCAC, you can use the query that is shown in Example 2-1.</paragraph>
 <paragraph><location><page_8><loc_22><loc_38><loc_76><loc_39></location>Example 2-1 Query to determine who has authority to define and manage RCAC</paragraph>
-<paragraph><location><page_8><loc_22><loc_35><loc_28><loc_36></location>SELECT</paragraph>
+<table><location><page_8><loc_22><loc_26><loc_89><loc_37></location>SELECT function_id, user_name, usage, user_type FROM function_usage WHERE function_id=’QIBM_DB_SECADM’ ORDER BY user_name;</table>
 <paragraph><location><page_8><loc_30><loc_35><loc_41><loc_36></location>function_id,</paragraph>
 <paragraph><location><page_8><loc_27><loc_34><loc_39><loc_35></location>user_name,</paragraph>
 <paragraph><location><page_8><loc_28><loc_32><loc_36><loc_33></location>usage,</paragraph>
 <paragraph><location><page_8><loc_27><loc_31><loc_39><loc_32></location>user_type</paragraph>
 <paragraph><location><page_8><loc_22><loc_29><loc_26><loc_30></location>FROM</paragraph>
 <paragraph><location><page_8><loc_29><loc_29><loc_43><loc_30></location>function_usage</paragraph>
 <paragraph><location><page_8><loc_22><loc_28><loc_27><loc_29></location>WHERE</paragraph>
 <paragraph><location><page_8><loc_29><loc_28><loc_54><loc_29></location>function_id=’QIBM_DB_SECADM’</paragraph>
 <paragraph><location><page_8><loc_22><loc_26><loc_29><loc_27></location>ORDER BY</paragraph>
 <paragraph><location><page_8><loc_31><loc_26><loc_39><loc_27></location>user_name;</paragraph>
 <subtitle-level-1><location><page_8><loc_11><loc_20><loc_41><loc_22></location>2.2 Separation of duties</subtitle-level-1>
 <paragraph><location><page_8><loc_22><loc_10><loc_89><loc_18></location>Separation of duties helps businesses comply with industry regulations or organizational requirements and simplifies the management of authorities. Separation of duties is commonly used to prevent fraudulent activities or errors by a single person. It provides the ability for administrative functions to be divided across individuals without overlapping responsibilities, so that one user does not possess unlimited authority, such as with the *ALLOBJ authority.</paragraph>
 <paragraph><location><page_9><loc_22><loc_82><loc_89><loc_91></location>For example, assume that a business has assigned the duty to manage security on IBM i to Theresa. Before release IBM i 7.2, to grant privileges, Theresa had to have the same privileges Theresa was granting to others. Therefore, to grant *USE privileges to the PAYROLL table, Theresa had to have *OBJMGT and *USE authority (or a higher level of authority, such as *ALLOBJ). This requirement allowed Theresa to access the data in the PAYROLL table even though Theresa's job description was only to manage its security.</paragraph>
@ -247,7 +236,7 @@
 <paragraph><location><page_12><loc_22><loc_34><loc_66><loc_35></location>- 1. There are user profiles for MGR, JANE, JUDY, and TONY.</paragraph>
 <paragraph><location><page_12><loc_22><loc_32><loc_65><loc_33></location>- 2. The user profile JANE specifies a group profile of MGR.</paragraph>
 <paragraph><location><page_12><loc_22><loc_28><loc_88><loc_31></location>- 3. If a user is connected to the server using user profile JANE, all of the following function invocations return a value of 1:</paragraph>
-<paragraph><location><page_12><loc_25><loc_19><loc_74><loc_27></location>VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE')</paragraph>
+<paragraph><location><page_12><loc_25><loc_19><loc_74><loc_27></location>VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY')</paragraph>
 <paragraph><location><page_13><loc_22><loc_90><loc_27><loc_91></location>RETURN</paragraph>
 <paragraph><location><page_13><loc_22><loc_88><loc_26><loc_89></location>CASE</paragraph>
 <paragraph><location><page_13><loc_22><loc_67><loc_85><loc_88></location>WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR', 'EMP' ) = 1 THEN EMPLOYEES . DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER = EMPLOYEES . USER_ID THEN EMPLOYEES . DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER <> EMPLOYEES . USER_ID THEN ( 9999 || '-' || MONTH ( EMPLOYEES . DATE_OF_BIRTH ) || '-' || DAY (EMPLOYEES.DATE_OF_BIRTH )) ELSE NULL END ENABLE ;</paragraph>
@ -269,12 +258,7 @@
 <paragraph><location><page_14><loc_22><loc_67><loc_89><loc_71></location>Now that you have created the row permission and the two column masks, RCAC must be activated. The row permission and the two column masks are enabled (last clause in the scripts), but now you must activate RCAC on the table. To do so, complete the following steps:</paragraph>
 <paragraph><location><page_14><loc_22><loc_65><loc_67><loc_66></location>- 1. Run the SQL statements that are shown in Example 3-10.</paragraph>
 <subtitle-level-1><location><page_14><loc_22><loc_62><loc_61><loc_63></location>Example 3-10 Activating RCAC on the EMPLOYEES table</subtitle-level-1>
-<paragraph><location><page_14><loc_22><loc_60><loc_62><loc_61></location>- /* Active Row Access Control (permissions) */</paragraph>
+<paragraph><location><page_14><loc_22><loc_54><loc_62><loc_61></location>- /* Active Row Access Control (permissions) */ /* Active Column Access Control (masks) */ ALTER TABLE HR_SCHEMA.EMPLOYEES ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL;</paragraph>
 <paragraph><location><page_14><loc_22><loc_58><loc_58><loc_60></location>- /* Active Column Access Control (masks)</paragraph>
 <paragraph><location><page_14><loc_60><loc_58><loc_62><loc_60></location>*/</paragraph>
 <paragraph><location><page_14><loc_22><loc_57><loc_48><loc_58></location>ALTER TABLE HR_SCHEMA.EMPLOYEES</paragraph>
 <paragraph><location><page_14><loc_22><loc_55><loc_44><loc_56></location>ACTIVATE ROW ACCESS CONTROL</paragraph>
 <paragraph><location><page_14><loc_22><loc_54><loc_48><loc_55></location>ACTIVATE COLUMN ACCESS CONTROL;</paragraph>
 <paragraph><location><page_14><loc_22><loc_48><loc_88><loc_52></location>- 2. Look at the definition of the EMPLOYEE table, as shown in Figure 3-11. To do this, from the main navigation pane of System i Navigator, click Schemas  HR_SCHEMA  Tables , right-click the EMPLOYEES table, and click Definition .</paragraph>
 <caption><location><page_14><loc_11><loc_17><loc_57><loc_18></location>Figure 3-11 Selecting the EMPLOYEES table from System i Navigator</caption>
 <figure>
--- a/tests/data/groundtruth/docling_v1/redp5110_sampled.json
+++ b/tests/data/groundtruth/docling_v1/redp5110_sampled.json
--- a/tests/data/groundtruth/docling_v1/redp5110_sampled.md
+++ b/tests/data/groundtruth/docling_v1/redp5110_sampled.md
@ -6,9 +6,6 @@ Front cover
 ## Row and Column Access Control Support in IBM DB2 for i
 <!-- image -->
 <!-- image -->
 ## Contents
@ -141,7 +138,8 @@ Hernando Bedoya is a Senior IT Specialist at STG Lab Services and Training in Ro
 Chapter 1.
-1
+
 <!-- image -->
 ## Securing and protecting IBM DB2 data
@ -223,27 +221,7 @@ To discover who has authorization to define and manage RCAC, you can use the que
 Example 2-1 Query to determine who has authority to define and manage RCAC
-SELECT
+SELECT function_id, user_name, usage, user_type FROM function_usage WHERE function_id=’QIBM_DB_SECADM’ ORDER BY user_name;
 function_id,
 user_name,
 usage,
 user_type
 FROM
 function_usage
 WHERE
 function_id=’QIBM_DB_SECADM’
 ORDER BY
 user_name;
 ## 2.2 Separation of duties
@ -350,7 +328,7 @@ Here is an example of using the VERIFY_GROUP_FOR_USER function:
 - 3. If a user is connected to the server using user profile JANE, all of the following function invocations return a value of 1:
-VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE')
+VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY')
 RETURN
@ -387,17 +365,7 @@ Now that you have created the row permission and the two column masks, RCAC must
 ## Example 3-10 Activating RCAC on the EMPLOYEES table
- /* Active Row Access Control (permissions) */
+- /* Active Row Access Control (permissions) */ /* Active Column Access Control (masks) */ ALTER TABLE HR_SCHEMA.EMPLOYEES ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL;
 - /* Active Column Access Control (masks)
 */
 ALTER TABLE HR_SCHEMA.EMPLOYEES
 ACTIVATE ROW ACCESS CONTROL
 ACTIVATE COLUMN ACCESS CONTROL;
 - 2. Look at the definition of the EMPLOYEE table, as shown in Figure 3-11. To do this, from the main navigation pane of System i Navigator, click Schemas  HR_SCHEMA  Tables , right-click the EMPLOYEES table, and click Definition .
--- a/tests/data/groundtruth/docling_v1/redp5110_sampled.pages.json
+++ b/tests/data/groundtruth/docling_v1/redp5110_sampled.pages.json
--- a/tests/data/groundtruth/docling_v2/2203.01017v2.doctags.txt
+++ b/tests/data/groundtruth/docling_v2/2203.01017v2.doctags.txt
@ -156,39 +156,17 @@
 <list_item><location><page_8><loc_9><loc_89><loc_10><loc_90></location>a.</list_item>
 <list_item><location><page_8><loc_11><loc_89><loc_82><loc_90></location>Red - PDF cells, Green - predicted bounding boxes, Blue - post-processed predictions matched to PDF cells</list_item>
 </unordered_list>
 <text><location><page_8><loc_9><loc_87><loc_46><loc_88></location>Japanese language (previously unseen by TableFormer):</text>
 <text><location><page_8><loc_50><loc_87><loc_70><loc_88></location>Example table from FinTabNet:</text>
 <figure>
 <location><page_8><loc_8><loc_76><loc_49><loc_87></location>
 <caption>Japanese language (previously unseen by TableFormer): Example table from FinTabNet:b. Structure predicted by TableFormer, with superimposed matched PDF cell text:</caption>
 </figure>
 <figure>
 <location><page_8><loc_50><loc_77><loc_91><loc_88></location>
 <caption>b. Structure predicted by TableFormer, with superimposed matched PDF cell text:</caption>
 </figure>
 <table>
 <location><page_8><loc_9><loc_63><loc_49><loc_72></location>
-<row_0><col_0><body></col_0><col_1><body></col_1><col_2><col_header>論文ファイル</col_2><col_3><col_header>論文ファイル</col_3><col_4><col_header>参考文献</col_4><col_5><col_header>参考文献</col_5></row_0>
+</figure>
-<row_1><col_0><col_header>出典</col_0><col_1><col_header>ファイル 数</col_1><col_2><col_header>英語</col_2><col_3><col_header>日本語</col_3><col_4><col_header>英語</col_4><col_5><col_header>日本語</col_5></row_1>
+<figure>
 <row_2><col_0><row_header>Association for Computational Linguistics(ACL2003)</col_0><col_1><body>65</col_1><col_2><body>65</col_2><col_3><body>0</col_3><col_4><body>150</col_4><col_5><body>0</col_5></row_2>
 <row_3><col_0><row_header>Computational Linguistics(COLING2002)</col_0><col_1><body>140</col_1><col_2><body>140</col_2><col_3><body>0</col_3><col_4><body>150</col_4><col_5><body>0</col_5></row_3>
 <row_4><col_0><row_header>電気情報通信学会 2003 年総合大会</col_0><col_1><body>150</col_1><col_2><body>8</col_2><col_3><body>142</col_3><col_4><body>223</col_4><col_5><body>147</col_5></row_4>
 <row_5><col_0><row_header>情報処理学会第 65 回全国大会 (2003)</col_0><col_1><body>177</col_1><col_2><body>1</col_2><col_3><body>176</col_3><col_4><body>150</col_4><col_5><body>236</col_5></row_5>
 <row_6><col_0><row_header>第 17 回人工知能学会全国大会 (2003)</col_0><col_1><body>208</col_1><col_2><body>5</col_2><col_3><body>203</col_3><col_4><body>152</col_4><col_5><body>244</col_5></row_6>
 <row_7><col_0><row_header>自然言語処理研究会第 146 〜 155 回</col_0><col_1><body>98</col_1><col_2><body>2</col_2><col_3><body>96</col_3><col_4><body>150</col_4><col_5><body>232</col_5></row_7>
 <row_8><col_0><row_header>WWW から収集した論文</col_0><col_1><body>107</col_1><col_2><body>73</col_2><col_3><body>34</col_3><col_4><body>147</col_4><col_5><body>96</col_5></row_8>
 <row_9><col_0><body></col_0><col_1><body>945</col_1><col_2><body>294</col_2><col_3><body>651</col_3><col_4><body>1122</col_4><col_5><body>955</col_5></row_9>
 </table>
 <table>
 <location><page_8><loc_50><loc_64><loc_90><loc_72></location>
 <caption>Text is aligned to match original for ease of viewing</caption>
-<row_0><col_0><body></col_0><col_1><col_header>Shares (in millions)</col_1><col_2><col_header>Shares (in millions)</col_2><col_3><col_header>Weighted Average Grant Date Fair Value</col_3><col_4><col_header>Weighted Average Grant Date Fair Value</col_4></row_0>
+</figure>
 <row_1><col_0><body></col_0><col_1><col_header>RS U s</col_1><col_2><col_header>PSUs</col_2><col_3><col_header>RSUs</col_3><col_4><col_header>PSUs</col_4></row_1>
 <row_2><col_0><row_header>Nonvested on Janua ry 1</col_0><col_1><body>1. 1</col_1><col_2><body>0.3</col_2><col_3><body>90.10 $</col_3><col_4><body>$ 91.19</col_4></row_2>
 <row_3><col_0><row_header>Granted</col_0><col_1><body>0. 5</col_1><col_2><body>0.1</col_2><col_3><body>117.44</col_3><col_4><body>122.41</col_4></row_3>
 <row_4><col_0><row_header>Vested</col_0><col_1><body>(0. 5 )</col_1><col_2><body>(0.1)</col_2><col_3><body>87.08</col_3><col_4><body>81.14</col_4></row_4>
 <row_5><col_0><row_header>Canceled or forfeited</col_0><col_1><body>(0. 1 )</col_1><col_2><body>-</col_2><col_3><body>102.01</col_3><col_4><body>92.18</col_4></row_5>
 <row_6><col_0><row_header>Nonvested on December 31</col_0><col_1><body>1.0</col_1><col_2><body>0.3</col_2><col_3><body>104.85 $</col_3><col_4><body>$ 104.51</col_4></row_6>
 </table>
 <figure>
 <location><page_8><loc_8><loc_44><loc_35><loc_52></location>
 <caption>Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset.</caption>
@ -316,7 +294,7 @@
 <text><location><page_13><loc_8><loc_83><loc_47><loc_86></location>Aditional images with examples of TableFormer predictions and post-processing can be found below.</text>
 <paragraph><location><page_13><loc_10><loc_35><loc_45><loc_37></location>Figure 8: Example of a table with multi-line header.</paragraph>
 <figure>
-<location><page_13><loc_51><loc_63><loc_70><loc_68></location>
+<location><page_13><loc_51><loc_63><loc_91><loc_87></location>
 <caption>Figure 9: Example of a table with big empty distance between cells.</caption>
 </figure>
 <figure>
@ -335,7 +313,10 @@
 <location><page_14><loc_52><loc_55><loc_87><loc_89></location>
 <caption>Figure 13: Table predictions example on colorful table.</caption>
 </figure>
-<paragraph><location><page_14><loc_56><loc_13><loc_83><loc_14></location>Figure 14: Example with multi-line text.</paragraph>
+<figure>
 <location><page_14><loc_52><loc_25><loc_85><loc_31></location>
 <caption>Figure 14: Example with multi-line text.</caption>
 </figure>
 <figure>
 <location><page_15><loc_9><loc_69><loc_46><loc_83></location>
 </figure>
@ -350,6 +331,9 @@
 <caption>Figure 15: Example with triangular table.</caption>
 </figure>
 <figure>
 <location><page_15><loc_53><loc_72><loc_86><loc_85></location>
 </figure>
 <figure>
 <location><page_15><loc_53><loc_41><loc_86><loc_54></location>
 </figure>
 <figure>
--- a/tests/data/groundtruth/docling_v2/2203.01017v2.json
+++ b/tests/data/groundtruth/docling_v2/2203.01017v2.json
--- a/tests/data/groundtruth/docling_v2/2203.01017v2.md
+++ b/tests/data/groundtruth/docling_v2/2203.01017v2.md
@ -223,38 +223,15 @@ Table 4: Results of structure with content retrieved using cell detection on Pub
 - a.
 - Red - PDF cells, Green - predicted bounding boxes, Blue - post-processed predictions matched to PDF cells
-Japanese language (previously unseen by TableFormer):
+Japanese language (previously unseen by TableFormer): Example table from FinTabNet:b. Structure predicted by TableFormer, with superimposed matched PDF cell text:
 Example table from FinTabNet:
 <!-- image -->
 b. Structure predicted by TableFormer, with superimposed matched PDF cell text:
 <!-- image -->
 |                                                    |             | 論文ファイル   | 論文ファイル   | 参考文献   | 参考文献   |
 |----------------------------------------------------|-------------|----------------|----------------|------------|------------|
 | 出典                                               | ファイル 数 | 英語           | 日本語         | 英語       | 日本語     |
 | Association for Computational Linguistics(ACL2003) | 65          | 65             | 0              | 150        | 0          |
 | Computational Linguistics(COLING2002)              | 140         | 140            | 0              | 150        | 0          |
 | 電気情報通信学会 2003 年総合大会                   | 150         | 8              | 142            | 223        | 147        |
 | 情報処理学会第 65 回全国大会 (2003)                | 177         | 1              | 176            | 150        | 236        |
 | 第 17 回人工知能学会全国大会 (2003)                | 208         | 5              | 203            | 152        | 244        |
 | 自然言語処理研究会第 146 〜 155 回                 | 98          | 2              | 96             | 150        | 232        |
 | WWW から収集した論文                               | 107         | 73             | 34             | 147        | 96         |
 |                                                    | 945         | 294            | 651            | 1122       | 955        |
 Text is aligned to match original for ease of viewing
-|                          | Shares (in millions)   | Shares (in millions)   | Weighted Average Grant Date Fair Value   | Weighted Average Grant Date Fair Value   |
+<!-- image -->
 |--------------------------|------------------------|------------------------|------------------------------------------|------------------------------------------|
 |                          | RS U s                 | PSUs                   | RSUs                                     | PSUs                                     |
 | Nonvested on Janua ry 1  | 1. 1                   | 0.3                    | 90.10 $                                  | $ 91.19                                  |
 | Granted                  | 0. 5                   | 0.1                    | 117.44                                   | 122.41                                   |
 | Vested                   | (0. 5 )                | (0.1)                  | 87.08                                    | 81.14                                    |
 | Canceled or forfeited    | (0. 1 )                | -                      | 102.01                                   | 92.18                                    |
 | Nonvested on December 31 | 1.0                    | 0.3                    | 104.85 $                                 | $ 104.51                                 |
 Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset.
@ -426,12 +403,16 @@ Figure 14: Example with multi-line text.
 <!-- image -->
 <!-- image -->
 Figure 15: Example with triangular table.
 <!-- image -->
 <!-- image -->
 <!-- image -->
 Figure 16: Example of how post-processing helps to restore mis-aligned bounding boxes prediction artifact.
 <!-- image -->
--- a/tests/data/groundtruth/docling_v2/2203.01017v2.pages.json
+++ b/tests/data/groundtruth/docling_v2/2203.01017v2.pages.json
--- a/tests/data/groundtruth/docling_v2/2206.01062.doctags.txt
+++ b/tests/data/groundtruth/docling_v2/2206.01062.doctags.txt
@ -3,17 +3,16 @@
 <text><location><page_1><loc_15><loc_77><loc_32><loc_83></location>Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com</text>
 <text><location><page_1><loc_42><loc_77><loc_58><loc_83></location>Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com</text>
 <text><location><page_1><loc_69><loc_77><loc_85><loc_83></location>Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com</text>
-<text><location><page_1><loc_28><loc_70><loc_45><loc_76></location>Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com</text>
+<text><location><page_1><loc_28><loc_71><loc_45><loc_76></location>Ahmed S. Nassar IBM Research Rueschlikon, Switzerland</text>
-<text><location><page_1><loc_55><loc_70><loc_72><loc_76></location>Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com</text>
+<text><location><page_1><loc_29><loc_70><loc_44><loc_71></location>ahn@zurich.ibm.com</text>
 <section_header_level_1><location><page_1><loc_9><loc_67><loc_18><loc_69></location>ABSTRACT</section_header_level_1>
 <text><location><page_1><loc_9><loc_33><loc_48><loc_67></location>Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.</text>
 <section_header_level_1><location><page_1><loc_9><loc_29><loc_22><loc_30></location>CCS CONCEPTS</section_header_level_1>
 <text><location><page_1><loc_9><loc_25><loc_49><loc_29></location>· Information systems → Document structure ; · Applied computing → Document analysis ; · Computing methodologies → Machine learning ; Computer vision ; Object detection ;</text>
 <text><location><page_1><loc_9><loc_15><loc_48><loc_20></location>Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).</text>
-<text><location><page_1><loc_9><loc_14><loc_32><loc_15></location>KDD '22, August 14-18, 2022, Washington, DC, USA</text>
+<text><location><page_1><loc_9><loc_12><loc_32><loc_15></location>KDD '22, August 14-18, 2022, Washington, DC, USA © 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08.</text>
 <text><location><page_1><loc_9><loc_13><loc_31><loc_14></location>© 2022 Copyright held by the owner/author(s).</text>
 <text><location><page_1><loc_9><loc_12><loc_26><loc_13></location>ACM ISBN 978-1-4503-9385-0/22/08.</text>
 <text><location><page_1><loc_9><loc_11><loc_27><loc_12></location>https://doi.org/10.1145/3534678.3539043</text>
 <text><location><page_1><loc_55><loc_70><loc_72><loc_76></location>Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com</text>
 <figure>
 <location><page_1><loc_53><loc_34><loc_90><loc_68></location>
 <caption>Figure 1: Four examples of complex page layouts across different document categories</caption>
--- a/tests/data/groundtruth/docling_v2/2206.01062.json
+++ b/tests/data/groundtruth/docling_v2/2206.01062.json
--- a/tests/data/groundtruth/docling_v2/2206.01062.md
+++ b/tests/data/groundtruth/docling_v2/2206.01062.md
@ -6,9 +6,9 @@ Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com
 Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com
-Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com
+Ahmed S. Nassar IBM Research Rueschlikon, Switzerland
-Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com
+ahn@zurich.ibm.com
 ## ABSTRACT
@ -20,14 +20,12 @@ Accurate document layout analysis is a key requirement for highquality PDF docum
 Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).
-KDD '22, August 14-18, 2022, Washington, DC, USA
+KDD '22, August 14-18, 2022, Washington, DC, USA © 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08.
 © 2022 Copyright held by the owner/author(s).
 ACM ISBN 978-1-4503-9385-0/22/08.
 https://doi.org/10.1145/3534678.3539043
 Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com
 Figure 1: Four examples of complex page layouts across different document categories
 <!-- image -->
--- a/tests/data/groundtruth/docling_v2/2206.01062.pages.json
+++ b/tests/data/groundtruth/docling_v2/2206.01062.pages.json
--- a/tests/data/groundtruth/docling_v2/2305.03393v1.doctags.txt
+++ b/tests/data/groundtruth/docling_v2/2305.03393v1.doctags.txt
@ -1,6 +1,6 @@
 <document>
 <section_header_level_1><location><page_1><loc_22><loc_82><loc_79><loc_85></location>Optimized Table Tokenization for Table Structure Recognition</section_header_level_1>
-<text><location><page_1><loc_23><loc_75><loc_78><loc_79></location>Maksym Lysak [0000 - 0002 - 3723 - $^{6960]}$, Ahmed Nassar[0000 - 0002 - 9468 - $^{0822]}$, Nikolaos Livathinos [0000 - 0001 - 8513 - $^{3491]}$, Christoph Auer[0000 - 0001 - 5761 - $^{0422]}$, [0000 - 0002 - 8088 - 0823]</text>
+<text><location><page_1><loc_23><loc_75><loc_78><loc_79></location>Maksym Lysak [0000 − 0002 − 3723 − $^{6960]}$, Ahmed Nassar[0000 − 0002 − 9468 − $^{0822]}$, Nikolaos Livathinos [0000 − 0001 − 8513 − $^{3491]}$, Christoph Auer[0000 − 0001 − 5761 − $^{0422]}$, [0000 − 0002 − 8088 − 0823]</text>
 <text><location><page_1><loc_38><loc_74><loc_49><loc_75></location>and Peter Staar</text>
 <text><location><page_1><loc_46><loc_72><loc_55><loc_73></location>IBM Research</text>
 <text><location><page_1><loc_36><loc_70><loc_64><loc_71></location>{mly,ahn,nli,cau,taa}@zurich.ibm.com</text>
--- a/tests/data/groundtruth/docling_v2/2305.03393v1.json
+++ b/tests/data/groundtruth/docling_v2/2305.03393v1.json
--- a/tests/data/groundtruth/docling_v2/2305.03393v1.md
+++ b/tests/data/groundtruth/docling_v2/2305.03393v1.md
@ -1,6 +1,6 @@
 ## Optimized Table Tokenization for Table Structure Recognition
-Maksym Lysak [0000 - 0002 - 3723 - $^{6960]}$, Ahmed Nassar[0000 - 0002 - 9468 - $^{0822]}$, Nikolaos Livathinos [0000 - 0001 - 8513 - $^{3491]}$, Christoph Auer[0000 - 0001 - 5761 - $^{0422]}$, [0000 - 0002 - 8088 - 0823]
+Maksym Lysak [0000 − 0002 − 3723 − $^{6960]}$, Ahmed Nassar[0000 − 0002 − 9468 − $^{0822]}$, Nikolaos Livathinos [0000 − 0001 − 8513 − $^{3491]}$, Christoph Auer[0000 − 0001 − 5761 − $^{0422]}$, [0000 − 0002 − 8088 − 0823]
 and Peter Staar
--- a/tests/data/groundtruth/docling_v2/2305.03393v1.pages.json
+++ b/tests/data/groundtruth/docling_v2/2305.03393v1.pages.json
--- a/tests/data/groundtruth/docling_v2/redp5110_sampled.doctags.txt
+++ b/tests/data/groundtruth/docling_v2/redp5110_sampled.doctags.txt
@ -5,10 +5,7 @@
 </figure>
 <section_header_level_1><location><page_1><loc_6><loc_79><loc_96><loc_89></location>Row and Column Access Control Support in IBM DB2 for i</section_header_level_1>
 <figure>
-<location><page_1><loc_5><loc_11><loc_96><loc_63></location>
+<location><page_1><loc_3><loc_1><loc_96><loc_64></location>
 </figure>
 <figure>
 <location><page_1><loc_52><loc_2><loc_95><loc_10></location>
 </figure>
 <section_header_level_1><location><page_2><loc_11><loc_88><loc_28><loc_91></location>Contents</section_header_level_1>
 <table>
@ -109,7 +106,9 @@
 <location><page_5><loc_5><loc_70><loc_39><loc_91></location>
 </figure>
 <text><location><page_5><loc_13><loc_65><loc_19><loc_66></location>Chapter 1.</text>
-<text><location><page_5><loc_82><loc_84><loc_85><loc_88></location>1</text>
+<figure>
 <location><page_5><loc_78><loc_82><loc_89><loc_91></location>
 </figure>
 <section_header_level_1><location><page_5><loc_22><loc_61><loc_89><loc_68></location>Securing and protecting IBM DB2 data</section_header_level_1>
 <text><location><page_5><loc_22><loc_46><loc_89><loc_56></location>Recent news headlines are filled with reports of data breaches and cyber-attacks impacting global businesses of all sizes. The Identity Theft Resource Center$^{1}$ reports that almost 5000 data breaches have occurred since 2005, exposing over 600 million records of data. The financial cost of these data breaches is skyrocketing. Studies from the Ponemon Institute$^{2}$ revealed that the average cost of a data breach increased in 2013 by 15% globally and resulted in a brand equity loss of $9.4 million per attack. The average cost that is incurred for each lost record containing sensitive information increased more than 9% to $145 per record.</text>
 <text><location><page_5><loc_22><loc_38><loc_86><loc_44></location>Businesses must make a serious effort to secure their data and recognize that securing information assets is a cost of doing business. In many parts of the world and in many industries, securing the data is required by law and subject to audits. Data security is no longer an option; it is a requirement.</text>
@ -165,17 +164,7 @@
 </table>
 <text><location><page_8><loc_22><loc_40><loc_89><loc_43></location>To discover who has authorization to define and manage RCAC, you can use the query that is shown in Example 2-1.</text>
 <paragraph><location><page_8><loc_22><loc_38><loc_76><loc_39></location>Example 2-1 Query to determine who has authority to define and manage RCAC</paragraph>
-<text><location><page_8><loc_22><loc_35><loc_28><loc_36></location>SELECT</text>
+<table><location><page_8><loc_22><loc_26><loc_89><loc_37></location>SELECT function_id, user_name, usage, user_type FROM function_usage WHERE function_id=’QIBM_DB_SECADM’ ORDER BY user_name;</table>
 <text><location><page_8><loc_30><loc_35><loc_41><loc_36></location>function_id,</text>
 <text><location><page_8><loc_27><loc_34><loc_39><loc_35></location>user_name,</text>
 <text><location><page_8><loc_28><loc_32><loc_36><loc_33></location>usage,</text>
 <text><location><page_8><loc_27><loc_31><loc_39><loc_32></location>user_type</text>
 <text><location><page_8><loc_22><loc_29><loc_26><loc_30></location>FROM</text>
 <text><location><page_8><loc_29><loc_29><loc_43><loc_30></location>function_usage</text>
 <text><location><page_8><loc_22><loc_28><loc_27><loc_29></location>WHERE</text>
 <text><location><page_8><loc_29><loc_28><loc_54><loc_29></location>function_id=’QIBM_DB_SECADM’</text>
 <text><location><page_8><loc_22><loc_26><loc_29><loc_27></location>ORDER BY</text>
 <text><location><page_8><loc_31><loc_26><loc_39><loc_27></location>user_name;</text>
 <section_header_level_1><location><page_8><loc_11><loc_20><loc_41><loc_22></location>2.2 Separation of duties</section_header_level_1>
 <text><location><page_8><loc_22><loc_10><loc_89><loc_18></location>Separation of duties helps businesses comply with industry regulations or organizational requirements and simplifies the management of authorities. Separation of duties is commonly used to prevent fraudulent activities or errors by a single person. It provides the ability for administrative functions to be divided across individuals without overlapping responsibilities, so that one user does not possess unlimited authority, such as with the *ALLOBJ authority.</text>
 <text><location><page_9><loc_22><loc_82><loc_89><loc_91></location>For example, assume that a business has assigned the duty to manage security on IBM i to Theresa. Before release IBM i 7.2, to grant privileges, Theresa had to have the same privileges Theresa was granting to others. Therefore, to grant *USE privileges to the PAYROLL table, Theresa had to have *OBJMGT and *USE authority (or a higher level of authority, such as *ALLOBJ). This requirement allowed Theresa to access the data in the PAYROLL table even though Theresa's job description was only to manage its security.</text>
@ -255,7 +244,7 @@
 <list_item><location><page_12><loc_22><loc_32><loc_65><loc_33></location>2. The user profile JANE specifies a group profile of MGR.</list_item>
 <list_item><location><page_12><loc_22><loc_28><loc_88><loc_31></location>3. If a user is connected to the server using user profile JANE, all of the following function invocations return a value of 1:</list_item>
 </unordered_list>
-<code><location><page_12><loc_25><loc_19><loc_74><loc_27></location>VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE')</code>
+<code><location><page_12><loc_25><loc_19><loc_74><loc_27></location>VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY')</code>
 <text><location><page_13><loc_22><loc_90><loc_27><loc_91></location>RETURN</text>
 <text><location><page_13><loc_22><loc_88><loc_26><loc_89></location>CASE</text>
 <code><location><page_13><loc_22><loc_67><loc_85><loc_88></location>WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR', 'EMP' ) = 1 THEN EMPLOYEES . DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER = EMPLOYEES . USER_ID THEN EMPLOYEES . DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER <> EMPLOYEES . USER_ID THEN ( 9999 || '-' || MONTH ( EMPLOYEES . DATE_OF_BIRTH ) || '-' || DAY (EMPLOYEES.DATE_OF_BIRTH )) ELSE NULL END ENABLE ;</code>
@ -283,14 +272,7 @@
 </unordered_list>
 <section_header_level_1><location><page_14><loc_22><loc_62><loc_61><loc_63></location>Example 3-10 Activating RCAC on the EMPLOYEES table</section_header_level_1>
 <unordered_list>
-<list_item><location><page_14><loc_22><loc_60><loc_62><loc_61></location>/* Active Row Access Control (permissions) */</list_item>
+<list_item><location><page_14><loc_22><loc_54><loc_62><loc_61></location>/* Active Row Access Control (permissions) */ /* Active Column Access Control (masks) */ ALTER TABLE HR_SCHEMA.EMPLOYEES ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL;</list_item>
 <list_item><location><page_14><loc_22><loc_58><loc_58><loc_60></location>/* Active Column Access Control (masks)</list_item>
 </unordered_list>
 <text><location><page_14><loc_60><loc_58><loc_62><loc_60></location>*/</text>
 <text><location><page_14><loc_22><loc_57><loc_48><loc_58></location>ALTER TABLE HR_SCHEMA.EMPLOYEES</text>
 <text><location><page_14><loc_22><loc_55><loc_44><loc_56></location>ACTIVATE ROW ACCESS CONTROL</text>
 <text><location><page_14><loc_22><loc_54><loc_48><loc_55></location>ACTIVATE COLUMN ACCESS CONTROL;</text>
 <unordered_list>
 <list_item><location><page_14><loc_22><loc_48><loc_88><loc_52></location>2. Look at the definition of the EMPLOYEE table, as shown in Figure 3-11. To do this, from the main navigation pane of System i Navigator, click Schemas  HR_SCHEMA  Tables , right-click the EMPLOYEES table, and click Definition .</list_item>
 </unordered_list>
 <figure>
--- a/tests/data/groundtruth/docling_v2/redp5110_sampled.json
+++ b/tests/data/groundtruth/docling_v2/redp5110_sampled.json
--- a/tests/data/groundtruth/docling_v2/redp5110_sampled.md
+++ b/tests/data/groundtruth/docling_v2/redp5110_sampled.md
@ -6,8 +6,6 @@ Front cover
 <!-- image -->
 <!-- image -->
 ## Contents
 | Notices                                                                                                                                        | . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vii   |
@ -120,7 +118,7 @@ Hernando Bedoya is a Senior IT Specialist at STG Lab Services and Training in Ro
 Chapter 1.
-1
+<!-- image -->
 ## Securing and protecting IBM DB2 data
@ -198,27 +196,7 @@ To discover who has authorization to define and manage RCAC, you can use the que
 Example 2-1 Query to determine who has authority to define and manage RCAC
-SELECT
+SELECT function\_id, user\_name, usage, user\_type FROM function\_usage WHERE function\_id=’QIBM\_DB\_SECADM’ ORDER BY user\_name;
 function\_id,
 user\_name,
 usage,
 user\_type
 FROM
 function\_usage
 WHERE
 function\_id=’QIBM\_DB\_SECADM’
 ORDER BY
 user\_name;
 ## 2.2 Separation of duties
@ -318,7 +296,7 @@ Here is an example of using the VERIFY\_GROUP\_FOR\_USER function:
 - 3. If a user is connected to the server using user profile JANE, all of the following function invocations return a value of 1:
 ```
-VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'MGR') VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'JANE', 'MGR') The following function invocation returns a value of 0: VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'JUDY', 'TONY') VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'JANE', 'MGR', 'STEVE')
+VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'MGR') VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'JANE', 'MGR') VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'JANE', 'MGR', 'STEVE') The following function invocation returns a value of 0: VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'JUDY', 'TONY')
 ```
 RETURN
@ -356,17 +334,7 @@ Now that you have created the row permission and the two column masks, RCAC must
 ## Example 3-10 Activating RCAC on the EMPLOYEES table
- /* Active Row Access Control (permissions) */
+- /* Active Row Access Control (permissions) */ /* Active Column Access Control (masks) */ ALTER TABLE HR\_SCHEMA.EMPLOYEES ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL;
 - /* Active Column Access Control (masks)
 */
 ALTER TABLE HR\_SCHEMA.EMPLOYEES
 ACTIVATE ROW ACCESS CONTROL
 ACTIVATE COLUMN ACCESS CONTROL;
 - 2. Look at the definition of the EMPLOYEE table, as shown in Figure 3-11. To do this, from the main navigation pane of System i Navigator, click Schemas  HR\_SCHEMA  Tables , right-click the EMPLOYEES table, and click Definition .
 Figure 3-11 Selecting the EMPLOYEES table from System i Navigator
--- a/tests/data/groundtruth/docling_v2/redp5110_sampled.pages.json
+++ b/tests/data/groundtruth/docling_v2/redp5110_sampled.pages.json
--- a/tests/test_e2e_conversion.py
+++ b/tests/test_e2e_conversion.py
@ -8,8 +8,8 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
 from .verify_utils import verify_conversion_result_v1, verify_conversion_result_v2
-GENERATE_V1 = False
+GENERATE_V1 = True
-GENERATE_V2 = False
+GENERATE_V2 = True
 def get_pdf_paths():
--- a/tests/test_e2e_ocr_conversion.py
+++ b/tests/test_e2e_ocr_conversion.py
@ -18,8 +18,8 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
 from .verify_utils import verify_conversion_result_v1, verify_conversion_result_v2
-GENERATE_V1 = False
+GENERATE_V1 = True
-GENERATE_V2 = False
+GENERATE_V2 = True
 def get_pdf_paths():