Further layout tuning

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-12-09 16:25:19 +01:00
parent 46ae215b68
commit 30fa21d863
36 changed files with 200 additions and 313 deletions

View File

@ -80,7 +80,7 @@ class LayoutModel(BasePageModel):
DocItemLabel.TITLE: (255, 153, 153), # Light Red (same as Section-Header)
DocItemLabel.FOOTNOTE: (200, 200, 255), # Light Blue
DocItemLabel.DOCUMENT_INDEX: (220, 220, 220), # Light Gray
DocItemLabel.CODE: (255, 223, 186), # Peach
DocItemLabel.CODE: (125, 125, 125), # Gray
DocItemLabel.CHECKBOX_SELECTED: (255, 182, 193), # Pale Green
DocItemLabel.CHECKBOX_UNSELECTED: (255, 182, 193), # Light Pink
DocItemLabel.FORM: (200, 255, 255), # Light Cyan

View File

@ -156,16 +156,16 @@ class LayoutPostprocessor:
SPECIAL_TYPES = WRAPPER_TYPES | {DocItemLabel.PICTURE}
CONFIDENCE_THRESHOLDS = {
DocItemLabel.CAPTION: 0.35,
DocItemLabel.FOOTNOTE: 0.35,
DocItemLabel.FORMULA: 0.35,
DocItemLabel.LIST_ITEM: 0.35,
DocItemLabel.PAGE_FOOTER: 0.35,
DocItemLabel.PAGE_HEADER: 0.35,
DocItemLabel.CAPTION: 0.5,
DocItemLabel.FOOTNOTE: 0.5,
DocItemLabel.FORMULA: 0.5,
DocItemLabel.LIST_ITEM: 0.5,
DocItemLabel.PAGE_FOOTER: 0.5,
DocItemLabel.PAGE_HEADER: 0.5,
DocItemLabel.PICTURE: 0.1,
DocItemLabel.SECTION_HEADER: 0.45,
DocItemLabel.TABLE: 0.35,
DocItemLabel.TEXT: 0.45,
DocItemLabel.TEXT: 0.55, # 0.45,
DocItemLabel.TITLE: 0.45,
DocItemLabel.CODE: 0.45,
DocItemLabel.CHECKBOX_SELECTED: 0.45,
@ -218,6 +218,12 @@ class LayoutPostprocessor:
final_clusters = self._sort_clusters(
self.regular_clusters + self.special_clusters
)
for cluster in final_clusters:
cluster.cells = self._sort_cells(cluster.cells)
# Also sort cells in children if any
for child in cluster.children:
child.cells = self._sort_cells(child.cells)
return final_clusters, self.cells
def _process_regular_clusters(self) -> List[Cluster]:
@ -318,6 +324,76 @@ class LayoutPostprocessor:
return picture_clusters + wrapper_clusters
def _should_prefer_cluster(
self, candidate: Cluster, other: Cluster, params: dict
) -> bool:
"""Determine if candidate cluster should be preferred over other cluster based on rules.
Returns True if candidate should be preferred, False if not."""
# Rule 1: LIST_ITEM vs TEXT
if (
candidate.label == DocItemLabel.LIST_ITEM
and other.label == DocItemLabel.TEXT
):
# Check if areas are similar (within 20% of each other)
area_ratio = candidate.bbox.area() / other.bbox.area()
area_similarity = abs(1 - area_ratio) < 0.2
if area_similarity:
return True
# Rule 2: CODE vs others
if candidate.label == DocItemLabel.CODE:
# Calculate how much of the other cluster is contained within the CODE cluster
overlap = other.bbox.intersection_area_with(candidate.bbox)
containment = overlap / other.bbox.area()
if containment > 0.8: # other is 80% contained within CODE
return True
# If no label-based rules matched, fall back to area/confidence thresholds
area_ratio = candidate.bbox.area() / other.bbox.area()
conf_diff = other.confidence - candidate.confidence
if (
area_ratio <= params["area_threshold"]
and conf_diff > params["conf_threshold"]
):
return False
return True # Default to keeping candidate if no rules triggered rejection
def _select_best_cluster_from_group(
self,
group_clusters: List[Cluster],
params: dict,
) -> Cluster:
"""Select best cluster from a group of overlapping clusters based on all rules."""
current_best = None
for candidate in group_clusters:
should_select = True
for other in group_clusters:
if other == candidate:
continue
if not self._should_prefer_cluster(candidate, other, params):
should_select = False
break
if should_select:
if current_best is None:
current_best = candidate
else:
# If both clusters pass rules, prefer the larger one unless confidence differs significantly
if (
candidate.bbox.area() > current_best.bbox.area()
and current_best.confidence - candidate.confidence
<= params["conf_threshold"]
):
current_best = candidate
return current_best if current_best else group_clusters[0]
def _remove_overlapping_clusters(
self,
clusters: List[Cluster],
@ -360,36 +436,14 @@ class LayoutPostprocessor:
continue
group_clusters = [valid_clusters[cid] for cid in group]
current_best = None
best = self._select_best_cluster_from_group(group_clusters, params)
for candidate in group_clusters:
should_select = True
for other in group_clusters:
if other == candidate:
continue
area_ratio = candidate.bbox.area() / other.bbox.area()
conf_diff = other.confidence - candidate.confidence
if (
area_ratio <= params["area_threshold"]
and conf_diff > params["conf_threshold"]
):
should_select = False
break
if should_select:
if current_best is None or (
candidate.bbox.area() > current_best.bbox.area()
and current_best.confidence - candidate.confidence
<= params["conf_threshold"]
):
current_best = candidate
best = current_best if current_best else group_clusters[0]
# Simple cell merging - no special cases
for cluster in group_clusters:
if cluster != best:
best.cells.extend(cluster.cells)
best.cells = self._sort_cells(best.cells)
result.append(best)
return result
@ -487,6 +541,10 @@ class LayoutPostprocessor:
return clusters
def _sort_cells(self, cells: List[Cell]) -> List[Cell]:
"""Sort cells in native reading order."""
return sorted(cells, key=lambda c: (c.id))
def _sort_clusters(self, clusters: List[Cluster]) -> List[Cluster]:
"""Sort clusters in reading order (top-to-bottom, left-to-right)."""

View File

@ -153,41 +153,20 @@
</table>
<paragraph><location><page_8><loc_9><loc_89><loc_10><loc_90></location>- a.</paragraph>
<paragraph><location><page_8><loc_11><loc_89><loc_82><loc_90></location>- Red - PDF cells, Green - predicted bounding boxes, Blue - post-processed predictions matched to PDF cells</paragraph>
<paragraph><location><page_8><loc_9><loc_87><loc_46><loc_88></location>Japanese language (previously unseen by TableFormer):</paragraph>
<paragraph><location><page_8><loc_50><loc_87><loc_70><loc_88></location>Example table from FinTabNet:</paragraph>
<figure>
<location><page_8><loc_8><loc_76><loc_49><loc_87></location>
</figure>
<caption><location><page_8><loc_9><loc_87><loc_70><loc_88></location>Japanese language (previously unseen by TableFormer): Example table from FinTabNet:</caption>
<caption><location><page_8><loc_9><loc_73><loc_63><loc_74></location>b. Structure predicted by TableFormer, with superimposed matched PDF cell text:</caption>
<figure>
<location><page_8><loc_50><loc_77><loc_91><loc_88></location>
<caption>b. Structure predicted by TableFormer, with superimposed matched PDF cell text:</caption>
<location><page_8><loc_8><loc_76><loc_49><loc_87></location>
<caption>Japanese language (previously unseen by TableFormer): Example table from FinTabNet:b. Structure predicted by TableFormer, with superimposed matched PDF cell text:</caption>
</figure>
<table>
<figure>
<location><page_8><loc_9><loc_63><loc_49><loc_72></location>
<row_0><col_0><body></col_0><col_1><body></col_1><col_2><col_header>論文ファイル</col_2><col_3><col_header>論文ファイル</col_3><col_4><col_header>参考文献</col_4><col_5><col_header>参考文献</col_5></row_0>
<row_1><col_0><col_header>出典</col_0><col_1><col_header>ファイル 数</col_1><col_2><col_header>英語</col_2><col_3><col_header>日本語</col_3><col_4><col_header>英語</col_4><col_5><col_header>日本語</col_5></row_1>
<row_2><col_0><row_header>Association for Computational Linguistics(ACL2003)</col_0><col_1><body>65</col_1><col_2><body>65</col_2><col_3><body>0</col_3><col_4><body>150</col_4><col_5><body>0</col_5></row_2>
<row_3><col_0><row_header>Computational Linguistics(COLING2002)</col_0><col_1><body>140</col_1><col_2><body>140</col_2><col_3><body>0</col_3><col_4><body>150</col_4><col_5><body>0</col_5></row_3>
<row_4><col_0><row_header>電気情報通信学会 2003 年総合大会</col_0><col_1><body>150</col_1><col_2><body>8</col_2><col_3><body>142</col_3><col_4><body>223</col_4><col_5><body>147</col_5></row_4>
<row_5><col_0><row_header>情報処理学会第 65 回全国大会 (2003)</col_0><col_1><body>177</col_1><col_2><body>1</col_2><col_3><body>176</col_3><col_4><body>150</col_4><col_5><body>236</col_5></row_5>
<row_6><col_0><row_header>第 17 回人工知能学会全国大会 (2003)</col_0><col_1><body>208</col_1><col_2><body>5</col_2><col_3><body>203</col_3><col_4><body>152</col_4><col_5><body>244</col_5></row_6>
<row_7><col_0><row_header>自然言語処理研究会第 146 〜 155 回</col_0><col_1><body>98</col_1><col_2><body>2</col_2><col_3><body>96</col_3><col_4><body>150</col_4><col_5><body>232</col_5></row_7>
<row_8><col_0><row_header>WWW から収集した論文</col_0><col_1><body>107</col_1><col_2><body>73</col_2><col_3><body>34</col_3><col_4><body>147</col_4><col_5><body>96</col_5></row_8>
<row_9><col_0><body></col_0><col_1><body>945</col_1><col_2><body>294</col_2><col_3><body>651</col_3><col_4><body>1122</col_4><col_5><body>955</col_5></row_9>
</table>
</figure>
<caption><location><page_8><loc_62><loc_62><loc_90><loc_63></location>Text is aligned to match original for ease of viewing</caption>
<table>
<figure>
<location><page_8><loc_50><loc_64><loc_90><loc_72></location>
<caption>Text is aligned to match original for ease of viewing</caption>
<row_0><col_0><body></col_0><col_1><col_header>Shares (in millions)</col_1><col_2><col_header>Shares (in millions)</col_2><col_3><col_header>Weighted Average Grant Date Fair Value</col_3><col_4><col_header>Weighted Average Grant Date Fair Value</col_4></row_0>
<row_1><col_0><body></col_0><col_1><col_header>RS U s</col_1><col_2><col_header>PSUs</col_2><col_3><col_header>RSUs</col_3><col_4><col_header>PSUs</col_4></row_1>
<row_2><col_0><row_header>Nonvested on Janua ry 1</col_0><col_1><body>1. 1</col_1><col_2><body>0.3</col_2><col_3><body>90.10 $</col_3><col_4><body>$ 91.19</col_4></row_2>
<row_3><col_0><row_header>Granted</col_0><col_1><body>0. 5</col_1><col_2><body>0.1</col_2><col_3><body>117.44</col_3><col_4><body>122.41</col_4></row_3>
<row_4><col_0><row_header>Vested</col_0><col_1><body>(0. 5 )</col_1><col_2><body>(0.1)</col_2><col_3><body>87.08</col_3><col_4><body>81.14</col_4></row_4>
<row_5><col_0><row_header>Canceled or forfeited</col_0><col_1><body>(0. 1 )</col_1><col_2><body>-</col_2><col_3><body>102.01</col_3><col_4><body>92.18</col_4></row_5>
<row_6><col_0><row_header>Nonvested on December 31</col_0><col_1><body>1.0</col_1><col_2><body>0.3</col_2><col_3><body>104.85 $</col_3><col_4><body>$ 104.51</col_4></row_6>
</table>
</figure>
<caption><location><page_8><loc_8><loc_54><loc_89><loc_59></location>Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset.</caption>
<figure>
<location><page_8><loc_8><loc_44><loc_35><loc_52></location>
@ -296,7 +275,7 @@
<paragraph><location><page_13><loc_10><loc_35><loc_45><loc_37></location>Figure 8: Example of a table with multi-line header.</paragraph>
<caption><location><page_13><loc_50><loc_59><loc_89><loc_61></location>Figure 9: Example of a table with big empty distance between cells.</caption>
<figure>
<location><page_13><loc_51><loc_63><loc_70><loc_68></location>
<location><page_13><loc_51><loc_63><loc_91><loc_87></location>
<caption>Figure 9: Example of a table with big empty distance between cells.</caption>
</figure>
<caption><location><page_13><loc_51><loc_13><loc_89><loc_14></location>Figure 10: Example of a complex table with empty cells.</caption>
@ -319,7 +298,11 @@
<location><page_14><loc_52><loc_55><loc_87><loc_89></location>
<caption>Figure 13: Table predictions example on colorful table.</caption>
</figure>
<paragraph><location><page_14><loc_56><loc_13><loc_83><loc_14></location>Figure 14: Example with multi-line text.</paragraph>
<caption><location><page_14><loc_56><loc_13><loc_83><loc_14></location>Figure 14: Example with multi-line text.</caption>
<figure>
<location><page_14><loc_52><loc_25><loc_85><loc_31></location>
<caption>Figure 14: Example with multi-line text.</caption>
</figure>
<figure>
<location><page_15><loc_9><loc_69><loc_46><loc_83></location>
</figure>
@ -335,6 +318,9 @@
<caption>Figure 15: Example with triangular table.</caption>
</figure>
<figure>
<location><page_15><loc_53><loc_72><loc_86><loc_85></location>
</figure>
<figure>
<location><page_15><loc_53><loc_41><loc_86><loc_54></location>
</figure>
<caption><location><page_15><loc_50><loc_15><loc_89><loc_18></location>Figure 16: Example of how post-processing helps to restore mis-aligned bounding boxes prediction artifact.</caption>

File diff suppressed because one or more lines are too long

View File

@ -219,40 +219,18 @@ Table 4: Results of structure with content retrieved using cell detection on Pub
- Red - PDF cells, Green - predicted bounding boxes, Blue - post-processed predictions matched to PDF cells
Japanese language (previously unseen by TableFormer):
Example table from FinTabNet:
<!-- image -->
Japanese language (previously unseen by TableFormer): Example table from FinTabNet:
b. Structure predicted by TableFormer, with superimposed matched PDF cell text:
Japanese language (previously unseen by TableFormer): Example table from FinTabNet:b. Structure predicted by TableFormer, with superimposed matched PDF cell text:
<!-- image -->
| | | 論文ファイル | 論文ファイル | 参考文献 | 参考文献 |
|----------------------------------------------------|-------------|----------------|----------------|------------|------------|
| 出典 | ファイル 数 | 英語 | 日本語 | 英語 | 日本語 |
| Association for Computational Linguistics(ACL2003) | 65 | 65 | 0 | 150 | 0 |
| Computational Linguistics(COLING2002) | 140 | 140 | 0 | 150 | 0 |
| 電気情報通信学会 2003 年総合大会 | 150 | 8 | 142 | 223 | 147 |
| 情報処理学会第 65 回全国大会 (2003) | 177 | 1 | 176 | 150 | 236 |
| 第 17 回人工知能学会全国大会 (2003) | 208 | 5 | 203 | 152 | 244 |
| 自然言語処理研究会第 146 〜 155 回 | 98 | 2 | 96 | 150 | 232 |
| WWW から収集した論文 | 107 | 73 | 34 | 147 | 96 |
| | 945 | 294 | 651 | 1122 | 955 |
<!-- image -->
Text is aligned to match original for ease of viewing
| | Shares (in millions) | Shares (in millions) | Weighted Average Grant Date Fair Value | Weighted Average Grant Date Fair Value |
|--------------------------|------------------------|------------------------|------------------------------------------|------------------------------------------|
| | RS U s | PSUs | RSUs | PSUs |
| Nonvested on Janua ry 1 | 1. 1 | 0.3 | 90.10 $ | $ 91.19 |
| Granted | 0. 5 | 0.1 | 117.44 | 122.41 |
| Vested | (0. 5 ) | (0.1) | 87.08 | 81.14 |
| Canceled or forfeited | (0. 1 ) | - | 102.01 | 92.18 |
| Nonvested on December 31 | 1.0 | 0.3 | 104.85 $ | $ 104.51 |
<!-- image -->
Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset.
<!-- image -->
@ -458,6 +436,7 @@ Figure 13: Table predictions example on colorful table.
<!-- image -->
Figure 14: Example with multi-line text.
<!-- image -->
<!-- image -->
@ -472,6 +451,9 @@ Figure 15: Example with triangular table.
<!-- image -->
<!-- image -->
<!-- image -->
Figure 16: Example of how post-processing helps to restore mis-aligned bounding boxes prediction artifact.

File diff suppressed because one or more lines are too long

View File

@ -3,17 +3,16 @@
<paragraph><location><page_1><loc_15><loc_77><loc_32><loc_83></location>Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com</paragraph>
<paragraph><location><page_1><loc_42><loc_77><loc_58><loc_83></location>Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com</paragraph>
<paragraph><location><page_1><loc_69><loc_77><loc_85><loc_83></location>Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com</paragraph>
<paragraph><location><page_1><loc_28><loc_70><loc_45><loc_76></location>Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com</paragraph>
<paragraph><location><page_1><loc_55><loc_70><loc_72><loc_76></location>Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com</paragraph>
<paragraph><location><page_1><loc_28><loc_71><loc_45><loc_76></location>Ahmed S. Nassar IBM Research Rueschlikon, Switzerland</paragraph>
<paragraph><location><page_1><loc_29><loc_70><loc_44><loc_71></location>ahn@zurich.ibm.com</paragraph>
<subtitle-level-1><location><page_1><loc_9><loc_67><loc_18><loc_69></location>ABSTRACT</subtitle-level-1>
<paragraph><location><page_1><loc_9><loc_33><loc_48><loc_67></location>Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.</paragraph>
<subtitle-level-1><location><page_1><loc_9><loc_29><loc_22><loc_30></location>CCS CONCEPTS</subtitle-level-1>
<paragraph><location><page_1><loc_9><loc_25><loc_49><loc_29></location>· Information systems → Document structure ; · Applied computing → Document analysis ; · Computing methodologies → Machine learning ; Computer vision ; Object detection ;</paragraph>
<paragraph><location><page_1><loc_9><loc_15><loc_48><loc_20></location>Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).</paragraph>
<paragraph><location><page_1><loc_9><loc_14><loc_32><loc_15></location>KDD '22, August 14-18, 2022, Washington, DC, USA</paragraph>
<paragraph><location><page_1><loc_9><loc_13><loc_31><loc_14></location>© 2022 Copyright held by the owner/author(s).</paragraph>
<paragraph><location><page_1><loc_9><loc_12><loc_26><loc_13></location>ACM ISBN 978-1-4503-9385-0/22/08.</paragraph>
<paragraph><location><page_1><loc_9><loc_12><loc_32><loc_15></location>KDD '22, August 14-18, 2022, Washington, DC, USA © 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08.</paragraph>
<paragraph><location><page_1><loc_9><loc_11><loc_27><loc_12></location>https://doi.org/10.1145/3534678.3539043</paragraph>
<paragraph><location><page_1><loc_55><loc_70><loc_72><loc_76></location>Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com</paragraph>
<caption><location><page_1><loc_52><loc_29><loc_91><loc_32></location>Figure 1: Four examples of complex page layouts across different document categories</caption>
<figure>
<location><page_1><loc_53><loc_34><loc_90><loc_68></location>

File diff suppressed because one or more lines are too long

View File

@ -6,9 +6,9 @@ Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com
Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com
Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com
Ahmed S. Nassar IBM Research Rueschlikon, Switzerland
Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com
ahn@zurich.ibm.com
## ABSTRACT
@ -20,14 +20,12 @@ Accurate document layout analysis is a key requirement for highquality PDF docum
Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).
KDD '22, August 14-18, 2022, Washington, DC, USA
© 2022 Copyright held by the owner/author(s).
ACM ISBN 978-1-4503-9385-0/22/08.
KDD '22, August 14-18, 2022, Washington, DC, USA © 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08.
https://doi.org/10.1145/3534678.3539043
Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com
Figure 1: Four examples of complex page layouts across different document categories
<!-- image -->

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,6 @@
<document>
<subtitle-level-1><location><page_1><loc_22><loc_82><loc_79><loc_85></location>Optimized Table Tokenization for Table Structure Recognition</subtitle-level-1>
<paragraph><location><page_1><loc_23><loc_75><loc_78><loc_79></location>Maksym Lysak [0000 - 0002 - 3723 - $^{6960]}$, Ahmed Nassar[0000 - 0002 - 9468 - $^{0822]}$, Nikolaos Livathinos [0000 - 0001 - 8513 - $^{3491]}$, Christoph Auer[0000 - 0001 - 5761 - $^{0422]}$, [0000 - 0002 - 8088 - 0823]</paragraph>
<paragraph><location><page_1><loc_23><loc_75><loc_78><loc_79></location>Maksym Lysak [0000 0002 3723 $^{6960]}$, Ahmed Nassar[0000 0002 9468 $^{0822]}$, Nikolaos Livathinos [0000 0001 8513 $^{3491]}$, Christoph Auer[0000 0001 5761 $^{0422]}$, [0000 0002 8088 0823]</paragraph>
<paragraph><location><page_1><loc_38><loc_74><loc_49><loc_75></location>and Peter Staar</paragraph>
<paragraph><location><page_1><loc_46><loc_72><loc_55><loc_73></location>IBM Research</paragraph>
<paragraph><location><page_1><loc_36><loc_70><loc_64><loc_71></location>{mly,ahn,nli,cau,taa}@zurich.ibm.com</paragraph>

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,6 @@
## Optimized Table Tokenization for Table Structure Recognition
Maksym Lysak [0000 - 0002 - 3723 - $^{6960]}$, Ahmed Nassar[0000 - 0002 - 9468 - $^{0822]}$, Nikolaos Livathinos [0000 - 0001 - 8513 - $^{3491]}$, Christoph Auer[0000 - 0001 - 5761 - $^{0422]}$, [0000 - 0002 - 8088 - 0823]
Maksym Lysak [0000 0002 3723 $^{6960]}$, Ahmed Nassar[0000 0002 9468 $^{0822]}$, Nikolaos Livathinos [0000 0001 8513 $^{3491]}$, Christoph Auer[0000 0001 5761 $^{0422]}$, [0000 0002 8088 0823]
and Peter Staar

File diff suppressed because one or more lines are too long

View File

@ -5,10 +5,7 @@
</figure>
<subtitle-level-1><location><page_1><loc_6><loc_79><loc_96><loc_89></location>Row and Column Access Control Support in IBM DB2 for i</subtitle-level-1>
<figure>
<location><page_1><loc_5><loc_11><loc_96><loc_63></location>
</figure>
<figure>
<location><page_1><loc_52><loc_2><loc_95><loc_10></location>
<location><page_1><loc_3><loc_1><loc_96><loc_64></location>
</figure>
<subtitle-level-1><location><page_2><loc_11><loc_88><loc_28><loc_91></location>Contents</subtitle-level-1>
<table>
@ -105,7 +102,9 @@
<location><page_5><loc_5><loc_70><loc_39><loc_91></location>
</figure>
<paragraph><location><page_5><loc_13><loc_65><loc_19><loc_66></location>Chapter 1.</paragraph>
<paragraph><location><page_5><loc_82><loc_84><loc_85><loc_88></location>1</paragraph>
<figure>
<location><page_5><loc_78><loc_82><loc_89><loc_91></location>
</figure>
<subtitle-level-1><location><page_5><loc_22><loc_61><loc_89><loc_68></location>Securing and protecting IBM DB2 data</subtitle-level-1>
<paragraph><location><page_5><loc_22><loc_46><loc_89><loc_56></location>Recent news headlines are filled with reports of data breaches and cyber-attacks impacting global businesses of all sizes. The Identity Theft Resource Center$^{1}$ reports that almost 5000 data breaches have occurred since 2005, exposing over 600 million records of data. The financial cost of these data breaches is skyrocketing. Studies from the Ponemon Institute$^{2}$ revealed that the average cost of a data breach increased in 2013 by 15% globally and resulted in a brand equity loss of $9.4 million per attack. The average cost that is incurred for each lost record containing sensitive information increased more than 9% to $145 per record.</paragraph>
<paragraph><location><page_5><loc_22><loc_38><loc_86><loc_44></location>Businesses must make a serious effort to secure their data and recognize that securing information assets is a cost of doing business. In many parts of the world and in many industries, securing the data is required by law and subject to audits. Data security is no longer an option; it is a requirement.</paragraph>
@ -155,17 +154,7 @@
</table>
<paragraph><location><page_8><loc_22><loc_40><loc_89><loc_43></location>To discover who has authorization to define and manage RCAC, you can use the query that is shown in Example 2-1.</paragraph>
<paragraph><location><page_8><loc_22><loc_38><loc_76><loc_39></location>Example 2-1 Query to determine who has authority to define and manage RCAC</paragraph>
<paragraph><location><page_8><loc_22><loc_35><loc_28><loc_36></location>SELECT</paragraph>
<paragraph><location><page_8><loc_30><loc_35><loc_41><loc_36></location>function_id,</paragraph>
<paragraph><location><page_8><loc_27><loc_34><loc_39><loc_35></location>user_name,</paragraph>
<paragraph><location><page_8><loc_28><loc_32><loc_36><loc_33></location>usage,</paragraph>
<paragraph><location><page_8><loc_27><loc_31><loc_39><loc_32></location>user_type</paragraph>
<paragraph><location><page_8><loc_22><loc_29><loc_26><loc_30></location>FROM</paragraph>
<paragraph><location><page_8><loc_29><loc_29><loc_43><loc_30></location>function_usage</paragraph>
<paragraph><location><page_8><loc_22><loc_28><loc_27><loc_29></location>WHERE</paragraph>
<paragraph><location><page_8><loc_29><loc_28><loc_54><loc_29></location>function_id=QIBM_DB_SECADM</paragraph>
<paragraph><location><page_8><loc_22><loc_26><loc_29><loc_27></location>ORDER BY</paragraph>
<paragraph><location><page_8><loc_31><loc_26><loc_39><loc_27></location>user_name;</paragraph>
<table><location><page_8><loc_22><loc_26><loc_89><loc_37></location>SELECT function_id, user_name, usage, user_type FROM function_usage WHERE function_id=QIBM_DB_SECADM ORDER BY user_name;</table>
<subtitle-level-1><location><page_8><loc_11><loc_20><loc_41><loc_22></location>2.2 Separation of duties</subtitle-level-1>
<paragraph><location><page_8><loc_22><loc_10><loc_89><loc_18></location>Separation of duties helps businesses comply with industry regulations or organizational requirements and simplifies the management of authorities. Separation of duties is commonly used to prevent fraudulent activities or errors by a single person. It provides the ability for administrative functions to be divided across individuals without overlapping responsibilities, so that one user does not possess unlimited authority, such as with the *ALLOBJ authority.</paragraph>
<paragraph><location><page_9><loc_22><loc_82><loc_89><loc_91></location>For example, assume that a business has assigned the duty to manage security on IBM i to Theresa. Before release IBM i 7.2, to grant privileges, Theresa had to have the same privileges Theresa was granting to others. Therefore, to grant *USE privileges to the PAYROLL table, Theresa had to have *OBJMGT and *USE authority (or a higher level of authority, such as *ALLOBJ). This requirement allowed Theresa to access the data in the PAYROLL table even though Theresa's job description was only to manage its security.</paragraph>
@ -247,7 +236,7 @@
<paragraph><location><page_12><loc_22><loc_34><loc_66><loc_35></location>- 1. There are user profiles for MGR, JANE, JUDY, and TONY.</paragraph>
<paragraph><location><page_12><loc_22><loc_32><loc_65><loc_33></location>- 2. The user profile JANE specifies a group profile of MGR.</paragraph>
<paragraph><location><page_12><loc_22><loc_28><loc_88><loc_31></location>- 3. If a user is connected to the server using user profile JANE, all of the following function invocations return a value of 1:</paragraph>
<paragraph><location><page_12><loc_25><loc_19><loc_74><loc_27></location>VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE')</paragraph>
<paragraph><location><page_12><loc_25><loc_19><loc_74><loc_27></location>VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY')</paragraph>
<paragraph><location><page_13><loc_22><loc_90><loc_27><loc_91></location>RETURN</paragraph>
<paragraph><location><page_13><loc_22><loc_88><loc_26><loc_89></location>CASE</paragraph>
<paragraph><location><page_13><loc_22><loc_67><loc_85><loc_88></location>WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR', 'EMP' ) = 1 THEN EMPLOYEES . DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER = EMPLOYEES . USER_ID THEN EMPLOYEES . DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER <> EMPLOYEES . USER_ID THEN ( 9999 || '-' || MONTH ( EMPLOYEES . DATE_OF_BIRTH ) || '-' || DAY (EMPLOYEES.DATE_OF_BIRTH )) ELSE NULL END ENABLE ;</paragraph>
@ -269,12 +258,7 @@
<paragraph><location><page_14><loc_22><loc_67><loc_89><loc_71></location>Now that you have created the row permission and the two column masks, RCAC must be activated. The row permission and the two column masks are enabled (last clause in the scripts), but now you must activate RCAC on the table. To do so, complete the following steps:</paragraph>
<paragraph><location><page_14><loc_22><loc_65><loc_67><loc_66></location>- 1. Run the SQL statements that are shown in Example 3-10.</paragraph>
<subtitle-level-1><location><page_14><loc_22><loc_62><loc_61><loc_63></location>Example 3-10 Activating RCAC on the EMPLOYEES table</subtitle-level-1>
<paragraph><location><page_14><loc_22><loc_60><loc_62><loc_61></location>- /* Active Row Access Control (permissions) */</paragraph>
<paragraph><location><page_14><loc_22><loc_58><loc_58><loc_60></location>- /* Active Column Access Control (masks)</paragraph>
<paragraph><location><page_14><loc_60><loc_58><loc_62><loc_60></location>*/</paragraph>
<paragraph><location><page_14><loc_22><loc_57><loc_48><loc_58></location>ALTER TABLE HR_SCHEMA.EMPLOYEES</paragraph>
<paragraph><location><page_14><loc_22><loc_55><loc_44><loc_56></location>ACTIVATE ROW ACCESS CONTROL</paragraph>
<paragraph><location><page_14><loc_22><loc_54><loc_48><loc_55></location>ACTIVATE COLUMN ACCESS CONTROL;</paragraph>
<paragraph><location><page_14><loc_22><loc_54><loc_62><loc_61></location>- /* Active Row Access Control (permissions) */ /* Active Column Access Control (masks) */ ALTER TABLE HR_SCHEMA.EMPLOYEES ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL;</paragraph>
<paragraph><location><page_14><loc_22><loc_48><loc_88><loc_52></location>- 2. Look at the definition of the EMPLOYEE table, as shown in Figure 3-11. To do this, from the main navigation pane of System i Navigator, click Schemas  HR_SCHEMA  Tables , right-click the EMPLOYEES table, and click Definition .</paragraph>
<caption><location><page_14><loc_11><loc_17><loc_57><loc_18></location>Figure 3-11 Selecting the EMPLOYEES table from System i Navigator</caption>
<figure>

File diff suppressed because one or more lines are too long

View File

@ -6,9 +6,6 @@ Front cover
## Row and Column Access Control Support in IBM DB2 for i
<!-- image -->
<!-- image -->
## Contents
@ -141,7 +138,8 @@ Hernando Bedoya is a Senior IT Specialist at STG Lab Services and Training in Ro
Chapter 1.
1
<!-- image -->
## Securing and protecting IBM DB2 data
@ -223,27 +221,7 @@ To discover who has authorization to define and manage RCAC, you can use the que
Example 2-1 Query to determine who has authority to define and manage RCAC
SELECT
function_id,
user_name,
usage,
user_type
FROM
function_usage
WHERE
function_id=QIBM_DB_SECADM
ORDER BY
user_name;
SELECT function_id, user_name, usage, user_type FROM function_usage WHERE function_id=QIBM_DB_SECADM ORDER BY user_name;
## 2.2 Separation of duties
@ -350,7 +328,7 @@ Here is an example of using the VERIFY_GROUP_FOR_USER function:
- 3. If a user is connected to the server using user profile JANE, all of the following function invocations return a value of 1:
VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE')
VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY')
RETURN
@ -387,17 +365,7 @@ Now that you have created the row permission and the two column masks, RCAC must
## Example 3-10 Activating RCAC on the EMPLOYEES table
- /* Active Row Access Control (permissions) */
- /* Active Column Access Control (masks)
*/
ALTER TABLE HR_SCHEMA.EMPLOYEES
ACTIVATE ROW ACCESS CONTROL
ACTIVATE COLUMN ACCESS CONTROL;
- /* Active Row Access Control (permissions) */ /* Active Column Access Control (masks) */ ALTER TABLE HR_SCHEMA.EMPLOYEES ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL;
- 2. Look at the definition of the EMPLOYEE table, as shown in Figure 3-11. To do this, from the main navigation pane of System i Navigator, click Schemas  HR_SCHEMA  Tables , right-click the EMPLOYEES table, and click Definition .

File diff suppressed because one or more lines are too long

View File

@ -156,39 +156,17 @@
<list_item><location><page_8><loc_9><loc_89><loc_10><loc_90></location>a.</list_item>
<list_item><location><page_8><loc_11><loc_89><loc_82><loc_90></location>Red - PDF cells, Green - predicted bounding boxes, Blue - post-processed predictions matched to PDF cells</list_item>
</unordered_list>
<text><location><page_8><loc_9><loc_87><loc_46><loc_88></location>Japanese language (previously unseen by TableFormer):</text>
<text><location><page_8><loc_50><loc_87><loc_70><loc_88></location>Example table from FinTabNet:</text>
<figure>
<location><page_8><loc_8><loc_76><loc_49><loc_87></location>
<caption>Japanese language (previously unseen by TableFormer): Example table from FinTabNet:b. Structure predicted by TableFormer, with superimposed matched PDF cell text:</caption>
</figure>
<figure>
<location><page_8><loc_50><loc_77><loc_91><loc_88></location>
<caption>b. Structure predicted by TableFormer, with superimposed matched PDF cell text:</caption>
</figure>
<table>
<location><page_8><loc_9><loc_63><loc_49><loc_72></location>
<row_0><col_0><body></col_0><col_1><body></col_1><col_2><col_header>論文ファイル</col_2><col_3><col_header>論文ファイル</col_3><col_4><col_header>参考文献</col_4><col_5><col_header>参考文献</col_5></row_0>
<row_1><col_0><col_header>出典</col_0><col_1><col_header>ファイル 数</col_1><col_2><col_header>英語</col_2><col_3><col_header>日本語</col_3><col_4><col_header>英語</col_4><col_5><col_header>日本語</col_5></row_1>
<row_2><col_0><row_header>Association for Computational Linguistics(ACL2003)</col_0><col_1><body>65</col_1><col_2><body>65</col_2><col_3><body>0</col_3><col_4><body>150</col_4><col_5><body>0</col_5></row_2>
<row_3><col_0><row_header>Computational Linguistics(COLING2002)</col_0><col_1><body>140</col_1><col_2><body>140</col_2><col_3><body>0</col_3><col_4><body>150</col_4><col_5><body>0</col_5></row_3>
<row_4><col_0><row_header>電気情報通信学会 2003 年総合大会</col_0><col_1><body>150</col_1><col_2><body>8</col_2><col_3><body>142</col_3><col_4><body>223</col_4><col_5><body>147</col_5></row_4>
<row_5><col_0><row_header>情報処理学会第 65 回全国大会 (2003)</col_0><col_1><body>177</col_1><col_2><body>1</col_2><col_3><body>176</col_3><col_4><body>150</col_4><col_5><body>236</col_5></row_5>
<row_6><col_0><row_header>第 17 回人工知能学会全国大会 (2003)</col_0><col_1><body>208</col_1><col_2><body>5</col_2><col_3><body>203</col_3><col_4><body>152</col_4><col_5><body>244</col_5></row_6>
<row_7><col_0><row_header>自然言語処理研究会第 146 〜 155 回</col_0><col_1><body>98</col_1><col_2><body>2</col_2><col_3><body>96</col_3><col_4><body>150</col_4><col_5><body>232</col_5></row_7>
<row_8><col_0><row_header>WWW から収集した論文</col_0><col_1><body>107</col_1><col_2><body>73</col_2><col_3><body>34</col_3><col_4><body>147</col_4><col_5><body>96</col_5></row_8>
<row_9><col_0><body></col_0><col_1><body>945</col_1><col_2><body>294</col_2><col_3><body>651</col_3><col_4><body>1122</col_4><col_5><body>955</col_5></row_9>
</table>
<table>
</figure>
<figure>
<location><page_8><loc_50><loc_64><loc_90><loc_72></location>
<caption>Text is aligned to match original for ease of viewing</caption>
<row_0><col_0><body></col_0><col_1><col_header>Shares (in millions)</col_1><col_2><col_header>Shares (in millions)</col_2><col_3><col_header>Weighted Average Grant Date Fair Value</col_3><col_4><col_header>Weighted Average Grant Date Fair Value</col_4></row_0>
<row_1><col_0><body></col_0><col_1><col_header>RS U s</col_1><col_2><col_header>PSUs</col_2><col_3><col_header>RSUs</col_3><col_4><col_header>PSUs</col_4></row_1>
<row_2><col_0><row_header>Nonvested on Janua ry 1</col_0><col_1><body>1. 1</col_1><col_2><body>0.3</col_2><col_3><body>90.10 $</col_3><col_4><body>$ 91.19</col_4></row_2>
<row_3><col_0><row_header>Granted</col_0><col_1><body>0. 5</col_1><col_2><body>0.1</col_2><col_3><body>117.44</col_3><col_4><body>122.41</col_4></row_3>
<row_4><col_0><row_header>Vested</col_0><col_1><body>(0. 5 )</col_1><col_2><body>(0.1)</col_2><col_3><body>87.08</col_3><col_4><body>81.14</col_4></row_4>
<row_5><col_0><row_header>Canceled or forfeited</col_0><col_1><body>(0. 1 )</col_1><col_2><body>-</col_2><col_3><body>102.01</col_3><col_4><body>92.18</col_4></row_5>
<row_6><col_0><row_header>Nonvested on December 31</col_0><col_1><body>1.0</col_1><col_2><body>0.3</col_2><col_3><body>104.85 $</col_3><col_4><body>$ 104.51</col_4></row_6>
</table>
</figure>
<figure>
<location><page_8><loc_8><loc_44><loc_35><loc_52></location>
<caption>Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset.</caption>
@ -316,7 +294,7 @@
<text><location><page_13><loc_8><loc_83><loc_47><loc_86></location>Aditional images with examples of TableFormer predictions and post-processing can be found below.</text>
<paragraph><location><page_13><loc_10><loc_35><loc_45><loc_37></location>Figure 8: Example of a table with multi-line header.</paragraph>
<figure>
<location><page_13><loc_51><loc_63><loc_70><loc_68></location>
<location><page_13><loc_51><loc_63><loc_91><loc_87></location>
<caption>Figure 9: Example of a table with big empty distance between cells.</caption>
</figure>
<figure>
@ -335,7 +313,10 @@
<location><page_14><loc_52><loc_55><loc_87><loc_89></location>
<caption>Figure 13: Table predictions example on colorful table.</caption>
</figure>
<paragraph><location><page_14><loc_56><loc_13><loc_83><loc_14></location>Figure 14: Example with multi-line text.</paragraph>
<figure>
<location><page_14><loc_52><loc_25><loc_85><loc_31></location>
<caption>Figure 14: Example with multi-line text.</caption>
</figure>
<figure>
<location><page_15><loc_9><loc_69><loc_46><loc_83></location>
</figure>
@ -350,6 +331,9 @@
<caption>Figure 15: Example with triangular table.</caption>
</figure>
<figure>
<location><page_15><loc_53><loc_72><loc_86><loc_85></location>
</figure>
<figure>
<location><page_15><loc_53><loc_41><loc_86><loc_54></location>
</figure>
<figure>

File diff suppressed because one or more lines are too long

View File

@ -223,38 +223,15 @@ Table 4: Results of structure with content retrieved using cell detection on Pub
- a.
- Red - PDF cells, Green - predicted bounding boxes, Blue - post-processed predictions matched to PDF cells
Japanese language (previously unseen by TableFormer):
Example table from FinTabNet:
Japanese language (previously unseen by TableFormer): Example table from FinTabNet:b. Structure predicted by TableFormer, with superimposed matched PDF cell text:
<!-- image -->
b. Structure predicted by TableFormer, with superimposed matched PDF cell text:
<!-- image -->
| | | 論文ファイル | 論文ファイル | 参考文献 | 参考文献 |
|----------------------------------------------------|-------------|----------------|----------------|------------|------------|
| 出典 | ファイル 数 | 英語 | 日本語 | 英語 | 日本語 |
| Association for Computational Linguistics(ACL2003) | 65 | 65 | 0 | 150 | 0 |
| Computational Linguistics(COLING2002) | 140 | 140 | 0 | 150 | 0 |
| 電気情報通信学会 2003 年総合大会 | 150 | 8 | 142 | 223 | 147 |
| 情報処理学会第 65 回全国大会 (2003) | 177 | 1 | 176 | 150 | 236 |
| 第 17 回人工知能学会全国大会 (2003) | 208 | 5 | 203 | 152 | 244 |
| 自然言語処理研究会第 146 〜 155 回 | 98 | 2 | 96 | 150 | 232 |
| WWW から収集した論文 | 107 | 73 | 34 | 147 | 96 |
| | 945 | 294 | 651 | 1122 | 955 |
Text is aligned to match original for ease of viewing
| | Shares (in millions) | Shares (in millions) | Weighted Average Grant Date Fair Value | Weighted Average Grant Date Fair Value |
|--------------------------|------------------------|------------------------|------------------------------------------|------------------------------------------|
| | RS U s | PSUs | RSUs | PSUs |
| Nonvested on Janua ry 1 | 1. 1 | 0.3 | 90.10 $ | $ 91.19 |
| Granted | 0. 5 | 0.1 | 117.44 | 122.41 |
| Vested | (0. 5 ) | (0.1) | 87.08 | 81.14 |
| Canceled or forfeited | (0. 1 ) | - | 102.01 | 92.18 |
| Nonvested on December 31 | 1.0 | 0.3 | 104.85 $ | $ 104.51 |
<!-- image -->
Figure 5: One of the benefits of TableFormer is that it is language agnostic, as an example, the left part of the illustration demonstrates TableFormer predictions on previously unseen language (Japanese). Additionally, we see that TableFormer is robust to variability in style and content, right side of the illustration shows the example of the TableFormer prediction from the FinTabNet dataset.
@ -426,12 +403,16 @@ Figure 14: Example with multi-line text.
<!-- image -->
<!-- image -->
Figure 15: Example with triangular table.
<!-- image -->
<!-- image -->
<!-- image -->
Figure 16: Example of how post-processing helps to restore mis-aligned bounding boxes prediction artifact.
<!-- image -->

File diff suppressed because one or more lines are too long

View File

@ -3,17 +3,16 @@
<text><location><page_1><loc_15><loc_77><loc_32><loc_83></location>Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com</text>
<text><location><page_1><loc_42><loc_77><loc_58><loc_83></location>Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com</text>
<text><location><page_1><loc_69><loc_77><loc_85><loc_83></location>Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com</text>
<text><location><page_1><loc_28><loc_70><loc_45><loc_76></location>Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com</text>
<text><location><page_1><loc_55><loc_70><loc_72><loc_76></location>Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com</text>
<text><location><page_1><loc_28><loc_71><loc_45><loc_76></location>Ahmed S. Nassar IBM Research Rueschlikon, Switzerland</text>
<text><location><page_1><loc_29><loc_70><loc_44><loc_71></location>ahn@zurich.ibm.com</text>
<section_header_level_1><location><page_1><loc_9><loc_67><loc_18><loc_69></location>ABSTRACT</section_header_level_1>
<text><location><page_1><loc_9><loc_33><loc_48><loc_67></location>Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.</text>
<section_header_level_1><location><page_1><loc_9><loc_29><loc_22><loc_30></location>CCS CONCEPTS</section_header_level_1>
<text><location><page_1><loc_9><loc_25><loc_49><loc_29></location>· Information systems → Document structure ; · Applied computing → Document analysis ; · Computing methodologies → Machine learning ; Computer vision ; Object detection ;</text>
<text><location><page_1><loc_9><loc_15><loc_48><loc_20></location>Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).</text>
<text><location><page_1><loc_9><loc_14><loc_32><loc_15></location>KDD '22, August 14-18, 2022, Washington, DC, USA</text>
<text><location><page_1><loc_9><loc_13><loc_31><loc_14></location>© 2022 Copyright held by the owner/author(s).</text>
<text><location><page_1><loc_9><loc_12><loc_26><loc_13></location>ACM ISBN 978-1-4503-9385-0/22/08.</text>
<text><location><page_1><loc_9><loc_12><loc_32><loc_15></location>KDD '22, August 14-18, 2022, Washington, DC, USA © 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08.</text>
<text><location><page_1><loc_9><loc_11><loc_27><loc_12></location>https://doi.org/10.1145/3534678.3539043</text>
<text><location><page_1><loc_55><loc_70><loc_72><loc_76></location>Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com</text>
<figure>
<location><page_1><loc_53><loc_34><loc_90><loc_68></location>
<caption>Figure 1: Four examples of complex page layouts across different document categories</caption>

File diff suppressed because one or more lines are too long

View File

@ -6,9 +6,9 @@ Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com
Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com
Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com
Ahmed S. Nassar IBM Research Rueschlikon, Switzerland
Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com
ahn@zurich.ibm.com
## ABSTRACT
@ -20,14 +20,12 @@ Accurate document layout analysis is a key requirement for highquality PDF docum
Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).
KDD '22, August 14-18, 2022, Washington, DC, USA
© 2022 Copyright held by the owner/author(s).
ACM ISBN 978-1-4503-9385-0/22/08.
KDD '22, August 14-18, 2022, Washington, DC, USA © 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08.
https://doi.org/10.1145/3534678.3539043
Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com
Figure 1: Four examples of complex page layouts across different document categories
<!-- image -->

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,6 @@
<document>
<section_header_level_1><location><page_1><loc_22><loc_82><loc_79><loc_85></location>Optimized Table Tokenization for Table Structure Recognition</section_header_level_1>
<text><location><page_1><loc_23><loc_75><loc_78><loc_79></location>Maksym Lysak [0000 - 0002 - 3723 - $^{6960]}$, Ahmed Nassar[0000 - 0002 - 9468 - $^{0822]}$, Nikolaos Livathinos [0000 - 0001 - 8513 - $^{3491]}$, Christoph Auer[0000 - 0001 - 5761 - $^{0422]}$, [0000 - 0002 - 8088 - 0823]</text>
<text><location><page_1><loc_23><loc_75><loc_78><loc_79></location>Maksym Lysak [0000 0002 3723 $^{6960]}$, Ahmed Nassar[0000 0002 9468 $^{0822]}$, Nikolaos Livathinos [0000 0001 8513 $^{3491]}$, Christoph Auer[0000 0001 5761 $^{0422]}$, [0000 0002 8088 0823]</text>
<text><location><page_1><loc_38><loc_74><loc_49><loc_75></location>and Peter Staar</text>
<text><location><page_1><loc_46><loc_72><loc_55><loc_73></location>IBM Research</text>
<text><location><page_1><loc_36><loc_70><loc_64><loc_71></location>{mly,ahn,nli,cau,taa}@zurich.ibm.com</text>

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,6 @@
## Optimized Table Tokenization for Table Structure Recognition
Maksym Lysak [0000 - 0002 - 3723 - $^{6960]}$, Ahmed Nassar[0000 - 0002 - 9468 - $^{0822]}$, Nikolaos Livathinos [0000 - 0001 - 8513 - $^{3491]}$, Christoph Auer[0000 - 0001 - 5761 - $^{0422]}$, [0000 - 0002 - 8088 - 0823]
Maksym Lysak [0000 0002 3723 $^{6960]}$, Ahmed Nassar[0000 0002 9468 $^{0822]}$, Nikolaos Livathinos [0000 0001 8513 $^{3491]}$, Christoph Auer[0000 0001 5761 $^{0422]}$, [0000 0002 8088 0823]
and Peter Staar

File diff suppressed because one or more lines are too long

View File

@ -5,10 +5,7 @@
</figure>
<section_header_level_1><location><page_1><loc_6><loc_79><loc_96><loc_89></location>Row and Column Access Control Support in IBM DB2 for i</section_header_level_1>
<figure>
<location><page_1><loc_5><loc_11><loc_96><loc_63></location>
</figure>
<figure>
<location><page_1><loc_52><loc_2><loc_95><loc_10></location>
<location><page_1><loc_3><loc_1><loc_96><loc_64></location>
</figure>
<section_header_level_1><location><page_2><loc_11><loc_88><loc_28><loc_91></location>Contents</section_header_level_1>
<table>
@ -109,7 +106,9 @@
<location><page_5><loc_5><loc_70><loc_39><loc_91></location>
</figure>
<text><location><page_5><loc_13><loc_65><loc_19><loc_66></location>Chapter 1.</text>
<text><location><page_5><loc_82><loc_84><loc_85><loc_88></location>1</text>
<figure>
<location><page_5><loc_78><loc_82><loc_89><loc_91></location>
</figure>
<section_header_level_1><location><page_5><loc_22><loc_61><loc_89><loc_68></location>Securing and protecting IBM DB2 data</section_header_level_1>
<text><location><page_5><loc_22><loc_46><loc_89><loc_56></location>Recent news headlines are filled with reports of data breaches and cyber-attacks impacting global businesses of all sizes. The Identity Theft Resource Center$^{1}$ reports that almost 5000 data breaches have occurred since 2005, exposing over 600 million records of data. The financial cost of these data breaches is skyrocketing. Studies from the Ponemon Institute$^{2}$ revealed that the average cost of a data breach increased in 2013 by 15% globally and resulted in a brand equity loss of $9.4 million per attack. The average cost that is incurred for each lost record containing sensitive information increased more than 9% to $145 per record.</text>
<text><location><page_5><loc_22><loc_38><loc_86><loc_44></location>Businesses must make a serious effort to secure their data and recognize that securing information assets is a cost of doing business. In many parts of the world and in many industries, securing the data is required by law and subject to audits. Data security is no longer an option; it is a requirement.</text>
@ -165,17 +164,7 @@
</table>
<text><location><page_8><loc_22><loc_40><loc_89><loc_43></location>To discover who has authorization to define and manage RCAC, you can use the query that is shown in Example 2-1.</text>
<paragraph><location><page_8><loc_22><loc_38><loc_76><loc_39></location>Example 2-1 Query to determine who has authority to define and manage RCAC</paragraph>
<text><location><page_8><loc_22><loc_35><loc_28><loc_36></location>SELECT</text>
<text><location><page_8><loc_30><loc_35><loc_41><loc_36></location>function_id,</text>
<text><location><page_8><loc_27><loc_34><loc_39><loc_35></location>user_name,</text>
<text><location><page_8><loc_28><loc_32><loc_36><loc_33></location>usage,</text>
<text><location><page_8><loc_27><loc_31><loc_39><loc_32></location>user_type</text>
<text><location><page_8><loc_22><loc_29><loc_26><loc_30></location>FROM</text>
<text><location><page_8><loc_29><loc_29><loc_43><loc_30></location>function_usage</text>
<text><location><page_8><loc_22><loc_28><loc_27><loc_29></location>WHERE</text>
<text><location><page_8><loc_29><loc_28><loc_54><loc_29></location>function_id=QIBM_DB_SECADM</text>
<text><location><page_8><loc_22><loc_26><loc_29><loc_27></location>ORDER BY</text>
<text><location><page_8><loc_31><loc_26><loc_39><loc_27></location>user_name;</text>
<table><location><page_8><loc_22><loc_26><loc_89><loc_37></location>SELECT function_id, user_name, usage, user_type FROM function_usage WHERE function_id=QIBM_DB_SECADM ORDER BY user_name;</table>
<section_header_level_1><location><page_8><loc_11><loc_20><loc_41><loc_22></location>2.2 Separation of duties</section_header_level_1>
<text><location><page_8><loc_22><loc_10><loc_89><loc_18></location>Separation of duties helps businesses comply with industry regulations or organizational requirements and simplifies the management of authorities. Separation of duties is commonly used to prevent fraudulent activities or errors by a single person. It provides the ability for administrative functions to be divided across individuals without overlapping responsibilities, so that one user does not possess unlimited authority, such as with the *ALLOBJ authority.</text>
<text><location><page_9><loc_22><loc_82><loc_89><loc_91></location>For example, assume that a business has assigned the duty to manage security on IBM i to Theresa. Before release IBM i 7.2, to grant privileges, Theresa had to have the same privileges Theresa was granting to others. Therefore, to grant *USE privileges to the PAYROLL table, Theresa had to have *OBJMGT and *USE authority (or a higher level of authority, such as *ALLOBJ). This requirement allowed Theresa to access the data in the PAYROLL table even though Theresa's job description was only to manage its security.</text>
@ -255,7 +244,7 @@
<list_item><location><page_12><loc_22><loc_32><loc_65><loc_33></location>2. The user profile JANE specifies a group profile of MGR.</list_item>
<list_item><location><page_12><loc_22><loc_28><loc_88><loc_31></location>3. If a user is connected to the server using user profile JANE, all of the following function invocations return a value of 1:</list_item>
</unordered_list>
<code><location><page_12><loc_25><loc_19><loc_74><loc_27></location>VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE')</code>
<code><location><page_12><loc_25><loc_19><loc_74><loc_27></location>VERIFY_GROUP_FOR_USER (CURRENT_USER, 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR') VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JANE', 'MGR', 'STEVE') The following function invocation returns a value of 0: VERIFY_GROUP_FOR_USER (CURRENT_USER, 'JUDY', 'TONY')</code>
<text><location><page_13><loc_22><loc_90><loc_27><loc_91></location>RETURN</text>
<text><location><page_13><loc_22><loc_88><loc_26><loc_89></location>CASE</text>
<code><location><page_13><loc_22><loc_67><loc_85><loc_88></location>WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'HR', 'EMP' ) = 1 THEN EMPLOYEES . DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER = EMPLOYEES . USER_ID THEN EMPLOYEES . DATE_OF_BIRTH WHEN VERIFY_GROUP_FOR_USER ( SESSION_USER , 'MGR' ) = 1 AND SESSION_USER <> EMPLOYEES . USER_ID THEN ( 9999 || '-' || MONTH ( EMPLOYEES . DATE_OF_BIRTH ) || '-' || DAY (EMPLOYEES.DATE_OF_BIRTH )) ELSE NULL END ENABLE ;</code>
@ -283,14 +272,7 @@
</unordered_list>
<section_header_level_1><location><page_14><loc_22><loc_62><loc_61><loc_63></location>Example 3-10 Activating RCAC on the EMPLOYEES table</section_header_level_1>
<unordered_list>
<list_item><location><page_14><loc_22><loc_60><loc_62><loc_61></location>/* Active Row Access Control (permissions) */</list_item>
<list_item><location><page_14><loc_22><loc_58><loc_58><loc_60></location>/* Active Column Access Control (masks)</list_item>
</unordered_list>
<text><location><page_14><loc_60><loc_58><loc_62><loc_60></location>*/</text>
<text><location><page_14><loc_22><loc_57><loc_48><loc_58></location>ALTER TABLE HR_SCHEMA.EMPLOYEES</text>
<text><location><page_14><loc_22><loc_55><loc_44><loc_56></location>ACTIVATE ROW ACCESS CONTROL</text>
<text><location><page_14><loc_22><loc_54><loc_48><loc_55></location>ACTIVATE COLUMN ACCESS CONTROL;</text>
<unordered_list>
<list_item><location><page_14><loc_22><loc_54><loc_62><loc_61></location>/* Active Row Access Control (permissions) */ /* Active Column Access Control (masks) */ ALTER TABLE HR_SCHEMA.EMPLOYEES ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL;</list_item>
<list_item><location><page_14><loc_22><loc_48><loc_88><loc_52></location>2. Look at the definition of the EMPLOYEE table, as shown in Figure 3-11. To do this, from the main navigation pane of System i Navigator, click Schemas  HR_SCHEMA  Tables , right-click the EMPLOYEES table, and click Definition .</list_item>
</unordered_list>
<figure>

File diff suppressed because one or more lines are too long

View File

@ -6,8 +6,6 @@ Front cover
<!-- image -->
<!-- image -->
## Contents
| Notices | . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vii |
@ -120,7 +118,7 @@ Hernando Bedoya is a Senior IT Specialist at STG Lab Services and Training in Ro
Chapter 1.
1
<!-- image -->
## Securing and protecting IBM DB2 data
@ -198,27 +196,7 @@ To discover who has authorization to define and manage RCAC, you can use the que
Example 2-1 Query to determine who has authority to define and manage RCAC
SELECT
function\_id,
user\_name,
usage,
user\_type
FROM
function\_usage
WHERE
function\_id=QIBM\_DB\_SECADM
ORDER BY
user\_name;
SELECT function\_id, user\_name, usage, user\_type FROM function\_usage WHERE function\_id=QIBM\_DB\_SECADM ORDER BY user\_name;
## 2.2 Separation of duties
@ -318,7 +296,7 @@ Here is an example of using the VERIFY\_GROUP\_FOR\_USER function:
- 3. If a user is connected to the server using user profile JANE, all of the following function invocations return a value of 1:
```
VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'MGR') VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'JANE', 'MGR') The following function invocation returns a value of 0: VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'JUDY', 'TONY') VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'JANE', 'MGR', 'STEVE')
VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'MGR') VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'JANE', 'MGR') VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'JANE', 'MGR', 'STEVE') The following function invocation returns a value of 0: VERIFY\_GROUP\_FOR\_USER (CURRENT\_USER, 'JUDY', 'TONY')
```
RETURN
@ -356,17 +334,7 @@ Now that you have created the row permission and the two column masks, RCAC must
## Example 3-10 Activating RCAC on the EMPLOYEES table
- /* Active Row Access Control (permissions) */
- /* Active Column Access Control (masks)
*/
ALTER TABLE HR\_SCHEMA.EMPLOYEES
ACTIVATE ROW ACCESS CONTROL
ACTIVATE COLUMN ACCESS CONTROL;
- /* Active Row Access Control (permissions) */ /* Active Column Access Control (masks) */ ALTER TABLE HR\_SCHEMA.EMPLOYEES ACTIVATE ROW ACCESS CONTROL ACTIVATE COLUMN ACCESS CONTROL;
- 2. Look at the definition of the EMPLOYEE table, as shown in Figure 3-11. To do this, from the main navigation pane of System i Navigator, click Schemas  HR\_SCHEMA  Tables , right-click the EMPLOYEES table, and click Definition .
Figure 3-11 Selecting the EMPLOYEES table from System i Navigator

File diff suppressed because one or more lines are too long

View File

@ -8,8 +8,8 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
from .verify_utils import verify_conversion_result_v1, verify_conversion_result_v2
GENERATE_V1 = False
GENERATE_V2 = False
GENERATE_V1 = True
GENERATE_V2 = True
def get_pdf_paths():

View File

@ -18,8 +18,8 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
from .verify_utils import verify_conversion_result_v1, verify_conversion_result_v2
GENERATE_V1 = False
GENERATE_V2 = False
GENERATE_V1 = True
GENERATE_V2 = True
def get_pdf_paths():