Pass nested cluster processing through full pipeline

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2024-12-03 13:08:45 +01:00
parent 7245cc6080
commit 4dcc738b6d
6 changed files with 62 additions and 35 deletions

View File

@@ -132,6 +132,12 @@ class LayoutPrediction(BaseModel):
clusters: List[Cluster] = []
class ContainerElement(BasePageElement):
    # Typing-only element for Form and Key-Value-Region containers;
    # it adds nothing on top of BasePageElement.
    pass
class Table(BasePageElement):
otsl_seq: List[str]
num_rows: int = 0
@@ -171,7 +177,7 @@ class PagePredictions(BaseModel):
equations_prediction: Optional[EquationPrediction] = None
# NOTE(review): this line and the next appear to be the before/after sides of
# a one-line diff change (the +/- markers were lost) — confirm against the
# real file, which should contain only one PageElement assignment.
PageElement = Union[TextElement, Table, FigureElement]
# Same alias with ContainerElement added as a fourth member of the union.
PageElement = Union[TextElement, Table, FigureElement, ContainerElement]
class AssembledUnit(BaseModel):

View File

@@ -77,6 +77,8 @@ layout_label_to_ds_type = {
DocItemLabel.PICTURE: "figure",
DocItemLabel.TEXT: "paragraph",
DocItemLabel.PARAGRAPH: "paragraph",
DocItemLabel.FORM: DocItemLabel.FORM.value,
DocItemLabel.KEY_VALUE_REGION: DocItemLabel.KEY_VALUE_REGION.value,
}
# Module-level DoclingDocument constructed once with the placeholder name
# "dummy". Presumably an empty sentinel/helper document — its usage is not
# visible in this hunk; TODO confirm.
_EMPTY_DOCLING_DOC = DoclingDocument(name="dummy")