diff --git a/tests/data/groundtruth/docling_v2/inline_and_formatting.md.md b/tests/data/groundtruth/docling_v2/inline_and_formatting.md.md index 31c3f3be..98dc0040 100644 --- a/tests/data/groundtruth/docling_v2/inline_and_formatting.md.md +++ b/tests/data/groundtruth/docling_v2/inline_and_formatting.md.md @@ -11,10 +11,15 @@ Create your feature branch: `git checkout -b feature/AmazingFeature` . 3. Commit your changes ( `git commit -m 'Add some AmazingFeature'` ) 4. Push to the branch ( `git push origin feature/AmazingFeature` ) 5. Open a Pull Request +6. [<RawText children='Whole list item has same formatting'>] +7. List item has *mixed or partial* formatting + +# [<RawText children='Whole heading is italic'>] + +Bar ## -*Second* section +*Partially formatted* heading -- **First** : Lorem ipsum. -- **Second** : Dolor `sit` amet. +End diff --git a/tests/data/groundtruth/docling_v2/inline_and_formatting.md.yaml b/tests/data/groundtruth/docling_v2/inline_and_formatting.md.yaml index 0cdc5c54..95c86a96 100644 --- a/tests/data/groundtruth/docling_v2/inline_and_formatting.md.yaml +++ b/tests/data/groundtruth/docling_v2/inline_and_formatting.md.yaml @@ -5,8 +5,10 @@ body: - $ref: '#/groups/0' - $ref: '#/groups/1' - $ref: '#/groups/2' - - $ref: '#/texts/27' - - $ref: '#/groups/8' + - $ref: '#/texts/32' + - $ref: '#/texts/33' + - $ref: '#/texts/34' + - $ref: '#/texts/37' content_layer: body label: unspecified name: _root_ @@ -47,6 +49,8 @@ groups: - $ref: '#/texts/18' - $ref: '#/texts/22' - $ref: '#/texts/26' + - $ref: '#/texts/27' + - $ref: '#/texts/28' content_layer: body label: ordered_list name: list @@ -94,47 +98,28 @@ groups: $ref: '#/texts/22' self_ref: '#/groups/6' - children: - - $ref: '#/texts/28' - $ref: '#/texts/29' + - $ref: '#/texts/30' + - $ref: '#/texts/31' content_layer: body label: inline name: group parent: - $ref: '#/texts/27' + $ref: '#/texts/28' self_ref: '#/groups/7' - children: - - $ref: '#/texts/30' - - $ref: '#/texts/33' - content_layer: body - label: list - name: list - parent: - $ref: '#/body' - self_ref: '#/groups/8' -- children: - - $ref: '#/texts/31' - - $ref: '#/texts/32' - content_layer: body - label: inline - name: group - parent: - $ref: '#/texts/30' - self_ref: '#/groups/9' -- children: - - $ref: '#/texts/34' - $ref: '#/texts/35' - $ref: '#/texts/36' - - $ref: '#/texts/37' content_layer: body label: inline name: group parent: - $ref: '#/texts/33' - self_ref: '#/groups/10' + $ref: '#/texts/34' + self_ref: '#/groups/8' key_value_items: [] name: inline_and_formatting origin: - binary_hash: 9342273634728023910 + binary_hash: 13696403111835531717 filename: inline_and_formatting.md mimetype: text/markdown pages: {} @@ -436,16 +421,89 @@ texts: prov: [] self_ref: '#/texts/26' text: Open a Pull Request +- children: [] + content_layer: body + enumerated: true + label: list_item + marker: '-' + orig: '[]' + parent: + $ref: '#/groups/2' + prov: [] + self_ref: '#/texts/27' + text: '[]' - children: - $ref: '#/groups/7' content_layer: body + enumerated: true + label: list_item + marker: '-' + orig: '' + parent: + $ref: '#/groups/2' + prov: [] + self_ref: '#/texts/28' + text: '' +- children: [] + content_layer: body + label: text + orig: List item has + parent: + $ref: '#/groups/7' + prov: [] + self_ref: '#/texts/29' + text: List item has +- children: [] + content_layer: body + formatting: + bold: false + italic: true + strikethrough: false + underline: false + label: text + orig: mixed or partial + parent: + $ref: '#/groups/7' + prov: [] + self_ref: '#/texts/30' + text: mixed or partial +- children: [] + content_layer: body + label: text + orig: formatting + parent: + $ref: '#/groups/7' + prov: [] + self_ref: '#/texts/31' + text: formatting +- children: [] + content_layer: body + label: title + orig: '[]' + parent: + $ref: '#/body' + prov: [] + self_ref: '#/texts/32' + text: '[]' +- children: [] + content_layer: body + label: text + orig: Bar + parent: + $ref: '#/body' + prov: [] + self_ref: '#/texts/33' + text: Bar +- children: + - $ref: '#/groups/8' + content_layer: body label: section_header level: 1 orig: '' parent: $ref: '#/body' prov: [] - self_ref: '#/texts/27' + self_ref: '#/texts/34' text: '' - children: [] content_layer: body @@ -455,111 +513,28 @@ texts: strikethrough: false underline: false label: text - orig: Second - parent: - $ref: '#/groups/7' - prov: [] - self_ref: '#/texts/28' - text: Second -- children: [] - content_layer: body - label: text - orig: section - parent: - $ref: '#/groups/7' - prov: [] - self_ref: '#/texts/29' - text: section -- children: - - $ref: '#/groups/9' - content_layer: body - enumerated: false - label: list_item - marker: '-' - orig: '' + orig: Partially formatted parent: $ref: '#/groups/8' prov: [] - self_ref: '#/texts/30' - text: '' -- children: [] - content_layer: body - formatting: - bold: true - italic: false - strikethrough: false - underline: false - label: text - orig: First - parent: - $ref: '#/groups/9' - prov: [] - self_ref: '#/texts/31' - text: First -- children: [] - content_layer: body - label: text - orig: ': Lorem ipsum.' - parent: - $ref: '#/groups/9' - prov: [] - self_ref: '#/texts/32' - text: ': Lorem ipsum.' -- children: - - $ref: '#/groups/10' - content_layer: body - enumerated: false - label: list_item - marker: '-' - orig: '' - parent: - $ref: '#/groups/8' - prov: [] - self_ref: '#/texts/33' - text: '' -- children: [] - content_layer: body - formatting: - bold: true - italic: false - strikethrough: false - underline: false - label: text - orig: Second - parent: - $ref: '#/groups/10' - prov: [] - self_ref: '#/texts/34' - text: Second -- children: [] - content_layer: body - label: text - orig: ': Dolor' - parent: - $ref: '#/groups/10' - prov: [] self_ref: '#/texts/35' - text: ': Dolor' -- captions: [] - children: [] - code_language: unknown - content_layer: body - footnotes: [] - label: code - orig: sit - parent: - $ref: '#/groups/10' - prov: [] - references: [] - self_ref: '#/texts/36' - text: sit + text: Partially formatted - children: [] content_layer: body label: text - orig: amet. + orig: heading parent: - $ref: '#/groups/10' + $ref: '#/groups/8' + prov: [] + self_ref: '#/texts/36' + text: heading +- children: [] + content_layer: body + label: text + orig: End + parent: + $ref: '#/body' prov: [] self_ref: '#/texts/37' - text: amet. + text: End version: 1.3.0 diff --git a/tests/data/md/inline_and_formatting.md b/tests/data/md/inline_and_formatting.md index e18a46c5..a889eea7 100644 --- a/tests/data/md/inline_and_formatting.md +++ b/tests/data/md/inline_and_formatting.md @@ -11,8 +11,13 @@ Create your feature branch: `git checkout -b feature/AmazingFeature`. 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`) 4. Push to the branch (`git push origin feature/AmazingFeature`) 5. Open a Pull Request +6. **Whole list item has same formatting** +7. List item has *mixed or partial* formatting -## *Second* section +# *Whole heading is italic* -- **First**: Lorem ipsum. -- **Second**: Dolor `sit` amet. +Bar + +## *Partially formatted* heading + +End