mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-25 19:44:34 +00:00
fix: update md table classification
Signed-off-by: Michael Honaker <Michael.Honaker@ibm.com>
This commit is contained in:
parent
861abcdcb0
commit
6a34f6f5c5
@ -276,7 +276,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
|
||||
_log.debug(f" - Paragraph (raw text): {element.children}")
|
||||
snippet_text = element.children.strip()
|
||||
# Detect start of the table:
|
||||
if "|" in snippet_text:
|
||||
if "|" in snippet_text or self.in_table:
|
||||
# most likely part of the markdown table
|
||||
self.in_table = True
|
||||
if len(self.md_table_buffer) > 0:
|
||||
|
@ -12,9 +12,13 @@ Create your feature branch: `git checkout -b feature/AmazingFeature` .
|
||||
4. Push to the branch ( `git push origin feature/AmazingFeature` )
|
||||
5. Open a Pull Request
|
||||
|
||||
##
|
||||
##
|
||||
|
||||
*Second* section
|
||||
|
||||
- **First** : Lorem ipsum.
|
||||
- **Second** : Dolor `sit` amet.
|
||||
|
||||
| Bold Heading | Italic Heading |
|
||||
|----------------|------------------|
|
||||
| data a | data b |
|
@ -7,6 +7,8 @@ body:
|
||||
- $ref: '#/groups/2'
|
||||
- $ref: '#/texts/27'
|
||||
- $ref: '#/groups/8'
|
||||
- $ref: '#/groups/11'
|
||||
- $ref: '#/tables/0'
|
||||
content_layer: body
|
||||
label: unspecified
|
||||
name: _root_
|
||||
@ -131,16 +133,158 @@ groups:
|
||||
parent:
|
||||
$ref: '#/texts/33'
|
||||
self_ref: '#/groups/10'
|
||||
- children: []
|
||||
content_layer: body
|
||||
label: inline
|
||||
name: group
|
||||
parent:
|
||||
$ref: '#/body'
|
||||
self_ref: '#/groups/11'
|
||||
key_value_items: []
|
||||
name: inline_and_formatting
|
||||
origin:
|
||||
binary_hash: 9342273634728023910
|
||||
binary_hash: 15980020574215496313
|
||||
filename: inline_and_formatting.md
|
||||
mimetype: text/markdown
|
||||
pages: {}
|
||||
pictures: []
|
||||
schema_name: DoclingDocument
|
||||
tables: []
|
||||
tables:
|
||||
- captions: []
|
||||
children: []
|
||||
content_layer: body
|
||||
data:
|
||||
grid:
|
||||
- - col_span: 1
|
||||
column_header: true
|
||||
end_col_offset_idx: 1
|
||||
end_row_offset_idx: 1
|
||||
row_header: false
|
||||
row_section: false
|
||||
row_span: 1
|
||||
start_col_offset_idx: 0
|
||||
start_row_offset_idx: 0
|
||||
text: Bold Heading
|
||||
- col_span: 1
|
||||
column_header: true
|
||||
end_col_offset_idx: 2
|
||||
end_row_offset_idx: 1
|
||||
row_header: false
|
||||
row_section: false
|
||||
row_span: 1
|
||||
start_col_offset_idx: 1
|
||||
start_row_offset_idx: 0
|
||||
text: Italic Heading
|
||||
- - col_span: 1
|
||||
column_header: false
|
||||
end_col_offset_idx: 1
|
||||
end_row_offset_idx: 2
|
||||
row_header: false
|
||||
row_section: false
|
||||
row_span: 1
|
||||
start_col_offset_idx: 0
|
||||
start_row_offset_idx: 1
|
||||
text: data a
|
||||
- col_span: 1
|
||||
column_header: false
|
||||
end_col_offset_idx: 2
|
||||
end_row_offset_idx: 2
|
||||
row_header: false
|
||||
row_section: false
|
||||
row_span: 1
|
||||
start_col_offset_idx: 1
|
||||
start_row_offset_idx: 1
|
||||
text: data b
|
||||
num_cols: 2
|
||||
num_rows: 2
|
||||
table_cells:
|
||||
- col_span: 1
|
||||
column_header: true
|
||||
end_col_offset_idx: 1
|
||||
end_row_offset_idx: 1
|
||||
row_header: false
|
||||
row_section: false
|
||||
row_span: 1
|
||||
start_col_offset_idx: 0
|
||||
start_row_offset_idx: 0
|
||||
text: Bold Heading
|
||||
- col_span: 1
|
||||
column_header: true
|
||||
end_col_offset_idx: 2
|
||||
end_row_offset_idx: 1
|
||||
row_header: false
|
||||
row_section: false
|
||||
row_span: 1
|
||||
start_col_offset_idx: 1
|
||||
start_row_offset_idx: 0
|
||||
text: Italic Heading
|
||||
- col_span: 1
|
||||
column_header: false
|
||||
end_col_offset_idx: 1
|
||||
end_row_offset_idx: 2
|
||||
row_header: false
|
||||
row_section: false
|
||||
row_span: 1
|
||||
start_col_offset_idx: 0
|
||||
start_row_offset_idx: 1
|
||||
text: data a
|
||||
- col_span: 1
|
||||
column_header: false
|
||||
end_col_offset_idx: 2
|
||||
end_row_offset_idx: 2
|
||||
row_header: false
|
||||
row_section: false
|
||||
row_span: 1
|
||||
start_col_offset_idx: 1
|
||||
start_row_offset_idx: 1
|
||||
text: data b
|
||||
- col_span: 1
|
||||
column_header: true
|
||||
end_col_offset_idx: 1
|
||||
end_row_offset_idx: 1
|
||||
row_header: false
|
||||
row_section: false
|
||||
row_span: 1
|
||||
start_col_offset_idx: 0
|
||||
start_row_offset_idx: 0
|
||||
text: Bold Heading
|
||||
- col_span: 1
|
||||
column_header: true
|
||||
end_col_offset_idx: 2
|
||||
end_row_offset_idx: 1
|
||||
row_header: false
|
||||
row_section: false
|
||||
row_span: 1
|
||||
start_col_offset_idx: 1
|
||||
start_row_offset_idx: 0
|
||||
text: Italic Heading
|
||||
- col_span: 1
|
||||
column_header: false
|
||||
end_col_offset_idx: 1
|
||||
end_row_offset_idx: 2
|
||||
row_header: false
|
||||
row_section: false
|
||||
row_span: 1
|
||||
start_col_offset_idx: 0
|
||||
start_row_offset_idx: 1
|
||||
text: data a
|
||||
- col_span: 1
|
||||
column_header: false
|
||||
end_col_offset_idx: 2
|
||||
end_row_offset_idx: 2
|
||||
row_header: false
|
||||
row_section: false
|
||||
row_span: 1
|
||||
start_col_offset_idx: 1
|
||||
start_row_offset_idx: 1
|
||||
text: data b
|
||||
footnotes: []
|
||||
label: table
|
||||
parent:
|
||||
$ref: '#/body'
|
||||
prov: []
|
||||
references: []
|
||||
self_ref: '#/tables/0'
|
||||
texts:
|
||||
- children: []
|
||||
content_layer: body
|
||||
@ -562,4 +706,4 @@ texts:
|
||||
prov: []
|
||||
self_ref: '#/texts/37'
|
||||
text: amet.
|
||||
version: 1.3.0
|
||||
version: 1.3.0
|
4
tests/data/md/inline_and_formatting.md
vendored
4
tests/data/md/inline_and_formatting.md
vendored
@ -16,3 +16,7 @@ Create your feature branch: `git checkout -b feature/AmazingFeature`.
|
||||
|
||||
- **First**: Lorem ipsum.
|
||||
- **Second**: Dolor `sit` amet.
|
||||
|
||||
| **Bold Heading** | *Italic Heading* |
|
||||
|------------------|------------------|
|
||||
| data a | data b |
|
||||
|
Loading…
Reference in New Issue
Block a user