mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 12:34:22 +00:00
reformatted the code
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
parent
5016daeae3
commit
70b2ae3fab
@ -1,6 +1,5 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Set, Union
|
from typing import Set, Union
|
||||||
@ -16,6 +15,7 @@ from docling_core.types.doc import (
|
|||||||
|
|
||||||
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
|
|
||||||
# from docling.datamodel.document import InputDocument
|
# from docling.datamodel.document import InputDocument
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
@ -148,8 +148,12 @@ class AsciidocBackend(DeclarativeDocumentBackend):
|
|||||||
return re.match(r"^==+", line)
|
return re.match(r"^==+", line)
|
||||||
|
|
||||||
def parse_section_header(self, line):
|
def parse_section_header(self, line):
|
||||||
header_level = line.count('=') # number of '=' represents level
|
header_level = line.count("=") # number of '=' represents level
|
||||||
return {"type": "header", "level": header_level, "text": line[header_level:].strip()}
|
return {
|
||||||
|
"type": "header",
|
||||||
|
"level": header_level,
|
||||||
|
"text": line[header_level:].strip(),
|
||||||
|
}
|
||||||
|
|
||||||
# Lists
|
# Lists
|
||||||
def is_list_item(self, line):
|
def is_list_item(self, line):
|
||||||
@ -164,7 +168,7 @@ class AsciidocBackend(DeclarativeDocumentBackend):
|
|||||||
|
|
||||||
def parse_table_line(self, line):
|
def parse_table_line(self, line):
|
||||||
# Split table cells and trim extra spaces
|
# Split table cells and trim extra spaces
|
||||||
return [cell.strip() for cell in line.split('|') if cell.strip()]
|
return [cell.strip() for cell in line.split("|") if cell.strip()]
|
||||||
|
|
||||||
def populate_table_as_grid(self, table_data):
|
def populate_table_as_grid(self, table_data):
|
||||||
|
|
||||||
@ -191,7 +195,8 @@ class AsciidocBackend(DeclarativeDocumentBackend):
|
|||||||
start_col_offset_idx=col_idx,
|
start_col_offset_idx=col_idx,
|
||||||
end_col_offset_idx=col_idx + col_span,
|
end_col_offset_idx=col_idx + col_span,
|
||||||
col_header=False,
|
col_header=False,
|
||||||
row_header=False)
|
row_header=False,
|
||||||
|
)
|
||||||
data.table_cells.append(cell)
|
data.table_cells.append(cell)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
@ -1,14 +1,11 @@
|
|||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from docling_core.types.doc import BoundingBox
|
from docling_core.types.doc import BoundingBox
|
||||||
|
|
||||||
from docling.backend.asciidoc_backend import (
|
from docling.backend.asciidoc_backend import AsciidocBackend
|
||||||
AsciidocBackend,
|
|
||||||
)
|
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.document import InputDocument
|
from docling.datamodel.document import InputDocument
|
||||||
|
|
||||||
@ -49,6 +46,3 @@ def test_asciidocs_examples():
|
|||||||
fw.write(pred_mddoc)
|
fw.write(pred_mddoc)
|
||||||
|
|
||||||
print("\n\n", doc.export_to_markdown())
|
print("\n\n", doc.export_to_markdown())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user