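Reformatted the code (import ordering, double-quoted strings, wrapped long lines with trailing commas); no functional changes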

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter Staar 2024-10-18 16:57:26 +02:00
parent 5016daeae3
commit 70b2ae3fab
3 changed files with 51 additions and 52 deletions

View File

@ -1,6 +1,5 @@
import re
import logging import logging
import re
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Set, Union from typing import Set, Union
@ -16,6 +15,7 @@ from docling_core.types.doc import (
from docling.backend.abstract_backend import DeclarativeDocumentBackend from docling.backend.abstract_backend import DeclarativeDocumentBackend
from docling.datamodel.base_models import InputFormat from docling.datamodel.base_models import InputFormat
# from docling.datamodel.document import InputDocument # from docling.datamodel.document import InputDocument
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -148,8 +148,12 @@ class AsciidocBackend(DeclarativeDocumentBackend):
return re.match(r"^==+", line) return re.match(r"^==+", line)
def parse_section_header(self, line): def parse_section_header(self, line):
header_level = line.count('=') # number of '=' represents level header_level = line.count("=") # number of '=' represents level
return {"type": "header", "level": header_level, "text": line[header_level:].strip()} return {
"type": "header",
"level": header_level,
"text": line[header_level:].strip(),
}
# Lists # Lists
def is_list_item(self, line): def is_list_item(self, line):
@ -164,7 +168,7 @@ class AsciidocBackend(DeclarativeDocumentBackend):
def parse_table_line(self, line): def parse_table_line(self, line):
# Split table cells and trim extra spaces # Split table cells and trim extra spaces
return [cell.strip() for cell in line.split('|') if cell.strip()] return [cell.strip() for cell in line.split("|") if cell.strip()]
def populate_table_as_grid(self, table_data): def populate_table_as_grid(self, table_data):
@ -191,7 +195,8 @@ class AsciidocBackend(DeclarativeDocumentBackend):
start_col_offset_idx=col_idx, start_col_offset_idx=col_idx,
end_col_offset_idx=col_idx + col_span, end_col_offset_idx=col_idx + col_span,
col_header=False, col_header=False,
row_header=False) row_header=False,
)
data.table_cells.append(cell) data.table_cells.append(cell)
return data return data

View File

@ -1,14 +1,11 @@
import glob import glob
import os import os
from pathlib import Path from pathlib import Path
import pytest import pytest
from docling_core.types.doc import BoundingBox from docling_core.types.doc import BoundingBox
from docling.backend.asciidoc_backend import ( from docling.backend.asciidoc_backend import AsciidocBackend
AsciidocBackend,
)
from docling.datamodel.base_models import InputFormat from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import InputDocument from docling.datamodel.document import InputDocument
@ -49,6 +46,3 @@ def test_asciidocs_examples():
fw.write(pred_mddoc) fw.write(pred_mddoc)
print("\n\n", doc.export_to_markdown()) print("\n\n", doc.export_to_markdown())