reformatted the code

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
2025-07-27 04:24:45 +00:00 · 2024-10-18 16:57:26 +02:00 · 2024-10-18 16:57:26 +02:00 · 70b2ae3fab
commit 70b2ae3fab
parent 5016daeae3
3 changed files with 51 additions and 52 deletions
--- a/docling/backend/asciidoc_backend.py
+++ b/docling/backend/asciidoc_backend.py
@@ -1,6 +1,5 @@
-import re
-    
 import logging
+import re
 from io import BytesIO
 from pathlib import Path
 from typing import Set, Union
@@ -16,7 +15,8 @@ from docling_core.types.doc import (

 from docling.backend.abstract_backend import DeclarativeDocumentBackend
 from docling.datamodel.base_models import InputFormat
-#from docling.datamodel.document import InputDocument
+
+# from docling.datamodel.document import InputDocument

 _log = logging.getLogger(__name__)

@@ -75,11 +75,11 @@ class AsciidocBackend(DeclarativeDocumentBackend):
        title, section headers, text, lists, and tables.
        """

-        content=""
+        content = ""
        with open(self.path_or_stream, "r") as fr:
            self.lines = fr.readlines()

-        #self.lines = file_content.splitlines()
+        # self.lines = file_content.splitlines()

        in_list = False
        in_table = False
@@ -127,7 +127,7 @@ class AsciidocBackend(DeclarativeDocumentBackend):
                item = self.parse_text(line)
                doc.add_text(text=item["text"], label="text")

-        if in_table and len(table_data)>0:
+        if in_table and len(table_data) > 0:
            data = self.populate_table_as_grid(table_data)
            doc.add_table(data=data)

@@ -148,8 +148,12 @@ class AsciidocBackend(DeclarativeDocumentBackend):
        return re.match(r"^==+", line)

    def parse_section_header(self, line):
-        header_level = line.count('=')  # number of '=' represents level
-        return {"type": "header", "level": header_level, "text": line[header_level:].strip()}
+        header_level = line.count("=")  # number of '=' represents level
+        return {
+            "type": "header",
+            "level": header_level,
+            "text": line[header_level:].strip(),
+        }

    # Lists
    def is_list_item(self, line):
@@ -164,7 +168,7 @@ class AsciidocBackend(DeclarativeDocumentBackend):

    def parse_table_line(self, line):
        # Split table cells and trim extra spaces
-        return [cell.strip() for cell in line.split('|') if cell.strip()]
+        return [cell.strip() for cell in line.split("|") if cell.strip()]

    def populate_table_as_grid(self, table_data):

@@ -174,11 +178,11 @@ class AsciidocBackend(DeclarativeDocumentBackend):
        num_cols = max(len(row) for row in table_data)

        data = TableData(num_rows=num_rows, num_cols=num_cols, table_cells=[])
-        for row_idx,row in enumerate(table_data):
+        for row_idx, row in enumerate(table_data):
            # Pad rows with empty strings to match column count
-            #grid.append(row + [''] * (max_cols - len(row)))
+            # grid.append(row + [''] * (max_cols - len(row)))

-            for col_idx,text in enumerate(row):
+            for col_idx, text in enumerate(row):
                row_span = 1
                col_span = 1

@@ -191,7 +195,8 @@ class AsciidocBackend(DeclarativeDocumentBackend):
                    start_col_offset_idx=col_idx,
                    end_col_offset_idx=col_idx + col_span,
                    col_header=False,
-                    row_header=False)
+                    row_header=False,
+                )
                data.table_cells.append(cell)

        return data
--- a/tests/test_backend_asciidoc.py
+++ b/tests/test_backend_asciidoc.py
@@ -1,14 +1,11 @@
 import glob
 import os
-
 from pathlib import Path

 import pytest
 from docling_core.types.doc import BoundingBox

-from docling.backend.asciidoc_backend import (
-    AsciidocBackend,
-)
+from docling.backend.asciidoc_backend import AsciidocBackend
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.document import InputDocument

@@ -32,7 +29,7 @@ def test_asciidocs_examples():
        print(f"reading {fname}")

        bname = os.path.basename(fname)
-        gname = os.path.join("./tests/data/groundtruth/docling_v2/", bname+".md")
+        gname = os.path.join("./tests/data/groundtruth/docling_v2/", bname + ".md")

        doc_backend = _get_backend(Path(fname))
        doc = doc_backend.convert()
@@ -43,12 +40,9 @@ def test_asciidocs_examples():
            with open(gname, "r") as fr:
                true_mddoc = fr.read()

-            assert pred_mddoc==true_mddoc, "pred_mddoc!=true_mddoc for asciidoc"
+            assert pred_mddoc == true_mddoc, "pred_mddoc!=true_mddoc for asciidoc"
        else:
            with open(gname, "w") as fw:
                fw.write(pred_mddoc)

            print("\n\n", doc.export_to_markdown())
-
-            
-