From 70b2ae3fabc9196bc1021695db490e758d0a13db Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Fri, 18 Oct 2024 16:57:26 +0200
Subject: [PATCH] reformatted the code

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 docling/backend/asciidoc_backend.py | 81 +++++++++++++++--------------
 docling/datamodel/base_models.py    |  2 +-
 tests/test_backend_asciidoc.py      | 20 +++----
 3 files changed, 51 insertions(+), 52 deletions(-)

diff --git a/docling/backend/asciidoc_backend.py b/docling/backend/asciidoc_backend.py
index 164a4465..8a6ca7df 100644
--- a/docling/backend/asciidoc_backend.py
+++ b/docling/backend/asciidoc_backend.py
@@ -1,6 +1,5 @@
-import re
-    
 import logging
+import re
 from io import BytesIO
 from pathlib import Path
 from typing import Set, Union
@@ -16,7 +15,8 @@ from docling_core.types.doc import (
 
 from docling.backend.abstract_backend import DeclarativeDocumentBackend
 from docling.datamodel.base_models import InputFormat
-#from docling.datamodel.document import InputDocument
+
+# from docling.datamodel.document import InputDocument
 
 _log = logging.getLogger(__name__)
 
@@ -29,7 +29,7 @@ class AsciidocBackend(DeclarativeDocumentBackend):
         self.path_or_stream = path_or_stream
 
         self.valid = True
-        
+
     def is_valid(self) -> bool:
         return self.valid
 
@@ -61,12 +61,12 @@ class AsciidocBackend(DeclarativeDocumentBackend):
         if len(fname) > 0:
             docname = Path(fname).stem
         else:
-            docname = "stream"            
-        
+            docname = "stream"
+
         doc = DoclingDocument(name=docname, origin=origin)
 
         doc = self.parse(doc)
-        
+
         return doc
 
     def parse(self, doc: DoclingDocument):
@@ -75,19 +75,19 @@ class AsciidocBackend(DeclarativeDocumentBackend):
         title, section headers, text, lists, and tables.
         """
 
-        content=""
+        content = ""
         with open(self.path_or_stream, "r") as fr:
             self.lines = fr.readlines()
-        
-        #self.lines = file_content.splitlines()
-        
+
+        # self.lines = file_content.splitlines()
+
         in_list = False
         in_table = False
         table_data = []
-        
+
         for line in self.lines:
             line = line.strip()
-            
+
             # Title
             if self.is_title(line):
                 item = self.parse_title(line)
@@ -105,41 +105,41 @@ class AsciidocBackend(DeclarativeDocumentBackend):
 
                 item = self.parse_list_item(line)
                 doc.add_list_item(item["text"])
-            
+
             elif in_list and not self.is_list_item(line):
                 in_list = False
-            
+
             # Tables
             elif self.is_table_line(line):
-                in_table = True                
+                in_table = True
                 table_data.append(self.parse_table_line(line))
-            
+
             elif in_table and not self.is_table_line(line):
-                
+
                 data = self.populate_table_as_grid(table_data)
                 doc.add_table(data=data)
-                
+
                 in_table = False
                 table_data = []
-            
+
             # Plain text
             elif line:
                 item = self.parse_text(line)
                 doc.add_text(text=item["text"], label="text")
 
-        if in_table and len(table_data)>0:
+        if in_table and len(table_data) > 0:
             data = self.populate_table_as_grid(table_data)
             doc.add_table(data=data)
-            
+
             in_table = False
             table_data = []
-                
+
         return doc
 
     # Title
     def is_title(self, line):
         return re.match(r"^= ", line)
-    
+
     def parse_title(self, line):
         return {"type": "title", "text": line[2:].strip()}
 
@@ -148,9 +148,13 @@ class AsciidocBackend(DeclarativeDocumentBackend):
         return re.match(r"^==+", line)
 
     def parse_section_header(self, line):
-        header_level = line.count('=')  # number of '=' represents level
-        return {"type": "header", "level": header_level, "text": line[header_level:].strip()}
-    
+        header_level = len(line) - len(line.lstrip("="))  # count leading '=' only
+        return {
+            "type": "header",
+            "level": header_level,
+            "text": line[header_level:].strip(),
+        }
+
     # Lists
     def is_list_item(self, line):
         return re.match(r"^(\*|-|\d+\.|\w+\.) ", line)
@@ -164,24 +168,24 @@ class AsciidocBackend(DeclarativeDocumentBackend):
 
     def parse_table_line(self, line):
         # Split table cells and trim extra spaces
-        return [cell.strip() for cell in line.split('|') if cell.strip()]
+        return [cell.strip() for cell in line.split("|") if cell.strip()]
 
     def populate_table_as_grid(self, table_data):
 
         num_rows = len(table_data)
-        
+
         # Adjust the table data into a grid format
         num_cols = max(len(row) for row in table_data)
 
-        data = TableData(num_rows=num_rows, num_cols=num_cols, table_cells=[])        
-        for row_idx,row in enumerate(table_data):
+        data = TableData(num_rows=num_rows, num_cols=num_cols, table_cells=[])
+        for row_idx, row in enumerate(table_data):
             # Pad rows with empty strings to match column count
-            #grid.append(row + [''] * (max_cols - len(row)))
+            # grid.append(row + [''] * (max_cols - len(row)))
 
-            for col_idx,text in enumerate(row):
+            for col_idx, text in enumerate(row):
                 row_span = 1
                 col_span = 1
-                
+
                 cell = TableCell(
                     text=text,
                     row_span=row_span,
@@ -191,11 +195,12 @@ class AsciidocBackend(DeclarativeDocumentBackend):
                     start_col_offset_idx=col_idx,
                     end_col_offset_idx=col_idx + col_span,
                     col_header=False,
-                    row_header=False)
-                data.table_cells.append(cell)                
-            
+                    row_header=False,
+                )
+                data.table_cells.append(cell)
+
         return data
-    
+
     # Plain text
     def parse_text(self, line):
         return {"type": "text", "text": line}
diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py
index 0a424351..53f8ddef 100644
--- a/docling/datamodel/base_models.py
+++ b/docling/datamodel/base_models.py
@@ -46,7 +46,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
     InputFormat.PDF: ["pdf"],
     InputFormat.HTML: ["html", "htm", "xhtml"],
     InputFormat.IMAGE: ["jpg", "jpeg", "png", "tif", "tiff", "bmp"],
-    InputFormat.ASCIIDOC: ["adoc", ".asciidoc", "asc"],    
+    InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"],
 }
 
 FormatToMimeType: Dict[InputFormat, Set[str]] = {
diff --git a/tests/test_backend_asciidoc.py b/tests/test_backend_asciidoc.py
index eaad403a..5c159125 100644
--- a/tests/test_backend_asciidoc.py
+++ b/tests/test_backend_asciidoc.py
@@ -1,14 +1,11 @@
 import glob
 import os
-
 from pathlib import Path
 
 import pytest
 from docling_core.types.doc import BoundingBox
 
-from docling.backend.asciidoc_backend import (
-    AsciidocBackend,
-)
+from docling.backend.asciidoc_backend import AsciidocBackend
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.document import InputDocument
 
@@ -25,15 +22,15 @@ def _get_backend(fname):
 
 
 def test_asciidocs_examples():
-    
+
     fnames = sorted(glob.glob("./tests/data/*.asciidoc"))
-    
+
     for fname in fnames:
         print(f"reading {fname}")
 
         bname = os.path.basename(fname)
-        gname = os.path.join("./tests/data/groundtruth/docling_v2/", bname+".md")
-        
+        gname = os.path.join("./tests/data/groundtruth/docling_v2/", bname + ".md")
+
         doc_backend = _get_backend(Path(fname))
         doc = doc_backend.convert()
 
@@ -43,12 +40,9 @@ def test_asciidocs_examples():
             with open(gname, "r") as fr:
                 true_mddoc = fr.read()
 
-            assert pred_mddoc==true_mddoc, "pred_mddoc!=true_mddoc for asciidoc"
-        else:            
+            assert pred_mddoc == true_mddoc, "pred_mddoc!=true_mddoc for asciidoc"
+        else:
             with open(gname, "w") as fw:
                 fw.write(pred_mddoc)
 
             print("\n\n", doc.export_to_markdown())
-
-            
-