Fixed the mypy type-checking errors

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
2025-08-02 07:22:14 +00:00 · 2024-10-22 09:44:27 +02:00 · 2024-10-22 09:44:27 +02:00 · bb3db07836
commit bb3db07836
parent b04f14ec24
2 changed files with 150 additions and 105 deletions
--- a/docling/backend/asciidoc_backend.py
+++ b/docling/backend/asciidoc_backend.py
@ -4,20 +4,20 @@ from io import BytesIO
 from pathlib import Path
 from typing import Set, Union
 from pydantic import (
    AnyUrl,
 )
 from docling_core.types.doc import (
-    Size,    
+    DocItem,
    DocItemLabel,
    DoclingDocument,
    DocumentOrigin,
    GroupItem,
    GroupLabel,
    ImageRef,
    NodeItem,
    Size,
    TableCell,
    TableData,
    ImageRef,
 )
 from pydantic import AnyUrl
 from docling.backend.abstract_backend import DeclarativeDocumentBackend
 from docling.datamodel.base_models import InputFormat
@ -90,12 +90,14 @@ class AsciidocBackend(DeclarativeDocumentBackend):
        in_list = False
        in_table = False
-        text_data = []
+        text_data: list[str] = []
-        table_data = []
+        table_data: list[str] = []
-        caption_data = []
+        caption_data: list[str] = []
-        parents = {}
+        # parents: dict[int, Union[DocItem, GroupItem, None]] = {}
-        indents = {}
+        parents: dict[int, Union[GroupItem, None]] = {}
        # indents: dict[int, Union[DocItem, GroupItem, None]] = {}
        indents: dict[int, Union[GroupItem, None]] = {}
        for i in range(0, 10):
            parents[i] = None
@ -109,14 +111,18 @@ class AsciidocBackend(DeclarativeDocumentBackend):
                item = self.parse_title(line)
                level = item["level"]
-                parents[level] = doc.add_text(text=item["text"], label=DocItemLabel.TITLE)
+                parents[level] = doc.add_text(
                    text=item["text"], label=DocItemLabel.TITLE
                )
            # Section headers
            elif self.is_section_header(line):
                item = self.parse_section_header(line)
                level = item["level"]
-                parents[level] = doc.add_heading(text=item["text"], level=item["level"], parent=parents[level-1])
+                parents[level] = doc.add_heading(
                    text=item["text"], level=item["level"], parent=parents[level - 1]
                )
                for k, v in parents.items():
                    if k > level:
                        parents[k] = None
@ -169,16 +175,22 @@ class AsciidocBackend(DeclarativeDocumentBackend):
                in_table = True
                table_data.append(self.parse_table_line(line))
-            elif in_table and ((not self.is_table_line(line)) or line.strip()=="|==="): # end of table
+            elif in_table and (
                (not self.is_table_line(line)) or line.strip() == "|==="
            ):  # end of table
                caption = None
                if len(caption_data) > 0:
-                    caption = doc.add_text(text=" ".join(caption_data), label=DocItemLabel.CAPTION)
+                    caption = doc.add_text(
                        text=" ".join(caption_data), label=DocItemLabel.CAPTION
                    )
                caption_data = []
                data = self.populate_table_as_grid(table_data)
-                doc.add_table(data=data, parent=self.get_current_parent(parents), caption=caption)
+                doc.add_table(
                    data=data, parent=self.get_current_parent(parents), caption=caption
                )
                in_table = False
                table_data = []
@ -188,7 +200,9 @@ class AsciidocBackend(DeclarativeDocumentBackend):
                caption = None
                if len(caption_data) > 0:
-                    caption = doc.add_text(text=" ".join(caption_data), label=DocItemLabel.CAPTION)
+                    caption = doc.add_text(
                        text=" ".join(caption_data), label=DocItemLabel.CAPTION
                    )
                caption_data = []
@ -200,9 +214,17 @@ class AsciidocBackend(DeclarativeDocumentBackend):
                    size = Size(width=int(item["width"]), height=int(item["height"]))
                uri = None
-                if "uri" in item and not item["uri"].startswith("http") and item["uri"].startswith("//"):
+                if (
                    "uri" in item
                    and not item["uri"].startswith("http")
                    and item["uri"].startswith("//")
                ):
                    uri = "file:" + item["uri"]
-                elif "uri" in item and not item["uri"].startswith("http") and item["uri"].startswith("/"):
+                elif (
                    "uri" in item
                    and not item["uri"].startswith("http")
                    and item["uri"].startswith("/")
                ):
                    uri = "file:/" + item["uri"]
                elif "uri" in item and not item["uri"].startswith("http"):
                    uri = "file://" + item["uri"]
@ -215,14 +237,19 @@ class AsciidocBackend(DeclarativeDocumentBackend):
                item = self.parse_caption(line)
                caption_data.append(item["text"])
-            elif len(line.strip())>0 and len(caption_data)>0: # allow multiline captions
+            elif (
                len(line.strip()) > 0 and len(caption_data) > 0
            ):  # allow multiline captions
                item = self.parse_text(line)
                caption_data.append(item["text"])
            # Plain text
            elif len(line.strip()) == 0 and len(text_data) > 0:
-                doc.add_text(text=" ".join(text_data), label=DocItemLabel.PARAGRAPH,
+                doc.add_text(
-                             parent=self.get_current_parent(parents))
+                    text=" ".join(text_data),
                    label=DocItemLabel.PARAGRAPH,
                    parent=self.get_current_parent(parents),
                )
                text_data = []
            elif len(line.strip()) > 0:  # allow multiline texts
@ -231,8 +258,11 @@ class AsciidocBackend(DeclarativeDocumentBackend):
                text_data.append(item["text"])
        if len(text_data) > 0:
-            doc.add_text(text=" ".join(text_data), label=DocItemLabel.PARAGRAPH,
+            doc.add_text(
-                         parent=self.get_current_parent(parents))
+                text=" ".join(text_data),
                label=DocItemLabel.PARAGRAPH,
                parent=self.get_current_parent(parents),
            )
            text_data = []
        if in_table and len(table_data) > 0:
@ -296,15 +326,30 @@ class AsciidocBackend(DeclarativeDocumentBackend):
            text = match.group(3)  # The actual text of the list item
            if marker == "*" or marker == "-":
-                return {"type": "list_item", "marker": marker, "text": text.strip(),
+                return {
-                        "numbered": False, "indent": 0 if indent==None else len(indent)}
+                    "type": "list_item",
                    "marker": marker,
                    "text": text.strip(),
                    "numbered": False,
                    "indent": 0 if indent == None else len(indent),
                }
            else:
-                return {"type": "list_item", "marker": marker, "text": text.strip(),
+                return {
-                        "numbered": True, "indent": 0 if indent==None else len(indent)}
+                    "type": "list_item",
                    "marker": marker,
                    "text": text.strip(),
                    "numbered": True,
                    "indent": 0 if indent == None else len(indent),
                }
        else:
            # Fallback if no match
-            return {"type": "list_item", "marker": item_marker, "text": line,
+            return {
-                    "numbered": False, "indent": 0}
+                "type": "list_item",
                "marker": item_marker,
                "text": line,
                "numbered": False,
                "indent": 0,
            }
    #   =========   Tables
    def is_table_line(self, line):
@ -357,14 +402,14 @@ class AsciidocBackend(DeclarativeDocumentBackend):
        mtch = re.match(r"^image::(.+)\[(.*)\]$", line)
        if mtch:
            picture_path = mtch.group(1).strip()
-            attributes = mtch.group(2).split(',')
+            attributes = mtch.group(2).split(",")
            picture_info = {"type": "picture", "uri": picture_path}
            # Extract optional attributes (alt text, width, height, alignment)
            if attributes:
                picture_info["alt"] = attributes[0].strip() if attributes[0] else ""
                for attr in attributes[1:]:
-                    key, value = attr.split('=')
+                    key, value = attr.split("=")
                    picture_info[key.strip()] = value.strip()
            return picture_info