mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 15:32:30 +00:00
chore: add type hinting to docx backend
Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
parent
40145b59b3
commit
e0f89029db
@ -2,23 +2,28 @@ import logging
|
|||||||
import re
|
import re
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Set, Union
|
from typing import Any, Optional, Union
|
||||||
|
|
||||||
import docx
|
|
||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
DocItemLabel,
|
DocItemLabel,
|
||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
DocumentOrigin,
|
DocumentOrigin,
|
||||||
GroupLabel,
|
GroupLabel,
|
||||||
ImageRef,
|
ImageRef,
|
||||||
|
NodeItem,
|
||||||
TableCell,
|
TableCell,
|
||||||
TableData,
|
TableData,
|
||||||
)
|
)
|
||||||
|
from docx import Document
|
||||||
|
from docx.document import Document as DocxDocument
|
||||||
from docx.oxml.table import CT_Tc
|
from docx.oxml.table import CT_Tc
|
||||||
|
from docx.oxml.xmlchemy import BaseOxmlElement
|
||||||
from docx.table import Table, _Cell
|
from docx.table import Table, _Cell
|
||||||
|
from docx.text.paragraph import Paragraph
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from lxml.etree import XPath
|
from lxml.etree import XPath
|
||||||
from PIL import Image, UnidentifiedImageError
|
from PIL import Image, UnidentifiedImageError
|
||||||
|
from typing_extensions import override
|
||||||
|
|
||||||
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
@ -28,7 +33,10 @@ _log = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
||||||
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
|
@override
|
||||||
|
def __init__(
|
||||||
|
self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]
|
||||||
|
) -> None:
|
||||||
super().__init__(in_doc, path_or_stream)
|
super().__init__(in_doc, path_or_stream)
|
||||||
self.XML_KEY = (
|
self.XML_KEY = (
|
||||||
"{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val"
|
"{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val"
|
||||||
@ -38,19 +46,19 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
}
|
}
|
||||||
# self.initialise(path_or_stream)
|
# self.initialise(path_or_stream)
|
||||||
# Word file:
|
# Word file:
|
||||||
self.path_or_stream = path_or_stream
|
self.path_or_stream: Union[BytesIO, Path] = path_or_stream
|
||||||
self.valid = False
|
self.valid: bool = False
|
||||||
# Initialise the parents for the hierarchy
|
# Initialise the parents for the hierarchy
|
||||||
self.max_levels = 10
|
self.max_levels: int = 10
|
||||||
self.level_at_new_list = None
|
self.level_at_new_list: Optional[int] = None
|
||||||
self.parents = {} # type: ignore
|
self.parents: dict[int, Optional[NodeItem]] = {}
|
||||||
for i in range(-1, self.max_levels):
|
for i in range(-1, self.max_levels):
|
||||||
self.parents[i] = None
|
self.parents[i] = None
|
||||||
|
|
||||||
self.level = 0
|
self.level = 0
|
||||||
self.listIter = 0
|
self.listIter = 0
|
||||||
|
|
||||||
self.history = {
|
self.history: dict[str, Any] = {
|
||||||
"names": [None],
|
"names": [None],
|
||||||
"levels": [None],
|
"levels": [None],
|
||||||
"numids": [None],
|
"numids": [None],
|
||||||
@ -60,9 +68,9 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self.docx_obj = None
|
self.docx_obj = None
|
||||||
try:
|
try:
|
||||||
if isinstance(self.path_or_stream, BytesIO):
|
if isinstance(self.path_or_stream, BytesIO):
|
||||||
self.docx_obj = docx.Document(self.path_or_stream)
|
self.docx_obj = Document(self.path_or_stream)
|
||||||
elif isinstance(self.path_or_stream, Path):
|
elif isinstance(self.path_or_stream, Path):
|
||||||
self.docx_obj = docx.Document(str(self.path_or_stream))
|
self.docx_obj = Document(str(self.path_or_stream))
|
||||||
|
|
||||||
self.valid = True
|
self.valid = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -70,13 +78,16 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
f"MsPowerpointDocumentBackend could not load document with hash {self.document_hash}"
|
f"MsPowerpointDocumentBackend could not load document with hash {self.document_hash}"
|
||||||
) from e
|
) from e
|
||||||
|
|
||||||
|
@override
|
||||||
def is_valid(self) -> bool:
|
def is_valid(self) -> bool:
|
||||||
return self.valid
|
return self.valid
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@override
|
||||||
def supports_pagination(cls) -> bool:
|
def supports_pagination(cls) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@override
|
||||||
def unload(self):
|
def unload(self):
|
||||||
if isinstance(self.path_or_stream, BytesIO):
|
if isinstance(self.path_or_stream, BytesIO):
|
||||||
self.path_or_stream.close()
|
self.path_or_stream.close()
|
||||||
@ -84,11 +95,17 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self.path_or_stream = None
|
self.path_or_stream = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def supported_formats(cls) -> Set[InputFormat]:
|
@override
|
||||||
|
def supported_formats(cls) -> set[InputFormat]:
|
||||||
return {InputFormat.DOCX}
|
return {InputFormat.DOCX}
|
||||||
|
|
||||||
|
@override
|
||||||
def convert(self) -> DoclingDocument:
|
def convert(self) -> DoclingDocument:
|
||||||
# Parses the DOCX into a structured document model.
|
"""Parses the DOCX into a structured document model.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The parsed document.
|
||||||
|
"""
|
||||||
|
|
||||||
origin = DocumentOrigin(
|
origin = DocumentOrigin(
|
||||||
filename=self.file.name or "file",
|
filename=self.file.name or "file",
|
||||||
@ -106,23 +123,29 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
f"Cannot convert doc with {self.document_hash} because the backend failed to init."
|
f"Cannot convert doc with {self.document_hash} because the backend failed to init."
|
||||||
)
|
)
|
||||||
|
|
||||||
def update_history(self, name, level, numid, ilevel):
|
def update_history(
|
||||||
|
self,
|
||||||
|
name: str,
|
||||||
|
level: Optional[int],
|
||||||
|
numid: Optional[int],
|
||||||
|
ilevel: Optional[int],
|
||||||
|
):
|
||||||
self.history["names"].append(name)
|
self.history["names"].append(name)
|
||||||
self.history["levels"].append(level)
|
self.history["levels"].append(level)
|
||||||
|
|
||||||
self.history["numids"].append(numid)
|
self.history["numids"].append(numid)
|
||||||
self.history["indents"].append(ilevel)
|
self.history["indents"].append(ilevel)
|
||||||
|
|
||||||
def prev_name(self):
|
def prev_name(self) -> Optional[str]:
|
||||||
return self.history["names"][-1]
|
return self.history["names"][-1]
|
||||||
|
|
||||||
def prev_level(self):
|
def prev_level(self) -> Optional[int]:
|
||||||
return self.history["levels"][-1]
|
return self.history["levels"][-1]
|
||||||
|
|
||||||
def prev_numid(self):
|
def prev_numid(self) -> Optional[int]:
|
||||||
return self.history["numids"][-1]
|
return self.history["numids"][-1]
|
||||||
|
|
||||||
def prev_indent(self):
|
def prev_indent(self) -> Optional[int]:
|
||||||
return self.history["indents"][-1]
|
return self.history["indents"][-1]
|
||||||
|
|
||||||
def get_level(self) -> int:
|
def get_level(self) -> int:
|
||||||
@ -132,7 +155,12 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
return k
|
return k
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
def walk_linear(self, body, docx_obj, doc) -> DoclingDocument:
|
def walk_linear(
|
||||||
|
self,
|
||||||
|
body: BaseOxmlElement,
|
||||||
|
docx_obj: DocxDocument,
|
||||||
|
doc: DoclingDocument,
|
||||||
|
) -> DoclingDocument:
|
||||||
for element in body:
|
for element in body:
|
||||||
tag_name = etree.QName(element).localname
|
tag_name = etree.QName(element).localname
|
||||||
# Check for Inline Images (blip elements)
|
# Check for Inline Images (blip elements)
|
||||||
@ -152,7 +180,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
_log.debug("could not parse a table, broken docx table")
|
_log.debug("could not parse a table, broken docx table")
|
||||||
|
|
||||||
elif drawing_blip:
|
elif drawing_blip:
|
||||||
self.handle_pictures(element, docx_obj, drawing_blip, doc)
|
self.handle_pictures(docx_obj, drawing_blip, doc)
|
||||||
# Check for the sdt containers, like table of contents
|
# Check for the sdt containers, like table of contents
|
||||||
elif tag_name in ["sdt"]:
|
elif tag_name in ["sdt"]:
|
||||||
sdt_content = element.find(".//w:sdtContent", namespaces=namespaces)
|
sdt_content = element.find(".//w:sdtContent", namespaces=namespaces)
|
||||||
@ -169,7 +197,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
_log.debug(f"Ignoring element in DOCX with tag: {tag_name}")
|
_log.debug(f"Ignoring element in DOCX with tag: {tag_name}")
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def str_to_int(self, s, default=0):
|
def str_to_int(self, s: Optional[str], default: Optional[int] = 0) -> Optional[int]:
|
||||||
if s is None:
|
if s is None:
|
||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
@ -177,7 +205,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
return default
|
return default
|
||||||
|
|
||||||
def split_text_and_number(self, input_string):
|
def split_text_and_number(self, input_string: str) -> list[str]:
|
||||||
match = re.match(r"(\D+)(\d+)$|^(\d+)(\D+)", input_string)
|
match = re.match(r"(\D+)(\d+)$|^(\d+)(\D+)", input_string)
|
||||||
if match:
|
if match:
|
||||||
parts = list(filter(None, match.groups()))
|
parts = list(filter(None, match.groups()))
|
||||||
@ -185,7 +213,9 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
else:
|
else:
|
||||||
return [input_string]
|
return [input_string]
|
||||||
|
|
||||||
def get_numId_and_ilvl(self, paragraph):
|
def get_numId_and_ilvl(
|
||||||
|
self, paragraph: Paragraph
|
||||||
|
) -> tuple[Optional[int], Optional[int]]:
|
||||||
# Access the XML element of the paragraph
|
# Access the XML element of the paragraph
|
||||||
numPr = paragraph._element.find(
|
numPr = paragraph._element.find(
|
||||||
".//w:numPr", namespaces=paragraph._element.nsmap
|
".//w:numPr", namespaces=paragraph._element.nsmap
|
||||||
@ -198,13 +228,11 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
numId = numId_elem.get(self.XML_KEY) if numId_elem is not None else None
|
numId = numId_elem.get(self.XML_KEY) if numId_elem is not None else None
|
||||||
ilvl = ilvl_elem.get(self.XML_KEY) if ilvl_elem is not None else None
|
ilvl = ilvl_elem.get(self.XML_KEY) if ilvl_elem is not None else None
|
||||||
|
|
||||||
return self.str_to_int(numId, default=None), self.str_to_int(
|
return self.str_to_int(numId, None), self.str_to_int(ilvl, None)
|
||||||
ilvl, default=None
|
|
||||||
)
|
|
||||||
|
|
||||||
return None, None # If the paragraph is not part of a list
|
return None, None # If the paragraph is not part of a list
|
||||||
|
|
||||||
def get_label_and_level(self, paragraph):
|
def get_label_and_level(self, paragraph: Paragraph) -> tuple[str, Optional[int]]:
|
||||||
if paragraph.style is None:
|
if paragraph.style is None:
|
||||||
return "Normal", None
|
return "Normal", None
|
||||||
label = paragraph.style.style_id
|
label = paragraph.style.style_id
|
||||||
@ -220,20 +248,25 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
|
|
||||||
if "Heading" in label and len(parts) == 2:
|
if "Heading" in label and len(parts) == 2:
|
||||||
parts.sort()
|
parts.sort()
|
||||||
label_str = ""
|
label_str: str = ""
|
||||||
label_level = 0
|
label_level: Optional[int] = 0
|
||||||
if parts[0] == "Heading":
|
if parts[0] == "Heading":
|
||||||
label_str = parts[0]
|
label_str = parts[0]
|
||||||
label_level = self.str_to_int(parts[1], default=None)
|
label_level = self.str_to_int(parts[1], None)
|
||||||
if parts[1] == "Heading":
|
if parts[1] == "Heading":
|
||||||
label_str = parts[1]
|
label_str = parts[1]
|
||||||
label_level = self.str_to_int(parts[0], default=None)
|
label_level = self.str_to_int(parts[0], None)
|
||||||
return label_str, label_level
|
return label_str, label_level
|
||||||
else:
|
else:
|
||||||
return label, None
|
return label, None
|
||||||
|
|
||||||
def handle_text_elements(self, element, docx_obj, doc):
|
def handle_text_elements(
|
||||||
paragraph = docx.text.paragraph.Paragraph(element, docx_obj)
|
self,
|
||||||
|
element: BaseOxmlElement,
|
||||||
|
docx_obj: DocxDocument,
|
||||||
|
doc: DoclingDocument,
|
||||||
|
) -> None:
|
||||||
|
paragraph = Paragraph(element, docx_obj)
|
||||||
|
|
||||||
if paragraph.text is None:
|
if paragraph.text is None:
|
||||||
return
|
return
|
||||||
@ -257,11 +290,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
and p_style_id not in ["Title", "Heading"]
|
and p_style_id not in ["Title", "Heading"]
|
||||||
):
|
):
|
||||||
self.add_listitem(
|
self.add_listitem(
|
||||||
element,
|
|
||||||
docx_obj,
|
|
||||||
doc,
|
doc,
|
||||||
p_style_id,
|
|
||||||
p_level,
|
|
||||||
numid,
|
numid,
|
||||||
ilevel,
|
ilevel,
|
||||||
text,
|
text,
|
||||||
@ -286,13 +315,13 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self.level = 0
|
self.level = 0
|
||||||
|
|
||||||
if p_style_id in ["Title"]:
|
if p_style_id in ["Title"]:
|
||||||
for key, val in self.parents.items():
|
for key in range(len(self.parents)):
|
||||||
self.parents[key] = None
|
self.parents[key] = None
|
||||||
self.parents[0] = doc.add_text(
|
self.parents[0] = doc.add_text(
|
||||||
parent=None, label=DocItemLabel.TITLE, text=text
|
parent=None, label=DocItemLabel.TITLE, text=text
|
||||||
)
|
)
|
||||||
elif "Heading" in p_style_id:
|
elif "Heading" in p_style_id:
|
||||||
self.add_header(element, docx_obj, doc, p_style_id, p_level, text)
|
self.add_header(doc, p_level, text)
|
||||||
|
|
||||||
elif p_style_id in [
|
elif p_style_id in [
|
||||||
"Paragraph",
|
"Paragraph",
|
||||||
@ -320,7 +349,9 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self.update_history(p_style_id, p_level, numid, ilevel)
|
self.update_history(p_style_id, p_level, numid, ilevel)
|
||||||
return
|
return
|
||||||
|
|
||||||
def add_header(self, element, docx_obj, doc, curr_name, curr_level, text: str):
|
def add_header(
|
||||||
|
self, doc: DoclingDocument, curr_level: Optional[int], text: str
|
||||||
|
) -> None:
|
||||||
level = self.get_level()
|
level = self.get_level()
|
||||||
if isinstance(curr_level, int):
|
if isinstance(curr_level, int):
|
||||||
if curr_level > level:
|
if curr_level > level:
|
||||||
@ -333,7 +364,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
)
|
)
|
||||||
elif curr_level < level:
|
elif curr_level < level:
|
||||||
# remove the tail
|
# remove the tail
|
||||||
for key, val in self.parents.items():
|
for key in range(len(self.parents)):
|
||||||
if key >= curr_level:
|
if key >= curr_level:
|
||||||
self.parents[key] = None
|
self.parents[key] = None
|
||||||
|
|
||||||
@ -352,22 +383,18 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
|
|
||||||
def add_listitem(
|
def add_listitem(
|
||||||
self,
|
self,
|
||||||
element,
|
doc: DoclingDocument,
|
||||||
docx_obj,
|
numid: int,
|
||||||
doc,
|
ilevel: int,
|
||||||
p_style_id,
|
|
||||||
p_level,
|
|
||||||
numid,
|
|
||||||
ilevel,
|
|
||||||
text: str,
|
text: str,
|
||||||
is_numbered=False,
|
is_numbered: bool = False,
|
||||||
):
|
) -> None:
|
||||||
# is_numbered = is_numbered
|
|
||||||
enum_marker = ""
|
enum_marker = ""
|
||||||
|
|
||||||
level = self.get_level()
|
level = self.get_level()
|
||||||
|
prev_indent = self.prev_indent()
|
||||||
if self.prev_numid() is None: # Open new list
|
if self.prev_numid() is None: # Open new list
|
||||||
self.level_at_new_list = level # type: ignore
|
self.level_at_new_list = level
|
||||||
|
|
||||||
self.parents[level] = doc.add_group(
|
self.parents[level] = doc.add_group(
|
||||||
label=GroupLabel.LIST, name="list", parent=self.parents[level - 1]
|
label=GroupLabel.LIST, name="list", parent=self.parents[level - 1]
|
||||||
@ -386,10 +413,13 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
)
|
)
|
||||||
|
|
||||||
elif (
|
elif (
|
||||||
self.prev_numid() == numid and self.prev_indent() < ilevel
|
self.prev_numid() == numid
|
||||||
|
and self.level_at_new_list is not None
|
||||||
|
and prev_indent is not None
|
||||||
|
and prev_indent < ilevel
|
||||||
): # Open indented list
|
): # Open indented list
|
||||||
for i in range(
|
for i in range(
|
||||||
self.level_at_new_list + self.prev_indent() + 1,
|
self.level_at_new_list + prev_indent + 1,
|
||||||
self.level_at_new_list + ilevel + 1,
|
self.level_at_new_list + ilevel + 1,
|
||||||
):
|
):
|
||||||
# Determine if this is an unordered list or an ordered list.
|
# Determine if this is an unordered list or an ordered list.
|
||||||
@ -418,7 +448,12 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
text=text,
|
text=text,
|
||||||
)
|
)
|
||||||
|
|
||||||
elif self.prev_numid() == numid and ilevel < self.prev_indent(): # Close list
|
elif (
|
||||||
|
self.prev_numid() == numid
|
||||||
|
and self.level_at_new_list is not None
|
||||||
|
and prev_indent is not None
|
||||||
|
and ilevel < prev_indent
|
||||||
|
): # Close list
|
||||||
for k, v in self.parents.items():
|
for k, v in self.parents.items():
|
||||||
if k > self.level_at_new_list + ilevel:
|
if k > self.level_at_new_list + ilevel:
|
||||||
self.parents[k] = None
|
self.parents[k] = None
|
||||||
@ -436,7 +471,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
)
|
)
|
||||||
self.listIter = 0
|
self.listIter = 0
|
||||||
|
|
||||||
elif self.prev_numid() == numid or self.prev_indent() == ilevel:
|
elif self.prev_numid() == numid or prev_indent == ilevel:
|
||||||
# TODO: Set marker and enumerated arguments if this is an enumeration element.
|
# TODO: Set marker and enumerated arguments if this is an enumeration element.
|
||||||
self.listIter += 1
|
self.listIter += 1
|
||||||
if is_numbered:
|
if is_numbered:
|
||||||
@ -450,7 +485,12 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
def handle_tables(self, element, docx_obj, doc):
|
def handle_tables(
|
||||||
|
self,
|
||||||
|
element: BaseOxmlElement,
|
||||||
|
docx_obj: DocxDocument,
|
||||||
|
doc: DoclingDocument,
|
||||||
|
) -> None:
|
||||||
table: Table = Table(element, docx_obj)
|
table: Table = Table(element, docx_obj)
|
||||||
num_rows = len(table.rows)
|
num_rows = len(table.rows)
|
||||||
num_cols = len(table.columns)
|
num_cols = len(table.columns)
|
||||||
@ -509,8 +549,10 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
doc.add_table(data=data, parent=self.parents[level - 1])
|
doc.add_table(data=data, parent=self.parents[level - 1])
|
||||||
return
|
return
|
||||||
|
|
||||||
def handle_pictures(self, element, docx_obj, drawing_blip, doc):
|
def handle_pictures(
|
||||||
def get_docx_image(element, drawing_blip):
|
self, docx_obj: DocxDocument, drawing_blip: Any, doc: DoclingDocument
|
||||||
|
) -> None:
|
||||||
|
def get_docx_image(drawing_blip):
|
||||||
rId = drawing_blip[0].get(
|
rId = drawing_blip[0].get(
|
||||||
"{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
|
"{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
|
||||||
)
|
)
|
||||||
@ -523,7 +565,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
level = self.get_level()
|
level = self.get_level()
|
||||||
# Open the BytesIO object with PIL to create an Image
|
# Open the BytesIO object with PIL to create an Image
|
||||||
try:
|
try:
|
||||||
image_data = get_docx_image(element, drawing_blip)
|
image_data = get_docx_image(drawing_blip)
|
||||||
image_bytes = BytesIO(image_data)
|
image_bytes = BytesIO(image_data)
|
||||||
pil_image = Image.open(image_bytes)
|
pil_image = Image.open(image_bytes)
|
||||||
doc.add_picture(
|
doc.add_picture(
|
||||||
|
Loading…
Reference in New Issue
Block a user