feat: add textbox content extraction in msword_backend

Signed-off-by: Andrew <tsai247365@gmail.com>
This commit is contained in:
Andrew 2025-05-13 17:11:43 +08:00
parent b09fd45a46
commit 498fc79392

View File

@@ -359,15 +359,17 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
@classmethod
def _get_format_from_run(cls, run: Run) -> Optional[Formatting]:
    """Build a ``Formatting`` object from the character styling of ``run``.

    Args:
        run: The run whose ``bold``, ``italic`` and ``underline``
            properties are read.

    Returns:
        A ``Formatting`` instance. Falsy ``bold``/``italic`` values (such
        as ``None``) are passed through as ``False``, and ``underline`` is
        always reduced to a plain ``bool``.
    """
    return Formatting(
        bold=run.bold or False,
        italic=run.italic or False,
        # ``underline`` is not necessarily a boolean: it can be an enum
        # member such as WD_UNDERLINE.THICK (value 6). ``bool()`` collapses
        # any such value to True and maps ``None`` to False.
        underline=bool(run.underline),
    )
def _get_paragraph_elements(self, paragraph: Paragraph): def _get_paragraph_elements(self, paragraph: Paragraph):