mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
feat: add textbox content extraction in msword_backend
Signed-off-by: Andrew <tsai247365@gmail.com>
This commit is contained in:
parent
b09fd45a46
commit
498fc79392
@ -359,15 +359,17 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _get_format_from_run(cls, run: Run) -> Optional[Formatting]:
|
def _get_format_from_run(cls, run: Run) -> Optional[Formatting]:
|
||||||
has_any_formatting = run.bold or run.italic or run.underline
|
# The .bold and .italic properties are booleans, but .underline can be an enum
|
||||||
return (
|
# like WD_UNDERLINE.THICK (value 6), so we need to convert it to a boolean
|
||||||
Formatting(
|
has_bold = run.bold or False
|
||||||
bold=run.bold or False,
|
has_italic = run.italic or False
|
||||||
italic=run.italic or False,
|
# Convert any truthy underline value (True or a WD_UNDERLINE member) to True; None and False become False
|
||||||
underline=run.underline or False,
|
has_underline = bool(run.underline is not None and run.underline)
|
||||||
)
|
|
||||||
if has_any_formatting
|
return Formatting(
|
||||||
else None
|
bold=has_bold,
|
||||||
|
italic=has_italic,
|
||||||
|
underline=has_underline,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_paragraph_elements(self, paragraph: Paragraph):
|
def _get_paragraph_elements(self, paragraph: Paragraph):
|
||||||
|
Loading…
Reference in New Issue
Block a user