def test_textbox_extraction():
    """Check that textbox content from ``textbox.docx`` survives conversion.

    Converts ``tests/data/docx/textbox.docx`` with ``MsWordDocumentBackend``
    and asserts that at least one item of the resulting document has text
    beginning with a known phrase.
    """
    in_path = Path("tests/data/docx/textbox.docx")
    in_doc = InputDocument(
        path_or_stream=in_path,
        format=InputFormat.DOCX,
        backend=MsWordDocumentBackend,
    )
    backend = MsWordDocumentBackend(
        in_doc=in_doc,
        path_or_stream=in_path,
    )
    doc = backend.convert()

    # Verify that a particular textbox content is extracted.
    expected_prefix = "Suggested Reportable Symptoms:"
    # Read ``text`` defensively: an item without that attribute (or with an
    # empty/None value) should count as "no match" rather than abort the test
    # with an AttributeError/TypeError.
    textbox_found = any(
        (getattr(item, "text", None) or "").startswith(expected_prefix)
        for item, _ in doc.iterate_items()
    )
    assert textbox_found, f"no item text starts with {expected_prefix!r}"