test: mark flaky test (#1698)

* test: cleanse Word test file

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* mark textbox file test as flaky

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* fix path usage

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

---------

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
This commit is contained in:
Panos Vagenas
2025-06-03 13:13:44 +02:00
committed by GitHub
parent cfdf4cea25
commit 61d0d6c755
5 changed files with 190 additions and 139 deletions

View File

@@ -1,5 +1,7 @@
from pathlib import Path
import pytest
from docling.backend.msword_backend import MsWordDocumentBackend
from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import (
@@ -16,6 +18,7 @@ from .verify_utils import verify_document, verify_export
GENERATE = GEN_TEST_DATA
@pytest.mark.xfail(strict=False)
def test_textbox_extraction():
in_path = Path("tests/data/docx/textbox.docx")
in_doc = InputDocument(
@@ -77,8 +80,7 @@ def get_converter():
return converter
def test_e2e_docx_conversions():
docx_paths = get_docx_paths()
def _test_e2e_docx_conversions_impl(docx_paths: list[Path]):
converter = get_converter()
for docx_path in docx_paths:
@@ -115,3 +117,17 @@ def test_e2e_docx_conversions():
gtfile=str(gt_path) + ".html",
generate=GENERATE,
), "export to html"
# Input that is filtered out of test_e2e_docx_conversions and exercised
# separately by the xfail-marked test_textbox_conversion.
flaky_path = Path("tests/data/docx/textbox.docx")
def test_e2e_docx_conversions():
    """Run the shared end-to-end DOCX helper on every path except ``flaky_path``.

    The paths come from ``get_docx_paths()``; any entry equal to
    ``flaky_path`` is left out so it cannot fail this test.
    """
    stable_paths = []
    for candidate in get_docx_paths():
        if candidate != flaky_path:
            stable_paths.append(candidate)
    _test_e2e_docx_conversions_impl(docx_paths=stable_paths)
@pytest.mark.xfail(strict=False)
def test_textbox_conversion():
    """Run the shared end-to-end DOCX helper on ``flaky_path`` alone.

    The test is marked ``xfail(strict=False)``, so a failure is reported as
    an expected failure and an unexpected pass does not fail the suite.
    """
    only_flaky = [flaky_path]
    _test_e2e_docx_conversions_impl(docx_paths=only_flaky)