Added HTML backend implementation and a few improvements to other backends

Signed-off-by: Maxim Lysak <mly@zurich.ibm.com>
This commit is contained in:
Maxim Lysak
2024-10-08 11:14:44 +02:00
parent f773d8a621
commit 89e58ca730
6 changed files with 420 additions and 34 deletions

View File

@@ -23,11 +23,11 @@ _log = logging.getLogger(__name__)
# NOTE(review): presumably switches on an experimental code path further down
# the script; the code that reads this flag is not visible in this hunk — confirm.
USE_EXPERIMENTAL = False
# Files passed to DocumentConversionInput.from_paths() below: HTML, DOCX,
# PPTX and PDF samples from tests/data.
# NOTE(review): each commented-out entry duplicates the active entry directly
# beneath it — these look like the removed/added sides of a diff whose markers
# were lost; confirm before deleting either copy.
input_paths = [
# Path("tests/data/wiki_duck.html"),
Path("tests/data/wiki_duck.html"),
Path("tests/data/word_sample.docx"),
Path("tests/data/lorem_ipsum.docx"),
Path("tests/data/powerpoint_sample.pptx"),
# Path("tests/data/2206.01062.pdf"),
Path("tests/data/2206.01062.pdf"),
]
# NOTE(review): `input` shadows the Python builtin of the same name for the
# rest of this module; a rename would need to update every later use, which is
# outside the visible lines.
input = DocumentConversionInput.from_paths(input_paths)