mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
ci: add coverage and ruff (#1383)
* add coverage calculation and push Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * new codecov version and usage of token Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * enable ruff formatter instead of black and isort Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply ruff lint fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply ruff unsafe fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add removed imports Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * runs 1 on linter issues Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * finalize linter fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * Update pyproject.toml Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
@@ -1,4 +1,3 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from docling.backend.msword_backend import MsWordDocumentBackend
|
||||
@@ -43,7 +42,6 @@ def test_heading_levels():
|
||||
|
||||
|
||||
def get_docx_paths():
|
||||
|
||||
# Define the directory you want to search
|
||||
directory = Path("./tests/data/docx/")
|
||||
|
||||
@@ -53,14 +51,12 @@ def get_docx_paths():
|
||||
|
||||
|
||||
def get_converter():
|
||||
|
||||
converter = DocumentConverter(allowed_formats=[InputFormat.DOCX])
|
||||
|
||||
return converter
|
||||
|
||||
|
||||
def test_e2e_docx_conversions():
|
||||
|
||||
docx_paths = get_docx_paths()
|
||||
converter = get_converter()
|
||||
|
||||
@@ -76,20 +72,20 @@ def test_e2e_docx_conversions():
|
||||
doc: DoclingDocument = conv_result.document
|
||||
|
||||
pred_md: str = doc.export_to_markdown()
|
||||
assert verify_export(
|
||||
pred_md, str(gt_path) + ".md", generate=GENERATE
|
||||
), "export to md"
|
||||
assert verify_export(pred_md, str(gt_path) + ".md", generate=GENERATE), (
|
||||
"export to md"
|
||||
)
|
||||
|
||||
pred_itxt: str = doc._export_to_indented_text(
|
||||
max_text_len=70, explicit_tables=False
|
||||
)
|
||||
assert verify_export(
|
||||
pred_itxt, str(gt_path) + ".itxt", generate=GENERATE
|
||||
), "export to indented-text"
|
||||
assert verify_export(pred_itxt, str(gt_path) + ".itxt", generate=GENERATE), (
|
||||
"export to indented-text"
|
||||
)
|
||||
|
||||
assert verify_document(
|
||||
doc, str(gt_path) + ".json", generate=GENERATE
|
||||
), "document document"
|
||||
assert verify_document(doc, str(gt_path) + ".json", generate=GENERATE), (
|
||||
"document document"
|
||||
)
|
||||
|
||||
if docx_path.name == "word_tables.docx":
|
||||
pred_html: str = doc.export_to_html()
|
||||
|
||||
Reference in New Issue
Block a user