mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
test: avoid testing exact JSON (#1027)
* test: avoid testing exact JSON

  Avoid testing exact JSON output in html and xml backends.
  Reuse the JSON verify helper function among backend test files.
  Improve type annotations in html backend.

  Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>

* Update tests/test_backend_patent_uspto.py

  Co-authored-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
  Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>

---------

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
Co-authored-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
6796f0a132
commit
1ac010354f
@@ -1,6 +1,5 @@
|
||||
"""Test methods in module docling.backend.patent_uspto_backend.py."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
@@ -14,6 +13,8 @@ from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend, XmlTab
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from docling.datamodel.document import InputDocument
|
||||
|
||||
from .verify_utils import verify_document
|
||||
|
||||
GENERATE: bool = False
|
||||
DATA_PATH: Path = Path("./tests/data/uspto/")
|
||||
GT_PATH: Path = Path("./tests/data/groundtruth/docling_v2/")
|
||||
@@ -110,12 +111,11 @@ def test_patent_groundtruth(patents, groundtruth):
|
||||
assert (
|
||||
pred_md == gt_names[md_name]
|
||||
), f"Markdown file mismatch against groundtruth {md_name}"
|
||||
json_name = path.stem + ".json"
|
||||
if json_name in gt_names:
|
||||
pred_json = json.dumps(doc.export_to_dict(), indent=2)
|
||||
assert (
|
||||
pred_json == gt_names[json_name]
|
||||
), f"JSON file mismatch against groundtruth {json_name}"
|
||||
json_path = path.with_suffix(".json")
|
||||
if json_path.stem in gt_names:
|
||||
assert verify_document(
|
||||
doc, str(json_path), GENERATE
|
||||
), f"JSON file mismatch against groundtruth {json_path}"
|
||||
itxt_name = path.stem + ".itxt"
|
||||
if itxt_name in gt_names:
|
||||
pred_itxt = doc._export_to_indented_text()
|
||||
|
||||
Reference in New Issue
Block a user