mirror of https://github.com/DS4SD/docling.git

apply ruff unsafe fixes
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
parent 73cec158c6
commit 557efde7dc
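
For context: these are ruff's unsafe autofixes, rewrites the tool only applies when explicitly requested (typically with an invocation along the lines of "ruff check --fix --unsafe-fixes", though the exact command is not recorded in the commit), because they can change behavior in edge cases. The hunks below are dominated by a handful of recurring rewrites: comparisons like == None and == True replaced with identity checks, redundant list brackets dropped inside any(...), single-element list indexing replaced with next(...), printf-style formatting replaced with str.format, and unused assignments removed. A condensed sketch of these patterns follows the diff.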
@@ -81,8 +81,6 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
         title, section headers, text, lists, and tables.
         """
-
         content = ""
-
         in_list = False
         in_table = False
 
@@ -268,14 +266,14 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
 
     def _get_current_level(self, parents):
         for k, v in parents.items():
-            if v == None and k > 0:
+            if v is None and k > 0:
                 return k - 1
 
         return 0
 
     def _get_current_parent(self, parents):
         for k, v in parents.items():
-            if v == None and k > 0:
+            if v is None and k > 0:
                 return parents[k - 1]
 
         return None
@@ -323,7 +321,7 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
                 "marker": marker,
                 "text": text.strip(),
                 "numbered": False,
-                "indent": 0 if indent == None else len(indent),
+                "indent": 0 if indent is None else len(indent),
             }
         else:
             return {
@@ -331,7 +329,7 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
                 "marker": marker,
                 "text": text.strip(),
                 "numbered": True,
-                "indent": 0 if indent == None else len(indent),
+                "indent": 0 if indent is None else len(indent),
             }
         else:
             # Fallback if no match
@@ -328,7 +328,7 @@ class oMath2Latex(Tag2Method):
         t_dict = self.process_children_dict(elm, include=("e", "lim"))
         latex_s = LIM_FUNC.get(t_dict["e"])
         if not latex_s:
-            raise RuntimeError("Not support lim %s" % t_dict["e"])
+            raise RuntimeError("Not support lim {}".format(t_dict["e"]))
         else:
             return latex_s.format(lim=t_dict.get("lim"))
 
@@ -146,7 +146,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                 item for item in element.next_siblings if isinstance(item, Tag)
             ]
             if element.next_sibling is None or any(
-                [item.name in TAGS_FOR_NODE_ITEMS for item in siblings]
+                item.name in TAGS_FOR_NODE_ITEMS for item in siblings
             ):
                 text = text.strip()
                 if text and tag.name in ["div"]:
@@ -126,7 +126,6 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBackend):
         enum_list_item_value = 0
-        new_list = None
         bullet_type = "None"
         list_text = ""
         list_label = GroupLabel.LIST
         doc_label = DocItemLabel.LIST_ITEM
         prov = self.generate_prov(shape, slide_ind, shape.text.strip(), slide_size)
@@ -368,8 +367,6 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBackend):
         slide_width = pptx_obj.slide_width
         slide_height = pptx_obj.slide_height
 
-        text_content = []  # type: ignore
-
         max_levels = 10
         parents = {}  # type: ignore
         for i in range(max_levels):
@@ -383,7 +380,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBackend):
         )
 
         slide_size = Size(width=slide_width, height=slide_height)
-        parent_page = doc.add_page(page_no=slide_ind + 1, size=slide_size)
+        doc.add_page(page_no=slide_ind + 1, size=slide_size)
 
         def handle_shapes(shape, parent_slide, slide_ind, doc, slide_size):
             handle_groups(shape, parent_slide, slide_ind, doc, slide_size)
@@ -158,7 +158,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
     def _get_level(self) -> int:
         """Return the first None index."""
         for k, v in self.parents.items():
-            if k >= 0 and v == None:
+            if k >= 0 and v is None:
                 return k
         return 0
 
@@ -102,13 +102,13 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
 
             doc_info: etree.DocInfo = self.tree.docinfo
             if doc_info.system_url and any(
-                [kwd in doc_info.system_url for kwd in JATS_DTD_URL]
+                kwd in doc_info.system_url for kwd in JATS_DTD_URL
             ):
                 self.valid = True
                 return
             for ent in doc_info.internalDTD.iterentities():
                 if ent.system_url and any(
-                    [kwd in ent.system_url for kwd in JATS_DTD_URL]
+                    kwd in ent.system_url for kwd in JATS_DTD_URL
                 ):
                     self.valid = True
                     return
@@ -232,10 +232,9 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
             # TODO: once superscript is supported, add label with formatting
             aff = aff.removeprefix(f"{label[0].text}, ")
             affiliation_names.append(aff)
-        affiliation_ids_names = {
-            id: name
-            for id, name in zip(meta.xpath(".//aff[@id]/@id"), affiliation_names)
-        }
+        affiliation_ids_names = dict(
+            zip(meta.xpath(".//aff[@id]/@id"), affiliation_names)
+        )
 
         # Get author names and affiliation names
         for author_node in meta.xpath(
@@ -1472,9 +1472,7 @@ class XmlTable:
             if cw == 0:
                 offset_w0.append(col["offset"][ic])
 
-        min_colinfo["offset"] = sorted(
-            list(set(col["offset"] + min_colinfo["offset"]))
-        )
+        min_colinfo["offset"] = sorted(set(col["offset"] + min_colinfo["offset"]))
 
         # add back the 0 width cols to offset list
        offset_w0 = list(set(offset_w0))
@@ -430,7 +430,7 @@ def convert(
     settings.debug.visualize_ocr = debug_visualize_ocr
 
     if from_formats is None:
-        from_formats = [e for e in InputFormat]
+        from_formats = list(InputFormat)
 
     parsed_headers: Optional[Dict[str, str]] = None
     if headers is not None:
@@ -89,14 +89,13 @@ def download(
             "Cannot simultaneously set 'all' parameter and specify models to download."
         )
     if not quiet:
-        FORMAT = "%(message)s"
         logging.basicConfig(
             level=logging.INFO,
             format="[blue]%(message)s[/blue]",
             datefmt="[%X]",
             handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
         )
-    to_download = models or ([m for m in _AvailableModels] if all else _default_models)
+    to_download = models or (list(_AvailableModels) if all else _default_models)
     output_dir = download_models(
         output_dir=output_dir,
         force=force,
@@ -172,7 +172,7 @@ class DocumentConverter:
         format_options: Optional[Dict[InputFormat, FormatOption]] = None,
     ):
         self.allowed_formats = (
-            allowed_formats if allowed_formats is not None else [e for e in InputFormat]
+            allowed_formats if allowed_formats is not None else list(InputFormat)
         )
         self.format_to_options = {
             format: (
@@ -59,12 +59,10 @@ class EasyOcrModel(BaseOcrModel):
             device = decide_device(accelerator_options.device)
             # Enable easyocr GPU if running on CUDA, MPS
             use_gpu = any(
-                [
-                    device.startswith(x)
-                    for x in [
-                        AcceleratorDevice.CUDA.value,
-                        AcceleratorDevice.MPS.value,
-                    ]
-                ]
+                device.startswith(x)
+                for x in [
+                    AcceleratorDevice.CUDA.value,
+                    AcceleratorDevice.MPS.value,
+                ]
             )
         else:
@@ -33,7 +33,7 @@ class BaseFactory(Generic[A], metaclass=ABCMeta):
 
     @property
     def registered_kind(self) -> list[str]:
-        return list(opt.kind for opt in self._classes.keys())
+        return [opt.kind for opt in self._classes.keys()]
 
     def get_enum(self) -> enum.Enum:
         return enum.Enum(
@@ -121,6 +121,8 @@ class HuggingFaceMlxModel(BasePageModel):
                 generation_time = time.time() - start_time
                 page_tags = output
 
+                _log.debug(f"Generation time {generation_time:.2f} seconds.")
+
                 # inference_time = time.time() - start_time
                 # tokens_per_second = num_tokens / generation_time
                 # print("")
@@ -166,6 +166,10 @@ class HuggingFaceVlmModel(BasePageModel):
                 num_tokens = len(generated_ids[0])
                 page_tags = generated_texts
 
+                _log.debug(
+                    f"Generated {num_tokens} tokens in time {generation_time:.2f} seconds."
+                )
+
                 # inference_time = time.time() - start_time
                 # tokens_per_second = num_tokens / generation_time
                 # print("")
@@ -64,7 +64,7 @@ class TesseractOcrCliModel(BaseOcrModel):
         )
 
     def _get_name_and_version(self) -> Tuple[str, str]:
-        if self._name != None and self._version != None:
+        if self._name is not None and self._version is not None:
             return self._name, self._version  # type: ignore
 
         cmd = [self.options.tesseract_cmd, "--version"]
@@ -187,7 +187,7 @@ class TesseractOcrCliModel(BaseOcrModel):
         self._tesseract_languages = df[0].tolist()[1:]
 
         # Decide the script prefix
-        if any([l.startswith("script/") for l in self._tesseract_languages]):
+        if any(l.startswith("script/") for l in self._tesseract_languages):
             script_prefix = "script/"
         else:
             script_prefix = ""
@@ -76,7 +76,7 @@ class TesseractOcrModel(BaseOcrModel):
         _log.debug("Initializing TesserOCR: %s", tesseract_version)
         lang = "+".join(self.options.lang)
 
-        if any([l.startswith("script/") for l in self._tesserocr_languages]):
+        if any(l.startswith("script/") for l in self._tesserocr_languages):
             self.script_prefix = "script/"
         else:
             self.script_prefix = ""
@@ -13,7 +13,7 @@ def chunkify(iterator, chunk_size):
     if isinstance(iterator, List):
         iterator = iter(iterator)
     for first in iterator:  # Take the first element from the iterator
-        yield [first] + list(islice(iterator, chunk_size - 1))
+        yield [first, *list(islice(iterator, chunk_size - 1))]
 
 
 def create_file_hash(path_or_stream: Union[BytesIO, Path]) -> str:
@@ -544,7 +544,7 @@
    "source": [
     "doc = backend.convert()\n",
     "\n",
-    "claims_sec = [item for item in doc.texts if item.text == \"CLAIMS\"][0]\n",
+    "claims_sec = next(item for item in doc.texts if item.text == \"CLAIMS\")\n",
     "print(f'Patent \"{doc.texts[0].text}\" has {len(claims_sec.children)} claims')"
    ]
   },
@@ -84,7 +84,7 @@ def main():
             )
         }
     )
-    result = doc_converter.convert(input_doc_path)
+    doc_converter.convert(input_doc_path)
 
 
 if __name__ == "__main__":
@@ -807,10 +807,12 @@
    }
   ],
   "source": [
+    "from typing import Optional\n",
+    "\n",
    "from azure.search.documents.models import VectorizableTextQuery\n",
    "\n",
    "\n",
-    "def generate_chat_response(prompt: str, system_message: str = None):\n",
+    "def generate_chat_response(prompt: str, system_message: Optional[str] = None):\n",
    "    \"\"\"\n",
    "    Generates a single-turn chat response using Azure OpenAI Chat.\n",
    "    If you need multi-turn conversation or follow-up queries, you'll have to\n",
@@ -51,10 +51,9 @@ def main():
         }
     )
 
     start_time = time.time()
-
     conv_res = doc_converter.convert(input_doc_path)
     conv_doc = conv_res.document
     doc_filename = conv_res.input.file
 
     # Save markdown with embedded pictures in original text
     md_filename = output_dir / f"{doc_filename}-with-images-orig.md"
@@ -38,7 +38,7 @@ def test_asciidocs_examples():
 
         if os.path.exists(gname):
             with open(gname) as fr:
-                true_mddoc = fr.read()
+                fr.read()
 
             # assert pred_mddoc == true_mddoc, "pred_mddoc!=true_mddoc for asciidoc"
         else:
@@ -66,7 +66,7 @@ def test_crop_page_image(test_doc_path):
     page_backend: DoclingParsePageBackend = doc_backend.load_page(0)
 
     # Crop out "Figure 1" from the DocLayNet paper
-    im = page_backend.get_page_image(
+    page_backend.get_page_image(
         scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527)
     )
     # im.show()
@@ -65,7 +65,7 @@ def test_crop_page_image(test_doc_path):
     page_backend: DoclingParseV2PageBackend = doc_backend.load_page(0)
 
     # Crop out "Figure 1" from the DocLayNet paper
-    im = page_backend.get_page_image(
+    page_backend.get_page_image(
         scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527)
     )
     # im.show()
@@ -65,7 +65,7 @@ def test_crop_page_image(test_doc_path):
     page_backend: DoclingParseV4PageBackend = doc_backend.load_page(0)
 
     # Crop out "Figure 1" from the DocLayNet paper
-    im = page_backend.get_page_image(
+    page_backend.get_page_image(
         scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527)
     )
     # im.show()
@@ -79,7 +79,7 @@ def test_pages(documents) -> None:
         documents: The paths and converted documents.
     """
     # number of pages from the backend method
-    path = [item for item in get_xlsx_paths() if item.stem == "test-01"][0]
+    path = next(item for item in get_xlsx_paths() if item.stem == "test-01")
     in_doc = InputDocument(
         path_or_stream=path,
         format=InputFormat.XLSX,
@@ -90,7 +90,7 @@ def test_pages(documents) -> None:
     assert backend.page_count() == 3
 
     # number of pages from the converted document
-    doc = [item for path, item in documents if path.stem == "test-01"][0]
+    doc = next(item for path, item in documents if path.stem == "test-01")
     assert len(doc.pages) == 3
 
     # page sizes as number of cells
@@ -129,7 +129,7 @@ def test_tables(tables):
     """Test the table parser."""
     # CHECK table in file tables_20180000016.xml
     file_name = "tables_ipa20180000016.xml"
-    file_table = [item[1] for item in tables if item[0].name == file_name][0]
+    file_table = next(item[1] for item in tables if item[0].name == file_name)
     assert file_table.num_rows == 13
     assert file_table.num_cols == 10
     assert len(file_table.table_cells) == 130
@@ -140,7 +140,7 @@ def test_patent_uspto_ice(patents):
 
     # CHECK application doc number 20200022300
     file_name = "ipa20200022300.xml"
-    doc = [item[1] for item in patents if item[0].name == file_name][0]
+    doc = next(item[1] for item in patents if item[0].name == file_name)
     if GENERATE:
         _generate_groundtruth(doc, Path(file_name).stem)
 
@@ -278,7 +278,7 @@ def test_patent_uspto_ice(patents):
 
     # CHECK application doc number 20180000016 for HTML entities, level 2 headings, tables
     file_name = "ipa20180000016.xml"
-    doc = [item[1] for item in patents if item[0].name == file_name][0]
+    doc = next(item[1] for item in patents if item[0].name == file_name)
     if GENERATE:
         _generate_groundtruth(doc, Path(file_name).stem)
 
@@ -348,7 +348,7 @@ def test_patent_uspto_ice(patents):
 
     # CHECK application doc number 20110039701 for complex long tables
     file_name = "ipa20110039701.xml"
-    doc = [item[1] for item in patents if item[0].name == file_name][0]
+    doc = next(item[1] for item in patents if item[0].name == file_name)
     assert doc.name == file_name
     assert len(doc.tables) == 17
 
@@ -358,7 +358,7 @@ def test_patent_uspto_grant_v2(patents):
 
     # CHECK application doc number 06442728
     file_name = "pg06442728.xml"
-    doc = [item[1] for item in patents if item[0].name == file_name][0]
+    doc = next(item[1] for item in patents if item[0].name == file_name)
     if GENERATE:
         _generate_groundtruth(doc, Path(file_name).stem)
 
@@ -402,7 +402,7 @@ def test_patent_uspto_app_v1(patents):
 
     # CHECK application doc number 20010031492
     file_name = "pa20010031492.xml"
-    doc = [item[1] for item in patents if item[0].name == file_name][0]
+    doc = next(item[1] for item in patents if item[0].name == file_name)
     if GENERATE:
         _generate_groundtruth(doc, Path(file_name).stem)
 
@@ -432,7 +432,7 @@ def test_patent_uspto_grant_aps(patents):
 
     # CHECK application doc number 057006474
     file_name = "pftaps057006474.txt"
-    doc = [item[1] for item in patents if item[0].name == file_name][0]
+    doc = next(item[1] for item in patents if item[0].name == file_name)
     if GENERATE:
         _generate_groundtruth(doc, Path(file_name).stem)
 
@@ -66,7 +66,7 @@ def test_crop_page_image(test_doc_path):
     page_backend: PyPdfiumPageBackend = doc_backend.load_page(0)
 
     # Crop out "Figure 1" from the DocLayNet paper
-    im = page_backend.get_page_image(
+    page_backend.get_page_image(
         scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527)
     )
     # im.show()
@@ -14,7 +14,7 @@ from docling.document_converter import PdfFormatOption
 def test_in_doc_from_valid_path():
     test_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
     doc = _make_input_doc(test_doc_path)
-    assert doc.valid == True
+    assert doc.valid is True
 
 
 def test_in_doc_from_invalid_path():
@@ -22,7 +22,7 @@ def test_in_doc_from_invalid_path():
 
     doc = _make_input_doc(test_doc_path)
 
-    assert doc.valid == False
+    assert doc.valid is False
 
 
 def test_in_doc_from_valid_buf():
@@ -30,7 +30,7 @@ def test_in_doc_from_valid_buf():
     stream = DocumentStream(name="my_doc.pdf", stream=buf)
 
     doc = _make_input_doc_from_stream(stream)
-    assert doc.valid == True
+    assert doc.valid is True
 
 
 def test_in_doc_from_invalid_buf():
@@ -38,7 +38,7 @@ def test_in_doc_from_invalid_buf():
     stream = DocumentStream(name="my_doc.pdf", stream=buf)
 
     doc = _make_input_doc_from_stream(stream)
-    assert doc.valid == False
+    assert doc.valid is False
 
 
 def test_image_in_pdf_backend():
@@ -82,7 +82,7 @@ def test_in_doc_with_page_range():
         backend=PyPdfiumDocumentBackend,
         limits=limits,
     )
-    assert doc.valid == True
+    assert doc.valid is True
 
     limits.page_range = (9, 9)
 
@@ -92,7 +92,7 @@ def test_in_doc_with_page_range():
         backend=PyPdfiumDocumentBackend,
         limits=limits,
     )
-    assert doc.valid == True
+    assert doc.valid is True
 
     limits.page_range = (11, 12)
 
@@ -102,7 +102,7 @@ def test_in_doc_with_page_range():
         backend=PyPdfiumDocumentBackend,
         limits=limits,
     )
-    assert doc.valid == False
+    assert doc.valid is False
 
 
 def test_guess_format(tmp_path):
@@ -187,17 +187,17 @@ def test_guess_format(tmp_path):
     )
     doc_path = temp_dir / "docling_test.xml"
     doc_path.write_text(xml_content, encoding="utf-8")
-    assert dci._guess_format(doc_path) == None
+    assert dci._guess_format(doc_path) is None
     buf = BytesIO(Path(doc_path).open("rb").read())
     stream = DocumentStream(name="docling_test.xml", stream=buf)
-    assert dci._guess_format(stream) == None
+    assert dci._guess_format(stream) is None
 
     # Invalid USPTO patent (as plain text)
     stream = DocumentStream(name="pftaps057006474.txt", stream=BytesIO(b"xyz"))
-    assert dci._guess_format(stream) == None
+    assert dci._guess_format(stream) is None
     doc_path = temp_dir / "pftaps_wrong.txt"
     doc_path.write_text("xyz", encoding="utf-8")
-    assert dci._guess_format(doc_path) == None
+    assert dci._guess_format(doc_path) is None
 
     # Valid Docling JSON
     test_str = '{"name": ""}'
@@ -291,7 +291,7 @@ def verify_conversion_result_v1(
     input_path: Path,
     doc_result: ConversionResult,
     generate: bool = False,
-    ocr_engine: str = None,
+    ocr_engine: Optional[str] = None,
     fuzzy: bool = False,
 ):
     PageList = TypeAdapter(List[Page])
@@ -375,7 +375,7 @@ def verify_conversion_result_v2(
     input_path: Path,
     doc_result: ConversionResult,
     generate: bool = False,
-    ocr_engine: str = None,
+    ocr_engine: Optional[str] = None,
     fuzzy: bool = False,
 ):
     PageList = TypeAdapter(List[Page])
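
As promised above, here are the recurring patterns condensed into a before/after sketch. The ruff rule codes are my attribution and the names (items, value, first, chunk) are placeholders; none of this code appears in the commit itself:

    from itertools import islice

    items = ["a", "ab", "abc"]
    value = None

    # E711: compare to None with "is", not "=="
    # before: if value == None: ...
    if value is None:
        pass

    # C419: any()/all() accept a generator directly; the list brackets are redundant
    # before: any([s.startswith("a") for s in items])
    any(s.startswith("a") for s in items)

    # RUF015: don't build a whole list just to take its first element
    # before: first = [s for s in items if len(s) > 1][0]
    first = next(s for s in items if len(s) > 1)

    # RUF005: unpack into the list literal instead of concatenating
    # before: chunk = [first] + list(islice(iter(items), 2))
    chunk = [first, *islice(iter(items), 2)]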