fix mypy reports

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2025-02-06 15:55:46 +01:00
parent fce6bb14db
commit 69e8a9d499
5 changed files with 15 additions and 12 deletions

View File

@@ -2,6 +2,7 @@ import re
 from pathlib import Path
 from typing import Iterable, List, Literal, Optional, Tuple, Union
+import numpy as np
 from docling_core.types.doc import (
 CodeItem,
 DocItemLabel,
@@ -101,7 +102,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
 artifacts_path = Path(artifacts_path)
 self.code_formula_model = CodeFormulaPredictor(
-artifacts_path=artifacts_path,
+artifacts_path=str(artifacts_path),
 device=device,
 num_threads=accelerator_options.num_threads,
 )
@@ -226,7 +227,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
 return
 labels: List[str] = []
-images: List[Image.Image] = []
+images: List[Union[Image.Image, np.ndarray]] = []
 elements: List[TextItem] = []
 for el in element_batch:
 assert isinstance(el.item, TextItem)

View File

@@ -1,6 +1,7 @@
 from pathlib import Path
 from typing import Iterable, List, Literal, Optional, Tuple, Union
+import numpy as np
 from docling_core.types.doc import (
 DoclingDocument,
 NodeItem,
@@ -93,7 +94,7 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
 artifacts_path = Path(artifacts_path)
 self.document_picture_classifier = DocumentFigureClassifierPredictor(
-artifacts_path=artifacts_path,
+artifacts_path=str(artifacts_path),
 device=device,
 num_threads=accelerator_options.num_threads,
 )
@@ -159,7 +160,7 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
 yield element
 return
-images: List[Image.Image] = []
+images: List[Union[Image.Image, np.ndarray]] = []
 elements: List[PictureItem] = []
 for el in element_batch:
 assert isinstance(el, PictureItem)

View File

@@ -106,10 +106,12 @@ class LayoutModel(BasePageModel):
 else:
 with TimeRecorder(conv_res, "layout"):
 assert page.size is not None
+page_image = page.get_image(scale=1.0)
+assert page_image is not None
 clusters = []
 for ix, pred_item in enumerate(
-self.layout_predictor.predict(page.get_image(scale=1.0))
+self.layout_predictor.predict(page_image)
 ):
 label = DocItemLabel(
 pred_item["label"]

11
poetry.lock generated
View File

@@ -282,18 +282,17 @@ testing = ["jaraco.test", "pytest (!=8.0.*)", "pytest (>=6,!=8.1.*)", "pytest-ch
 [[package]]
 name = "beautifulsoup4"
-version = "4.13.3"
+version = "4.12.3"
 description = "Screen-scraping library"
 optional = false
-python-versions = ">=3.7.0"
+python-versions = ">=3.6.0"
 files = [
-{file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"},
-{file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"},
+{file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
+{file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
 ]
 [package.dependencies]
 soupsieve = ">1.2"
-typing-extensions = ">=4.0.0"
 [package.extras]
 cchardet = ["cchardet"]
@@ -7815,4 +7814,4 @@ tesserocr = ["tesserocr"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "dcab39f8f6cc4a2b24e25774c147dce5eb9da775309d69a9304b72c47725b021"
+content-hash = "241c144d17dc30e30dc3c40cbbaeffd5e03a70c274b86be5ee04089a737fac1e"

View File

@@ -47,7 +47,7 @@ scipy = [
 typer = "^0.12.5"
 python-docx = "^1.1.2"
 python-pptx = "^1.0.2"
-beautifulsoup4 = "^4.12.3"
+beautifulsoup4 = ">=4.12.3,<4.13.0"
 pandas = "^2.1.4"
 marko = "^2.1.2"
 openpyxl = "^3.1.5"