fix mypy reports

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2025-02-06 15:55:46 +01:00
parent fce6bb14db
commit 69e8a9d499
5 changed files with 15 additions and 12 deletions

View File

@@ -2,6 +2,7 @@ import re
 from pathlib import Path
 from typing import Iterable, List, Literal, Optional, Tuple, Union
+import numpy as np
 from docling_core.types.doc import (
 CodeItem,
 DocItemLabel,
@@ -101,7 +102,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
 artifacts_path = Path(artifacts_path)
 self.code_formula_model = CodeFormulaPredictor(
-artifacts_path=artifacts_path,
+artifacts_path=str(artifacts_path),
 device=device,
 num_threads=accelerator_options.num_threads,
 )
@@ -226,7 +227,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
 return
 labels: List[str] = []
-images: List[Image.Image] = []
+images: List[Union[Image.Image, np.ndarray]] = []
 elements: List[TextItem] = []
 for el in element_batch:
 assert isinstance(el.item, TextItem)

View File

@@ -1,6 +1,7 @@
 from pathlib import Path
 from typing import Iterable, List, Literal, Optional, Tuple, Union
+import numpy as np
 from docling_core.types.doc import (
 DoclingDocument,
 NodeItem,
@@ -93,7 +94,7 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
 artifacts_path = Path(artifacts_path)
 self.document_picture_classifier = DocumentFigureClassifierPredictor(
-artifacts_path=artifacts_path,
+artifacts_path=str(artifacts_path),
 device=device,
 num_threads=accelerator_options.num_threads,
 )
@@ -159,7 +160,7 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
 yield element
 return
-images: List[Image.Image] = []
+images: List[Union[Image.Image, np.ndarray]] = []
 elements: List[PictureItem] = []
 for el in element_batch:
 assert isinstance(el, PictureItem)

View File

@@ -106,10 +106,12 @@ class LayoutModel(BasePageModel):
 else:
 with TimeRecorder(conv_res, "layout"):
 assert page.size is not None
+page_image = page.get_image(scale=1.0)
+assert page_image is not None
 clusters = []
 for ix, pred_item in enumerate(
-self.layout_predictor.predict(page.get_image(scale=1.0))
+self.layout_predictor.predict(page_image)
 ):
 label = DocItemLabel(
 pred_item["label"]

11
poetry.lock generated
View File

@@ -282,18 +282,17 @@ testing = ["jaraco.test", "pytest (!=8.0.*)", "pytest (>=6,!=8.1.*)", "pytest-ch
 [[package]]
 name = "beautifulsoup4"
-version = "4.13.3"
+version = "4.12.3"
 description = "Screen-scraping library"
 optional = false
-python-versions = ">=3.7.0"
+python-versions = ">=3.6.0"
 files = [
-{file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"},
-{file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"},
+{file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
+{file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
 ]
 [package.dependencies]
 soupsieve = ">1.2"
-typing-extensions = ">=4.0.0"
 [package.extras]
 cchardet = ["cchardet"]
@@ -7815,4 +7814,4 @@ tesserocr = ["tesserocr"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "dcab39f8f6cc4a2b24e25774c147dce5eb9da775309d69a9304b72c47725b021"
+content-hash = "241c144d17dc30e30dc3c40cbbaeffd5e03a70c274b86be5ee04089a737fac1e"

View File

@@ -47,7 +47,7 @@ scipy = [
 typer = "^0.12.5"
 python-docx = "^1.1.2"
 python-pptx = "^1.0.2"
-beautifulsoup4 = "^4.12.3"
+beautifulsoup4 = ">=4.12.3,<4.13.0"
 pandas = "^2.1.4"
 marko = "^2.1.2"
 openpyxl = "^3.1.5"