mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 23:12:20 +00:00
fix mypy reports
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
fce6bb14db
commit
69e8a9d499
@ -2,6 +2,7 @@ import re
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Literal, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
from docling_core.types.doc import (
|
||||
CodeItem,
|
||||
DocItemLabel,
|
||||
@ -101,7 +102,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
|
||||
artifacts_path = Path(artifacts_path)
|
||||
|
||||
self.code_formula_model = CodeFormulaPredictor(
|
||||
artifacts_path=artifacts_path,
|
||||
artifacts_path=str(artifacts_path),
|
||||
device=device,
|
||||
num_threads=accelerator_options.num_threads,
|
||||
)
|
||||
@ -226,7 +227,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
|
||||
return
|
||||
|
||||
labels: List[str] = []
|
||||
images: List[Image.Image] = []
|
||||
images: List[Union[Image.Image, np.ndarray]] = []
|
||||
elements: List[TextItem] = []
|
||||
for el in element_batch:
|
||||
assert isinstance(el.item, TextItem)
|
||||
|
@ -1,6 +1,7 @@
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Literal, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
from docling_core.types.doc import (
|
||||
DoclingDocument,
|
||||
NodeItem,
|
||||
@ -93,7 +94,7 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
|
||||
artifacts_path = Path(artifacts_path)
|
||||
|
||||
self.document_picture_classifier = DocumentFigureClassifierPredictor(
|
||||
artifacts_path=artifacts_path,
|
||||
artifacts_path=str(artifacts_path),
|
||||
device=device,
|
||||
num_threads=accelerator_options.num_threads,
|
||||
)
|
||||
@ -159,7 +160,7 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
|
||||
yield element
|
||||
return
|
||||
|
||||
images: List[Image.Image] = []
|
||||
images: List[Union[Image.Image, np.ndarray]] = []
|
||||
elements: List[PictureItem] = []
|
||||
for el in element_batch:
|
||||
assert isinstance(el, PictureItem)
|
||||
|
@ -106,10 +106,12 @@ class LayoutModel(BasePageModel):
|
||||
else:
|
||||
with TimeRecorder(conv_res, "layout"):
|
||||
assert page.size is not None
|
||||
page_image = page.get_image(scale=1.0)
|
||||
assert page_image is not None
|
||||
|
||||
clusters = []
|
||||
for ix, pred_item in enumerate(
|
||||
self.layout_predictor.predict(page.get_image(scale=1.0))
|
||||
self.layout_predictor.predict(page_image)
|
||||
):
|
||||
label = DocItemLabel(
|
||||
pred_item["label"]
|
||||
|
11
poetry.lock
generated
11
poetry.lock
generated
@ -282,18 +282,17 @@ testing = ["jaraco.test", "pytest (!=8.0.*)", "pytest (>=6,!=8.1.*)", "pytest-ch
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.13.3"
|
||||
version = "4.12.3"
|
||||
description = "Screen-scraping library"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0"
|
||||
python-versions = ">=3.6.0"
|
||||
files = [
|
||||
{file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"},
|
||||
{file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"},
|
||||
{file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
|
||||
{file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
soupsieve = ">1.2"
|
||||
typing-extensions = ">=4.0.0"
|
||||
|
||||
[package.extras]
|
||||
cchardet = ["cchardet"]
|
||||
@ -7815,4 +7814,4 @@ tesserocr = ["tesserocr"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "dcab39f8f6cc4a2b24e25774c147dce5eb9da775309d69a9304b72c47725b021"
|
||||
content-hash = "241c144d17dc30e30dc3c40cbbaeffd5e03a70c274b86be5ee04089a737fac1e"
|
||||
|
@ -47,7 +47,7 @@ scipy = [
|
||||
typer = "^0.12.5"
|
||||
python-docx = "^1.1.2"
|
||||
python-pptx = "^1.0.2"
|
||||
beautifulsoup4 = "^4.12.3"
|
||||
beautifulsoup4 = ">=4.12.3,<4.13.0"
|
||||
pandas = "^2.1.4"
|
||||
marko = "^2.1.2"
|
||||
openpyxl = "^3.1.5"
|
||||
|
Loading…
Reference in New Issue
Block a user