mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 07:22:14 +00:00
fix mypy reports
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
fce6bb14db
commit
69e8a9d499
@ -2,6 +2,7 @@ import re
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable, List, Literal, Optional, Tuple, Union
|
from typing import Iterable, List, Literal, Optional, Tuple, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
CodeItem,
|
CodeItem,
|
||||||
DocItemLabel,
|
DocItemLabel,
|
||||||
@ -101,7 +102,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
|
|||||||
artifacts_path = Path(artifacts_path)
|
artifacts_path = Path(artifacts_path)
|
||||||
|
|
||||||
self.code_formula_model = CodeFormulaPredictor(
|
self.code_formula_model = CodeFormulaPredictor(
|
||||||
artifacts_path=artifacts_path,
|
artifacts_path=str(artifacts_path),
|
||||||
device=device,
|
device=device,
|
||||||
num_threads=accelerator_options.num_threads,
|
num_threads=accelerator_options.num_threads,
|
||||||
)
|
)
|
||||||
@ -226,7 +227,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
|
|||||||
return
|
return
|
||||||
|
|
||||||
labels: List[str] = []
|
labels: List[str] = []
|
||||||
images: List[Image.Image] = []
|
images: List[Union[Image.Image, np.ndarray]] = []
|
||||||
elements: List[TextItem] = []
|
elements: List[TextItem] = []
|
||||||
for el in element_batch:
|
for el in element_batch:
|
||||||
assert isinstance(el.item, TextItem)
|
assert isinstance(el.item, TextItem)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable, List, Literal, Optional, Tuple, Union
|
from typing import Iterable, List, Literal, Optional, Tuple, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
NodeItem,
|
NodeItem,
|
||||||
@ -93,7 +94,7 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
|
|||||||
artifacts_path = Path(artifacts_path)
|
artifacts_path = Path(artifacts_path)
|
||||||
|
|
||||||
self.document_picture_classifier = DocumentFigureClassifierPredictor(
|
self.document_picture_classifier = DocumentFigureClassifierPredictor(
|
||||||
artifacts_path=artifacts_path,
|
artifacts_path=str(artifacts_path),
|
||||||
device=device,
|
device=device,
|
||||||
num_threads=accelerator_options.num_threads,
|
num_threads=accelerator_options.num_threads,
|
||||||
)
|
)
|
||||||
@ -159,7 +160,7 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
|
|||||||
yield element
|
yield element
|
||||||
return
|
return
|
||||||
|
|
||||||
images: List[Image.Image] = []
|
images: List[Union[Image.Image, np.ndarray]] = []
|
||||||
elements: List[PictureItem] = []
|
elements: List[PictureItem] = []
|
||||||
for el in element_batch:
|
for el in element_batch:
|
||||||
assert isinstance(el, PictureItem)
|
assert isinstance(el, PictureItem)
|
||||||
|
@ -106,10 +106,12 @@ class LayoutModel(BasePageModel):
|
|||||||
else:
|
else:
|
||||||
with TimeRecorder(conv_res, "layout"):
|
with TimeRecorder(conv_res, "layout"):
|
||||||
assert page.size is not None
|
assert page.size is not None
|
||||||
|
page_image = page.get_image(scale=1.0)
|
||||||
|
assert page_image is not None
|
||||||
|
|
||||||
clusters = []
|
clusters = []
|
||||||
for ix, pred_item in enumerate(
|
for ix, pred_item in enumerate(
|
||||||
self.layout_predictor.predict(page.get_image(scale=1.0))
|
self.layout_predictor.predict(page_image)
|
||||||
):
|
):
|
||||||
label = DocItemLabel(
|
label = DocItemLabel(
|
||||||
pred_item["label"]
|
pred_item["label"]
|
||||||
|
11
poetry.lock
generated
11
poetry.lock
generated
@ -282,18 +282,17 @@ testing = ["jaraco.test", "pytest (!=8.0.*)", "pytest (>=6,!=8.1.*)", "pytest-ch
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "beautifulsoup4"
|
name = "beautifulsoup4"
|
||||||
version = "4.13.3"
|
version = "4.12.3"
|
||||||
description = "Screen-scraping library"
|
description = "Screen-scraping library"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.7.0"
|
python-versions = ">=3.6.0"
|
||||||
files = [
|
files = [
|
||||||
{file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"},
|
{file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
|
||||||
{file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"},
|
{file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
soupsieve = ">1.2"
|
soupsieve = ">1.2"
|
||||||
typing-extensions = ">=4.0.0"
|
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
cchardet = ["cchardet"]
|
cchardet = ["cchardet"]
|
||||||
@ -7815,4 +7814,4 @@ tesserocr = ["tesserocr"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.9"
|
python-versions = "^3.9"
|
||||||
content-hash = "dcab39f8f6cc4a2b24e25774c147dce5eb9da775309d69a9304b72c47725b021"
|
content-hash = "241c144d17dc30e30dc3c40cbbaeffd5e03a70c274b86be5ee04089a737fac1e"
|
||||||
|
@ -47,7 +47,7 @@ scipy = [
|
|||||||
typer = "^0.12.5"
|
typer = "^0.12.5"
|
||||||
python-docx = "^1.1.2"
|
python-docx = "^1.1.2"
|
||||||
python-pptx = "^1.0.2"
|
python-pptx = "^1.0.2"
|
||||||
beautifulsoup4 = "^4.12.3"
|
beautifulsoup4 = ">=4.12.3,<4.13.0"
|
||||||
pandas = "^2.1.4"
|
pandas = "^2.1.4"
|
||||||
marko = "^2.1.2"
|
marko = "^2.1.2"
|
||||||
openpyxl = "^3.1.5"
|
openpyxl = "^3.1.5"
|
||||||
|
Loading…
Reference in New Issue
Block a user