First run of fixes for linter issues

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2025-04-14 16:03:01 +02:00
parent 0d261e52ed
commit ad28271b4a
10 changed files with 19 additions and 19 deletions

View File

@ -168,7 +168,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
) )
self.inline_texts = [] self.inline_texts = []
def _iterate_elements( def _iterate_elements( # noqa: C901
self, self,
element: marko.element.Element, element: marko.element.Element,
depth: int, depth: int,

View File

@ -493,7 +493,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
), ),
), ),
) )
except: except Exception:
_log.error("could not extract the image from excel sheets") _log.error("could not extract the image from excel sheets")
return doc return doc

View File

@ -418,7 +418,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
else prev_parent else prev_parent
) )
def _handle_text_elements( def _handle_text_elements( # noqa: C901
self, self,
element: BaseOxmlElement, element: BaseOxmlElement,
docx_obj: DocxDocument, docx_obj: DocxDocument,

View File

@ -606,7 +606,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
) )
return return
def _walk_linear( def _walk_linear( # noqa: C901
self, doc: DoclingDocument, parent: NodeItem, node: etree._Element self, doc: DoclingDocument, parent: NodeItem, node: etree._Element
) -> str: ) -> str:
skip_tags = ["term"] skip_tags = ["term"]

View File

@ -15,6 +15,7 @@ import typer
from docling_core.types.doc import ImageRefMode from docling_core.types.doc import ImageRefMode
from docling_core.utils.file import resolve_source_to_path from docling_core.utils.file import resolve_source_to_path
from pydantic import TypeAdapter from pydantic import TypeAdapter
from rich.console import Console
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
@ -54,7 +55,6 @@ warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr") warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
from rich.console import Console
console = Console() console = Console()
err_console = Console(stderr=True) err_console = Console(stderr=True)
@ -233,7 +233,7 @@ def _split_list(raw: Optional[str]) -> Optional[List[str]]:
@app.command(no_args_is_help=True) @app.command(no_args_is_help=True)
def convert( def convert( # noqa: C901
input_sources: Annotated[ input_sources: Annotated[
List[str], List[str],
typer.Argument( typer.Argument(

View File

@ -12,7 +12,7 @@ from docling_core.types.doc import (
from docling_core.types.doc.page import SegmentedPdfPage, TextCell from docling_core.types.doc.page import SegmentedPdfPage, TextCell
# DO NOT REMOVE; explicitly exposed from this location # DO NOT REMOVE; explicitly exposed from this location
from docling_core.types.io import ( # noqa: F401 from docling_core.types.io import (
DocumentStream, DocumentStream,
) )
from PIL.Image import Image from PIL.Image import Image

View File

@ -19,7 +19,7 @@ from typing import (
import filetype import filetype
# DO NOT REMOVE; explicitly exposed from this location # DO NOT REMOVE; explicitly exposed from this location
from docling_core.types.doc import ( # noqa: F401 from docling_core.types.doc import (
DocItem, DocItem,
DocItemLabel, DocItemLabel,
DoclingDocument, DoclingDocument,
@ -28,8 +28,8 @@ from docling_core.types.doc import ( # noqa: F401
TableItem, TableItem,
TextItem, TextItem,
) )
from docling_core.types.doc.document import ListItem # noqa: F401 from docling_core.types.doc.document import ListItem
from docling_core.types.legacy_doc.base import ( # noqa: F401 from docling_core.types.legacy_doc.base import (
BaseText, BaseText,
Figure, Figure,
GlmTableCell, GlmTableCell,
@ -37,14 +37,14 @@ from docling_core.types.legacy_doc.base import ( # noqa: F401
PageReference, PageReference,
Prov, Prov,
Ref, Ref,
Table as DsSchemaTable,
TableCell,
) )
from docling_core.types.legacy_doc.base import Table as DsSchemaTable # noqa: F401 from docling_core.types.legacy_doc.document import (
from docling_core.types.legacy_doc.base import TableCell # noqa: F401
from docling_core.types.legacy_doc.document import ( # noqa: F401
CCSDocumentDescription as DsDocumentDescription, CCSDocumentDescription as DsDocumentDescription,
CCSFileInfoObject as DsFileInfoObject,
ExportedCCSDocument as DsDocument,
) )
from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileInfoObject # noqa: F401
from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument # noqa: F401
from docling_core.utils.file import resolve_source_to_stream from docling_core.utils.file import resolve_source_to_stream
from docling_core.utils.legacy import docling_document_to_legacy from docling_core.utils.legacy import docling_document_to_legacy
from pydantic import BaseModel from pydantic import BaseModel

View File

@ -98,7 +98,7 @@ class ReadingOrderModel:
else: else:
doc.add_text(parent=doc_item, label=c_label, text=c_text, prov=c_prov) doc.add_text(parent=doc_item, label=c_label, text=c_text, prov=c_prov)
def _readingorder_elements_to_docling_doc( def _readingorder_elements_to_docling_doc( # noqa: C901
self, self,
conv_res: ConversionResult, conv_res: ConversionResult,
ro_elements: List[ReadingOrderPageElement], ro_elements: List[ReadingOrderPageElement],

View File

@ -65,7 +65,7 @@ class TesseractOcrModel(BaseOcrModel):
raise ImportError(install_errmsg) raise ImportError(install_errmsg)
try: try:
tesseract_version = tesserocr.tesseract_version() tesseract_version = tesserocr.tesseract_version()
except: except Exception:
raise ImportError(install_errmsg) raise ImportError(install_errmsg)
_, self._tesserocr_languages = tesserocr.get_languages() _, self._tesserocr_languages = tesserocr.get_languages()

View File

@ -47,7 +47,7 @@ def test_picture_classifier():
res = results[0] res = results[0]
assert len(res.annotations) == 1 assert len(res.annotations) == 1
assert type(res.annotations[0]) == PictureClassificationData assert isinstance(res.annotations[0], PictureClassificationData)
classification_data = res.annotations[0] classification_data = res.annotations[0]
assert classification_data.provenance == "DocumentPictureClassifier" assert classification_data.provenance == "DocumentPictureClassifier"
assert len(classification_data.predicted_classes) == 16, ( assert len(classification_data.predicted_classes) == 16, (
@ -63,7 +63,7 @@ def test_picture_classifier():
res = results[1] res = results[1]
assert len(res.annotations) == 1 assert len(res.annotations) == 1
assert type(res.annotations[0]) == PictureClassificationData assert isinstance(res.annotations[0], PictureClassificationData)
classification_data = res.annotations[0] classification_data = res.annotations[0]
assert classification_data.provenance == "DocumentPictureClassifier" assert classification_data.provenance == "DocumentPictureClassifier"
assert len(classification_data.predicted_classes) == 16, ( assert len(classification_data.predicted_classes) == 16, (