From ad28271b4ab60f1a6dc3594183940650459467a8 Mon Sep 17 00:00:00 2001 From: Michele Dolfi Date: Mon, 14 Apr 2025 16:03:01 +0200 Subject: [PATCH] runs 1 on linter issues Signed-off-by: Michele Dolfi --- docling/backend/md_backend.py | 2 +- docling/backend/msexcel_backend.py | 2 +- docling/backend/msword_backend.py | 2 +- docling/backend/xml/jats_backend.py | 2 +- docling/cli/main.py | 4 ++-- docling/datamodel/base_models.py | 2 +- docling/datamodel/document.py | 16 ++++++++-------- docling/models/readingorder_model.py | 2 +- docling/models/tesseract_ocr_model.py | 2 +- tests/test_document_picture_classifier.py | 4 ++-- 10 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docling/backend/md_backend.py b/docling/backend/md_backend.py index f8a97a73..0c6b3065 100644 --- a/docling/backend/md_backend.py +++ b/docling/backend/md_backend.py @@ -168,7 +168,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): ) self.inline_texts = [] - def _iterate_elements( + def _iterate_elements( # noqa: C901 self, element: marko.element.Element, depth: int, diff --git a/docling/backend/msexcel_backend.py b/docling/backend/msexcel_backend.py index f962e326..0ae90831 100644 --- a/docling/backend/msexcel_backend.py +++ b/docling/backend/msexcel_backend.py @@ -493,7 +493,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken ), ), ) - except: + except Exception: _log.error("could not extract the image from excel sheets") return doc diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py index 49ccefef..a108361c 100644 --- a/docling/backend/msword_backend.py +++ b/docling/backend/msword_backend.py @@ -418,7 +418,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): else prev_parent ) - def _handle_text_elements( + def _handle_text_elements( # noqa: C901 self, element: BaseOxmlElement, docx_obj: DocxDocument, diff --git a/docling/backend/xml/jats_backend.py b/docling/backend/xml/jats_backend.py index e4b29957..23560d30 100755 --- a/docling/backend/xml/jats_backend.py +++ b/docling/backend/xml/jats_backend.py @@ -606,7 +606,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend): ) return - def _walk_linear( + def _walk_linear( # noqa: C901 self, doc: DoclingDocument, parent: NodeItem, node: etree._Element ) -> str: skip_tags = ["term"] diff --git a/docling/cli/main.py b/docling/cli/main.py index 1f9047da..c0718c8a 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -15,6 +15,7 @@ import typer from docling_core.types.doc import ImageRefMode from docling_core.utils.file import resolve_source_to_path from pydantic import TypeAdapter +from rich.console import Console from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend @@ -54,7 +55,6 @@ warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic| warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr") _log = logging.getLogger(__name__) -from rich.console import Console console = Console() err_console = Console(stderr=True) @@ -233,7 +233,7 @@ def _split_list(raw: Optional[str]) -> Optional[List[str]]: @app.command(no_args_is_help=True) -def convert( +def convert( # noqa: C901 input_sources: Annotated[ List[str], typer.Argument( diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 5fd04949..95dcfe75 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -12,7 +12,7 @@ from docling_core.types.doc import ( from docling_core.types.doc.page import SegmentedPdfPage, TextCell # DO NOT REMOVE; explicitly exposed from this location -from docling_core.types.io import ( # noqa: F401 +from docling_core.types.io import ( DocumentStream, ) from PIL.Image import Image diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 37c264f0..668e8249 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -19,7 +19,7 @@ from typing import ( import filetype # DO NOT REMOVE; explicitly exposed from this location -from docling_core.types.doc import ( # noqa: F401 +from docling_core.types.doc import ( DocItem, DocItemLabel, DoclingDocument, @@ -28,8 +28,8 @@ from docling_core.types.doc import ( # noqa: F401 TableItem, TextItem, ) -from docling_core.types.doc.document import ListItem # noqa: F401 -from docling_core.types.legacy_doc.base import ( # noqa: F401 +from docling_core.types.doc.document import ListItem +from docling_core.types.legacy_doc.base import ( BaseText, Figure, GlmTableCell, @@ -37,14 +37,14 @@ from docling_core.types.legacy_doc.base import ( # noqa: F401 PageReference, Prov, Ref, + Table as DsSchemaTable, + TableCell, ) -from docling_core.types.legacy_doc.base import Table as DsSchemaTable # noqa: F401 -from docling_core.types.legacy_doc.base import TableCell # noqa: F401 -from docling_core.types.legacy_doc.document import ( # noqa: F401 +from docling_core.types.legacy_doc.document import ( CCSDocumentDescription as DsDocumentDescription, + CCSFileInfoObject as DsFileInfoObject, + ExportedCCSDocument as DsDocument, ) -from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileInfoObject # noqa: F401 -from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument # noqa: F401 from docling_core.utils.file import resolve_source_to_stream from docling_core.utils.legacy import docling_document_to_legacy from pydantic import BaseModel diff --git a/docling/models/readingorder_model.py b/docling/models/readingorder_model.py index 287bcd67..43735360 100644 --- a/docling/models/readingorder_model.py +++ b/docling/models/readingorder_model.py @@ -98,7 +98,7 @@ class ReadingOrderModel: else: doc.add_text(parent=doc_item, label=c_label, text=c_text, prov=c_prov) - def _readingorder_elements_to_docling_doc( + def _readingorder_elements_to_docling_doc( # noqa: C901 self, conv_res: ConversionResult, ro_elements: List[ReadingOrderPageElement], diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/tesseract_ocr_model.py index 892928d3..39f5b86c 100644 --- a/docling/models/tesseract_ocr_model.py +++ b/docling/models/tesseract_ocr_model.py @@ -65,7 +65,7 @@ class TesseractOcrModel(BaseOcrModel): raise ImportError(install_errmsg) try: tesseract_version = tesserocr.tesseract_version() - except: + except Exception: raise ImportError(install_errmsg) _, self._tesserocr_languages = tesserocr.get_languages() diff --git a/tests/test_document_picture_classifier.py b/tests/test_document_picture_classifier.py index 07a400a6..5dc5e926 100644 --- a/tests/test_document_picture_classifier.py +++ b/tests/test_document_picture_classifier.py @@ -47,7 +47,7 @@ def test_picture_classifier(): res = results[0] assert len(res.annotations) == 1 - assert type(res.annotations[0]) == PictureClassificationData + assert isinstance(res.annotations[0], PictureClassificationData) classification_data = res.annotations[0] assert classification_data.provenance == "DocumentPictureClassifier" assert len(classification_data.predicted_classes) == 16, ( @@ -63,7 +63,7 @@ def test_picture_classifier(): res = results[1] assert len(res.annotations) == 1 - assert type(res.annotations[0]) == PictureClassificationData + assert isinstance(res.annotations[0], PictureClassificationData) classification_data = res.annotations[0] assert classification_data.provenance == "DocumentPictureClassifier" assert len(classification_data.predicted_classes) == 16, (