mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
runs 1 on linter issues
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
0d261e52ed
commit
ad28271b4a
@ -168,7 +168,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
|
||||
)
|
||||
self.inline_texts = []
|
||||
|
||||
def _iterate_elements(
|
||||
def _iterate_elements( # noqa: C901
|
||||
self,
|
||||
element: marko.element.Element,
|
||||
depth: int,
|
||||
|
@ -493,7 +493,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
|
||||
),
|
||||
),
|
||||
)
|
||||
except:
|
||||
except Exception:
|
||||
_log.error("could not extract the image from excel sheets")
|
||||
|
||||
return doc
|
||||
|
@ -418,7 +418,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
||||
else prev_parent
|
||||
)
|
||||
|
||||
def _handle_text_elements(
|
||||
def _handle_text_elements( # noqa: C901
|
||||
self,
|
||||
element: BaseOxmlElement,
|
||||
docx_obj: DocxDocument,
|
||||
|
@ -606,7 +606,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
|
||||
)
|
||||
return
|
||||
|
||||
def _walk_linear(
|
||||
def _walk_linear( # noqa: C901
|
||||
self, doc: DoclingDocument, parent: NodeItem, node: etree._Element
|
||||
) -> str:
|
||||
skip_tags = ["term"]
|
||||
|
@ -15,6 +15,7 @@ import typer
|
||||
from docling_core.types.doc import ImageRefMode
|
||||
from docling_core.utils.file import resolve_source_to_path
|
||||
from pydantic import TypeAdapter
|
||||
from rich.console import Console
|
||||
|
||||
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
||||
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
|
||||
@ -54,7 +55,6 @@ warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|
|
||||
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
from rich.console import Console
|
||||
|
||||
console = Console()
|
||||
err_console = Console(stderr=True)
|
||||
@ -233,7 +233,7 @@ def _split_list(raw: Optional[str]) -> Optional[List[str]]:
|
||||
|
||||
|
||||
@app.command(no_args_is_help=True)
|
||||
def convert(
|
||||
def convert( # noqa: C901
|
||||
input_sources: Annotated[
|
||||
List[str],
|
||||
typer.Argument(
|
||||
|
@ -12,7 +12,7 @@ from docling_core.types.doc import (
|
||||
from docling_core.types.doc.page import SegmentedPdfPage, TextCell
|
||||
|
||||
# DO NOT REMOVE; explicitly exposed from this location
|
||||
from docling_core.types.io import ( # noqa: F401
|
||||
from docling_core.types.io import (
|
||||
DocumentStream,
|
||||
)
|
||||
from PIL.Image import Image
|
||||
|
@ -19,7 +19,7 @@ from typing import (
|
||||
import filetype
|
||||
|
||||
# DO NOT REMOVE; explicitly exposed from this location
|
||||
from docling_core.types.doc import ( # noqa: F401
|
||||
from docling_core.types.doc import (
|
||||
DocItem,
|
||||
DocItemLabel,
|
||||
DoclingDocument,
|
||||
@ -28,8 +28,8 @@ from docling_core.types.doc import ( # noqa: F401
|
||||
TableItem,
|
||||
TextItem,
|
||||
)
|
||||
from docling_core.types.doc.document import ListItem # noqa: F401
|
||||
from docling_core.types.legacy_doc.base import ( # noqa: F401
|
||||
from docling_core.types.doc.document import ListItem
|
||||
from docling_core.types.legacy_doc.base import (
|
||||
BaseText,
|
||||
Figure,
|
||||
GlmTableCell,
|
||||
@ -37,14 +37,14 @@ from docling_core.types.legacy_doc.base import ( # noqa: F401
|
||||
PageReference,
|
||||
Prov,
|
||||
Ref,
|
||||
Table as DsSchemaTable,
|
||||
TableCell,
|
||||
)
|
||||
from docling_core.types.legacy_doc.base import Table as DsSchemaTable # noqa: F401
|
||||
from docling_core.types.legacy_doc.base import TableCell # noqa: F401
|
||||
from docling_core.types.legacy_doc.document import ( # noqa: F401
|
||||
from docling_core.types.legacy_doc.document import (
|
||||
CCSDocumentDescription as DsDocumentDescription,
|
||||
CCSFileInfoObject as DsFileInfoObject,
|
||||
ExportedCCSDocument as DsDocument,
|
||||
)
|
||||
from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileInfoObject # noqa: F401
|
||||
from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument # noqa: F401
|
||||
from docling_core.utils.file import resolve_source_to_stream
|
||||
from docling_core.utils.legacy import docling_document_to_legacy
|
||||
from pydantic import BaseModel
|
||||
|
@ -98,7 +98,7 @@ class ReadingOrderModel:
|
||||
else:
|
||||
doc.add_text(parent=doc_item, label=c_label, text=c_text, prov=c_prov)
|
||||
|
||||
def _readingorder_elements_to_docling_doc(
|
||||
def _readingorder_elements_to_docling_doc( # noqa: C901
|
||||
self,
|
||||
conv_res: ConversionResult,
|
||||
ro_elements: List[ReadingOrderPageElement],
|
||||
|
@ -65,7 +65,7 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
raise ImportError(install_errmsg)
|
||||
try:
|
||||
tesseract_version = tesserocr.tesseract_version()
|
||||
except:
|
||||
except Exception:
|
||||
raise ImportError(install_errmsg)
|
||||
|
||||
_, self._tesserocr_languages = tesserocr.get_languages()
|
||||
|
@ -47,7 +47,7 @@ def test_picture_classifier():
|
||||
|
||||
res = results[0]
|
||||
assert len(res.annotations) == 1
|
||||
assert type(res.annotations[0]) == PictureClassificationData
|
||||
assert isinstance(res.annotations[0], PictureClassificationData)
|
||||
classification_data = res.annotations[0]
|
||||
assert classification_data.provenance == "DocumentPictureClassifier"
|
||||
assert len(classification_data.predicted_classes) == 16, (
|
||||
@ -63,7 +63,7 @@ def test_picture_classifier():
|
||||
|
||||
res = results[1]
|
||||
assert len(res.annotations) == 1
|
||||
assert type(res.annotations[0]) == PictureClassificationData
|
||||
assert isinstance(res.annotations[0], PictureClassificationData)
|
||||
classification_data = res.annotations[0]
|
||||
assert classification_data.provenance == "DocumentPictureClassifier"
|
||||
assert len(classification_data.predicted_classes) == 16, (
|
||||
|
Loading…
Reference in New Issue
Block a user