mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 12:34:22 +00:00
runs 1 on linter issues
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
0d261e52ed
commit
ad28271b4a
@ -168,7 +168,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
)
|
)
|
||||||
self.inline_texts = []
|
self.inline_texts = []
|
||||||
|
|
||||||
def _iterate_elements(
|
def _iterate_elements( # noqa: C901
|
||||||
self,
|
self,
|
||||||
element: marko.element.Element,
|
element: marko.element.Element,
|
||||||
depth: int,
|
depth: int,
|
||||||
|
@ -493,7 +493,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
except:
|
except Exception:
|
||||||
_log.error("could not extract the image from excel sheets")
|
_log.error("could not extract the image from excel sheets")
|
||||||
|
|
||||||
return doc
|
return doc
|
||||||
|
@ -418,7 +418,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
else prev_parent
|
else prev_parent
|
||||||
)
|
)
|
||||||
|
|
||||||
def _handle_text_elements(
|
def _handle_text_elements( # noqa: C901
|
||||||
self,
|
self,
|
||||||
element: BaseOxmlElement,
|
element: BaseOxmlElement,
|
||||||
docx_obj: DocxDocument,
|
docx_obj: DocxDocument,
|
||||||
|
@ -606,7 +606,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
def _walk_linear(
|
def _walk_linear( # noqa: C901
|
||||||
self, doc: DoclingDocument, parent: NodeItem, node: etree._Element
|
self, doc: DoclingDocument, parent: NodeItem, node: etree._Element
|
||||||
) -> str:
|
) -> str:
|
||||||
skip_tags = ["term"]
|
skip_tags = ["term"]
|
||||||
|
@ -15,6 +15,7 @@ import typer
|
|||||||
from docling_core.types.doc import ImageRefMode
|
from docling_core.types.doc import ImageRefMode
|
||||||
from docling_core.utils.file import resolve_source_to_path
|
from docling_core.utils.file import resolve_source_to_path
|
||||||
from pydantic import TypeAdapter
|
from pydantic import TypeAdapter
|
||||||
|
from rich.console import Console
|
||||||
|
|
||||||
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
||||||
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
|
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
|
||||||
@ -54,7 +55,6 @@ warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|
|
|||||||
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
|
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
from rich.console import Console
|
|
||||||
|
|
||||||
console = Console()
|
console = Console()
|
||||||
err_console = Console(stderr=True)
|
err_console = Console(stderr=True)
|
||||||
@ -233,7 +233,7 @@ def _split_list(raw: Optional[str]) -> Optional[List[str]]:
|
|||||||
|
|
||||||
|
|
||||||
@app.command(no_args_is_help=True)
|
@app.command(no_args_is_help=True)
|
||||||
def convert(
|
def convert( # noqa: C901
|
||||||
input_sources: Annotated[
|
input_sources: Annotated[
|
||||||
List[str],
|
List[str],
|
||||||
typer.Argument(
|
typer.Argument(
|
||||||
|
@ -12,7 +12,7 @@ from docling_core.types.doc import (
|
|||||||
from docling_core.types.doc.page import SegmentedPdfPage, TextCell
|
from docling_core.types.doc.page import SegmentedPdfPage, TextCell
|
||||||
|
|
||||||
# DO NOT REMOVE; explicitly exposed from this location
|
# DO NOT REMOVE; explicitly exposed from this location
|
||||||
from docling_core.types.io import ( # noqa: F401
|
from docling_core.types.io import (
|
||||||
DocumentStream,
|
DocumentStream,
|
||||||
)
|
)
|
||||||
from PIL.Image import Image
|
from PIL.Image import Image
|
||||||
|
@ -19,7 +19,7 @@ from typing import (
|
|||||||
import filetype
|
import filetype
|
||||||
|
|
||||||
# DO NOT REMOVE; explicitly exposed from this location
|
# DO NOT REMOVE; explicitly exposed from this location
|
||||||
from docling_core.types.doc import ( # noqa: F401
|
from docling_core.types.doc import (
|
||||||
DocItem,
|
DocItem,
|
||||||
DocItemLabel,
|
DocItemLabel,
|
||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
@ -28,8 +28,8 @@ from docling_core.types.doc import ( # noqa: F401
|
|||||||
TableItem,
|
TableItem,
|
||||||
TextItem,
|
TextItem,
|
||||||
)
|
)
|
||||||
from docling_core.types.doc.document import ListItem # noqa: F401
|
from docling_core.types.doc.document import ListItem
|
||||||
from docling_core.types.legacy_doc.base import ( # noqa: F401
|
from docling_core.types.legacy_doc.base import (
|
||||||
BaseText,
|
BaseText,
|
||||||
Figure,
|
Figure,
|
||||||
GlmTableCell,
|
GlmTableCell,
|
||||||
@ -37,14 +37,14 @@ from docling_core.types.legacy_doc.base import ( # noqa: F401
|
|||||||
PageReference,
|
PageReference,
|
||||||
Prov,
|
Prov,
|
||||||
Ref,
|
Ref,
|
||||||
|
Table as DsSchemaTable,
|
||||||
|
TableCell,
|
||||||
)
|
)
|
||||||
from docling_core.types.legacy_doc.base import Table as DsSchemaTable # noqa: F401
|
from docling_core.types.legacy_doc.document import (
|
||||||
from docling_core.types.legacy_doc.base import TableCell # noqa: F401
|
|
||||||
from docling_core.types.legacy_doc.document import ( # noqa: F401
|
|
||||||
CCSDocumentDescription as DsDocumentDescription,
|
CCSDocumentDescription as DsDocumentDescription,
|
||||||
|
CCSFileInfoObject as DsFileInfoObject,
|
||||||
|
ExportedCCSDocument as DsDocument,
|
||||||
)
|
)
|
||||||
from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileInfoObject # noqa: F401
|
|
||||||
from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument # noqa: F401
|
|
||||||
from docling_core.utils.file import resolve_source_to_stream
|
from docling_core.utils.file import resolve_source_to_stream
|
||||||
from docling_core.utils.legacy import docling_document_to_legacy
|
from docling_core.utils.legacy import docling_document_to_legacy
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
@ -98,7 +98,7 @@ class ReadingOrderModel:
|
|||||||
else:
|
else:
|
||||||
doc.add_text(parent=doc_item, label=c_label, text=c_text, prov=c_prov)
|
doc.add_text(parent=doc_item, label=c_label, text=c_text, prov=c_prov)
|
||||||
|
|
||||||
def _readingorder_elements_to_docling_doc(
|
def _readingorder_elements_to_docling_doc( # noqa: C901
|
||||||
self,
|
self,
|
||||||
conv_res: ConversionResult,
|
conv_res: ConversionResult,
|
||||||
ro_elements: List[ReadingOrderPageElement],
|
ro_elements: List[ReadingOrderPageElement],
|
||||||
|
@ -65,7 +65,7 @@ class TesseractOcrModel(BaseOcrModel):
|
|||||||
raise ImportError(install_errmsg)
|
raise ImportError(install_errmsg)
|
||||||
try:
|
try:
|
||||||
tesseract_version = tesserocr.tesseract_version()
|
tesseract_version = tesserocr.tesseract_version()
|
||||||
except:
|
except Exception:
|
||||||
raise ImportError(install_errmsg)
|
raise ImportError(install_errmsg)
|
||||||
|
|
||||||
_, self._tesserocr_languages = tesserocr.get_languages()
|
_, self._tesserocr_languages = tesserocr.get_languages()
|
||||||
|
@ -47,7 +47,7 @@ def test_picture_classifier():
|
|||||||
|
|
||||||
res = results[0]
|
res = results[0]
|
||||||
assert len(res.annotations) == 1
|
assert len(res.annotations) == 1
|
||||||
assert type(res.annotations[0]) == PictureClassificationData
|
assert isinstance(res.annotations[0], PictureClassificationData)
|
||||||
classification_data = res.annotations[0]
|
classification_data = res.annotations[0]
|
||||||
assert classification_data.provenance == "DocumentPictureClassifier"
|
assert classification_data.provenance == "DocumentPictureClassifier"
|
||||||
assert len(classification_data.predicted_classes) == 16, (
|
assert len(classification_data.predicted_classes) == 16, (
|
||||||
@ -63,7 +63,7 @@ def test_picture_classifier():
|
|||||||
|
|
||||||
res = results[1]
|
res = results[1]
|
||||||
assert len(res.annotations) == 1
|
assert len(res.annotations) == 1
|
||||||
assert type(res.annotations[0]) == PictureClassificationData
|
assert isinstance(res.annotations[0], PictureClassificationData)
|
||||||
classification_data = res.annotations[0]
|
classification_data = res.annotations[0]
|
||||||
assert classification_data.provenance == "DocumentPictureClassifier"
|
assert classification_data.provenance == "DocumentPictureClassifier"
|
||||||
assert len(classification_data.predicted_classes) == 16, (
|
assert len(classification_data.predicted_classes) == 16, (
|
||||||
|
Loading…
Reference in New Issue
Block a user