From 76501331d2477490489d1edfed25fa894a5abbff Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Mon, 12 May 2025 07:34:24 +0200 Subject: [PATCH] need to fix ruff linter Signed-off-by: Peter Staar --- docling/backend/wav_backend.py | 5 ++--- docling/cli/main.py | 5 ++--- docling/datamodel/base_models.py | 9 +++++---- docling/datamodel/document.py | 14 +++++++------- docling/datamodel/pipeline_options.py | 13 +++++++++---- docling/document_converter.py | 12 +++++++----- docling/models/hf_asr_models/asr_nemo.py | 11 +++++------ docling/pipeline/asr_pipeline.py | 18 ++++++++---------- 8 files changed, 45 insertions(+), 42 deletions(-) diff --git a/docling/backend/wav_backend.py b/docling/backend/wav_backend.py index 931ed053..1ac86a28 100644 --- a/docling/backend/wav_backend.py +++ b/docling/backend/wav_backend.py @@ -8,11 +8,11 @@ from docling.backend.abstract_backend import AbstractDocumentBackend from docling.datamodel.base_models import InputFormat from docling.datamodel.document import InputDocument -class WavDocumentBackend(AbstractDocumentBackend): +class WavDocumentBackend(AbstractDocumentBackend): def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]): super().__init__(in_doc, path_or_stream) - + def is_valid(self) -> bool: return True @@ -29,4 +29,3 @@ class WavDocumentBackend(AbstractDocumentBackend): @classmethod def supported_formats(cls) -> set[InputFormat]: return {InputFormat.WAV} - diff --git a/docling/cli/main.py b/docling/cli/main.py index 1a547eae..c052affe 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -577,11 +577,10 @@ def convert( # noqa: C901 asr_format_option = AsrFormatOption( pipeline_cls=AsrPipeline, pipeline_options=pipeline_options ) - + else: - _log.error(f"Did not find the correct pipeline: {pipeline}") - + if artifacts_path is not None: pipeline_options.artifacts_path = artifacts_path diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index d7ff178e..65f4121e 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -34,7 +34,7 @@ class ConversionStatus(str, Enum): class InputFormat(str, Enum): """A document format supported by document backend parsers.""" - # Documents + # Documents DOCX = "docx" PPTX = "pptx" HTML = "html" @@ -51,6 +51,7 @@ class InputFormat(str, Enum): # Audio WAV = "wav" + class OutputFormat(str, Enum): MARKDOWN = "md" JSON = "json" @@ -105,8 +106,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = { ], InputFormat.XML_USPTO: ["application/xml", "text/plain"], InputFormat.JSON_DOCLING: ["application/json"], - -# Audio + # Audio InputFormat.WAV: ["audio/wav", "audio/x-wav"], } @@ -165,8 +165,9 @@ class LayoutPrediction(BaseModel): class VlmPrediction(BaseModel): text: str = "" + class AsrPrediction(BaseModel): - text: str = "" + text: str = "" class ContainerElement( diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index dfe2f501..33cc0b0d 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -279,7 +279,7 @@ class _DocumentConversionInput(BaseModel): if isinstance(obj, Path): mime = filetype.guess_mime(str(obj)) print(mime) - + if mime is None: ext = obj.suffix[1:] mime = _DocumentConversionInput._mime_from_extension(ext) @@ -292,8 +292,8 @@ class _DocumentConversionInput(BaseModel): elif obj.suffixes[-1].lower() == ".docx": mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" elif obj.suffixes[-1].lower() == ".pptx": - mime = "application/vnd.openxmlformats-officedocument.presentationml.presentation" - + mime = "application/vnd.openxmlformats-officedocument.presentationml.presentation" + elif isinstance(obj, DocumentStream): content = obj.stream.read(8192) obj.stream.seek(0) @@ -313,11 +313,11 @@ class _DocumentConversionInput(BaseModel): mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" elif objname.endswith(".pptx"): mime = "application/vnd.openxmlformats-officedocument.presentationml.presentation" - + mime = mime or _DocumentConversionInput._detect_html_xhtml(content) mime = mime or _DocumentConversionInput._detect_csv(content) mime = mime or "text/plain" - + formats = MimeTypeToFormat.get(mime, []) if formats: if len(formats) == 1 and mime not in ("text/plain"): @@ -367,7 +367,7 @@ class _DocumentConversionInput(BaseModel): @staticmethod def _mime_from_extension(ext): print("ext: ", ext) - + mime = None if ext in FormatToExtensions[InputFormat.ASCIIDOC]: mime = FormatToMimeType[InputFormat.ASCIIDOC][0] @@ -382,7 +382,7 @@ class _DocumentConversionInput(BaseModel): elif ext in FormatToExtensions[InputFormat.PDF]: mime = FormatToMimeType[InputFormat.PDF][0] elif ext in FormatToExtensions[InputFormat.WAV]: - mime = FormatToMimeType[InputFormat.WAV][0] + mime = FormatToMimeType[InputFormat.WAV][0] return mime @staticmethod diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 9f8532d5..91e47af4 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -257,10 +257,12 @@ class BaseVlmOptions(BaseModel): kind: str prompt: str + class BaseAsrOptions(BaseModel): kind: str prompt: str + class ResponseFormat(str, Enum): DOCTAGS = "doctags" MARKDOWN = "markdown" @@ -274,6 +276,7 @@ class InferenceFramework(str, Enum): # Audio ASR_NEMO = "asr_nemo" + class HuggingFaceVlmOptions(BaseVlmOptions): kind: Literal["hf_model_options"] = "hf_model_options" @@ -289,6 +292,7 @@ class HuggingFaceVlmOptions(BaseVlmOptions): def repo_cache_folder(self) -> str: return self.repo_id.replace("/", "--") + class HuggingFaceAsrOptions(BaseVlmOptions): kind: Literal["hf_model_options"] = "hf_model_options" @@ -304,6 +308,7 @@ class HuggingFaceAsrOptions(BaseVlmOptions): def repo_cache_folder(self) -> str: return self.repo_id.replace("/", "--") + class ApiVlmOptions(BaseVlmOptions): kind: Literal["api_model_options"] = "api_model_options" @@ -415,11 +420,11 @@ class VlmPipelineOptions(PaginatedPipelineOptions): smoldocling_vlm_conversion_options ) + class AsrPipelineOptions(PaginatedPipelineOptions): - asr_options: Union[HuggingFaceAsrOptions] = ( - asr_nemo_conversion_options - ) - + asr_options: Union[HuggingFaceAsrOptions] = asr_nemo_conversion_options + + class PdfPipelineOptions(PaginatedPipelineOptions): """Options for the PDF pipeline.""" diff --git a/docling/document_converter.py b/docling/document_converter.py index c3ac07f4..73766a18 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -19,9 +19,9 @@ from docling.backend.md_backend import MarkdownDocumentBackend from docling.backend.msexcel_backend import MsExcelDocumentBackend from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend from docling.backend.msword_backend import MsWordDocumentBackend +from docling.backend.wav_backend import WavDocumentBackend from docling.backend.xml.jats_backend import JatsDocumentBackend from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend -from docling.backend.wav_backend import WavDocumentBackend from docling.datamodel.base_models import ( ConversionStatus, DoclingComponentType, @@ -34,7 +34,7 @@ from docling.datamodel.document import ( InputDocument, _DocumentConversionInput, ) -from docling.datamodel.pipeline_options import PipelineOptions, AsrPipelineOptions +from docling.datamodel.pipeline_options import AsrPipelineOptions, PipelineOptions from docling.datamodel.settings import ( DEFAULT_PAGE_RANGE, DocumentLimits, @@ -42,10 +42,10 @@ from docling.datamodel.settings import ( settings, ) from docling.exceptions import ConversionError +from docling.pipeline.asr_pipeline import AsrPipeline from docling.pipeline.base_pipeline import BasePipeline from docling.pipeline.simple_pipeline import SimplePipeline from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline -from docling.pipeline.asr_pipeline import AsrPipeline from docling.utils.utils import chunkify _log = logging.getLogger(__name__) @@ -119,9 +119,11 @@ class PdfFormatOption(FormatOption): pipeline_cls: Type = StandardPdfPipeline backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend + class AsrFormatOption(FormatOption): pipeline_cls: Type = AsrPipeline - + + def _get_default_option(format: InputFormat) -> FormatOption: format_to_default_options = { InputFormat.CSV: FormatOption( @@ -300,7 +302,7 @@ class DocumentConverter: fopt = self.format_to_options.get(doc_format) print(self.format_to_options) - + if fopt is None or fopt.pipeline_options is None: _log.warning(f"fopt ({fopt}) or its options are None for {doc_format}") return None diff --git a/docling/models/hf_asr_models/asr_nemo.py b/docling/models/hf_asr_models/asr_nemo.py index 4add5341..25ad522a 100644 --- a/docling/models/hf_asr_models/asr_nemo.py +++ b/docling/models/hf_asr_models/asr_nemo.py @@ -10,13 +10,13 @@ from docling.datamodel.pipeline_options import ( AcceleratorOptions, HuggingFaceAsrOptions, ) - from docling.models.base_model import BasePageModel from docling.utils.accelerator_utils import decide_device from docling.utils.profiling import TimeRecorder _log = logging.getLogger(__name__) + class AsrNemoModel(BasePageModel): def __init__( self, @@ -26,7 +26,7 @@ class AsrNemoModel(BasePageModel): asr_options: HuggingFaceAsrOptions, ): self.enabled = enabled - + self.asr_options = asr_options if self.enabled: @@ -45,7 +45,6 @@ class AsrNemoModel(BasePageModel): elif (artifacts_path / repo_cache_folder).exists(): artifacts_path = artifacts_path / repo_cache_folder - self.model = nemo_asr.models.ASRModel.from_pretrained("nvidia/parakeet-tdt-0.6b-v2") - - - + self.model = nemo_asr.models.ASRModel.from_pretrained( + "nvidia/parakeet-tdt-0.6b-v2" + ) diff --git a/docling/pipeline/asr_pipeline.py b/docling/pipeline/asr_pipeline.py index 653950b3..165b1331 100644 --- a/docling/pipeline/asr_pipeline.py +++ b/docling/pipeline/asr_pipeline.py @@ -9,18 +9,16 @@ from docling.backend.abstract_backend import ( ) from docling.datamodel.base_models import ConversionStatus from docling.datamodel.document import ConversionResult -from docling.datamodel.pipeline_options import PipelineOptions -from docling.pipeline.base_pipeline import BasePipeline -from docling.utils.profiling import ProfilingScope, TimeRecorder - from docling.datamodel.pipeline_options import ( + AsrPipelineOptions, HuggingFaceAsrOptions, InferenceFramework, + PipelineOptions, ResponseFormat, - AsrPipelineOptions, ) - from docling.models.hf_asr_models.asr_nemo import AsrNemoModel +from docling.pipeline.base_pipeline import BasePipeline +from docling.utils.profiling import ProfilingScope, TimeRecorder _log = logging.getLogger(__name__) @@ -44,7 +42,7 @@ class AsrPipeline(BasePipeline): "When defined, it must point to a folder containing all models required by the pipeline." ) - if isinstance(self.pipeline_options.asr_options, HuggingFaceAsrOptions): + if isinstance(self.pipeline_options.asr_options, HuggingFaceAsrOptions): asr_options = cast(HuggingFaceAsrOptions, self.pipeline_options.asr_options) if asr_options.inference_framework == InferenceFramework.ASR_NENO: self.build_pipe = [ @@ -59,10 +57,10 @@ class AsrPipeline(BasePipeline): _log.error(f"{asr_options.inference_framework} is not supported") else: - _log.error(f"ASR is not supported") + _log.error("ASR is not supported") def _build_document(self, conv_res: ConversionResult) -> ConversionResult: - pass + pass def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult: return conv_res @@ -79,4 +77,4 @@ class AsrPipeline(BasePipeline): @classmethod def is_backend_supported(cls, backend: AbstractDocumentBackend): - pass + pass