mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 12:04:31 +00:00
need to fix ruff linter
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
parent
32ad65cb9f
commit
76501331d2
@ -8,8 +8,8 @@ from docling.backend.abstract_backend import AbstractDocumentBackend
|
|||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.document import InputDocument
|
from docling.datamodel.document import InputDocument
|
||||||
|
|
||||||
class WavDocumentBackend(AbstractDocumentBackend):
|
|
||||||
|
|
||||||
|
class WavDocumentBackend(AbstractDocumentBackend):
|
||||||
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
|
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
|
||||||
super().__init__(in_doc, path_or_stream)
|
super().__init__(in_doc, path_or_stream)
|
||||||
|
|
||||||
@ -29,4 +29,3 @@ class WavDocumentBackend(AbstractDocumentBackend):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def supported_formats(cls) -> set[InputFormat]:
|
def supported_formats(cls) -> set[InputFormat]:
|
||||||
return {InputFormat.WAV}
|
return {InputFormat.WAV}
|
||||||
|
|
||||||
|
@ -579,7 +579,6 @@ def convert( # noqa: C901
|
|||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
_log.error(f"Did not find the correct pipeline: {pipeline}")
|
_log.error(f"Did not find the correct pipeline: {pipeline}")
|
||||||
|
|
||||||
if artifacts_path is not None:
|
if artifacts_path is not None:
|
||||||
|
@ -51,6 +51,7 @@ class InputFormat(str, Enum):
|
|||||||
# Audio
|
# Audio
|
||||||
WAV = "wav"
|
WAV = "wav"
|
||||||
|
|
||||||
|
|
||||||
class OutputFormat(str, Enum):
|
class OutputFormat(str, Enum):
|
||||||
MARKDOWN = "md"
|
MARKDOWN = "md"
|
||||||
JSON = "json"
|
JSON = "json"
|
||||||
@ -105,8 +106,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
|
|||||||
],
|
],
|
||||||
InputFormat.XML_USPTO: ["application/xml", "text/plain"],
|
InputFormat.XML_USPTO: ["application/xml", "text/plain"],
|
||||||
InputFormat.JSON_DOCLING: ["application/json"],
|
InputFormat.JSON_DOCLING: ["application/json"],
|
||||||
|
# Audio
|
||||||
# Audio
|
|
||||||
InputFormat.WAV: ["audio/wav", "audio/x-wav"],
|
InputFormat.WAV: ["audio/wav", "audio/x-wav"],
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -165,6 +165,7 @@ class LayoutPrediction(BaseModel):
|
|||||||
class VlmPrediction(BaseModel):
|
class VlmPrediction(BaseModel):
|
||||||
text: str = ""
|
text: str = ""
|
||||||
|
|
||||||
|
|
||||||
class AsrPrediction(BaseModel):
|
class AsrPrediction(BaseModel):
|
||||||
text: str = ""
|
text: str = ""
|
||||||
|
|
||||||
|
@ -257,10 +257,12 @@ class BaseVlmOptions(BaseModel):
|
|||||||
kind: str
|
kind: str
|
||||||
prompt: str
|
prompt: str
|
||||||
|
|
||||||
|
|
||||||
class BaseAsrOptions(BaseModel):
|
class BaseAsrOptions(BaseModel):
|
||||||
kind: str
|
kind: str
|
||||||
prompt: str
|
prompt: str
|
||||||
|
|
||||||
|
|
||||||
class ResponseFormat(str, Enum):
|
class ResponseFormat(str, Enum):
|
||||||
DOCTAGS = "doctags"
|
DOCTAGS = "doctags"
|
||||||
MARKDOWN = "markdown"
|
MARKDOWN = "markdown"
|
||||||
@ -274,6 +276,7 @@ class InferenceFramework(str, Enum):
|
|||||||
# Audio
|
# Audio
|
||||||
ASR_NEMO = "asr_nemo"
|
ASR_NEMO = "asr_nemo"
|
||||||
|
|
||||||
|
|
||||||
class HuggingFaceVlmOptions(BaseVlmOptions):
|
class HuggingFaceVlmOptions(BaseVlmOptions):
|
||||||
kind: Literal["hf_model_options"] = "hf_model_options"
|
kind: Literal["hf_model_options"] = "hf_model_options"
|
||||||
|
|
||||||
@ -289,6 +292,7 @@ class HuggingFaceVlmOptions(BaseVlmOptions):
|
|||||||
def repo_cache_folder(self) -> str:
|
def repo_cache_folder(self) -> str:
|
||||||
return self.repo_id.replace("/", "--")
|
return self.repo_id.replace("/", "--")
|
||||||
|
|
||||||
|
|
||||||
class HuggingFaceAsrOptions(BaseVlmOptions):
|
class HuggingFaceAsrOptions(BaseVlmOptions):
|
||||||
kind: Literal["hf_model_options"] = "hf_model_options"
|
kind: Literal["hf_model_options"] = "hf_model_options"
|
||||||
|
|
||||||
@ -304,6 +308,7 @@ class HuggingFaceAsrOptions(BaseVlmOptions):
|
|||||||
def repo_cache_folder(self) -> str:
|
def repo_cache_folder(self) -> str:
|
||||||
return self.repo_id.replace("/", "--")
|
return self.repo_id.replace("/", "--")
|
||||||
|
|
||||||
|
|
||||||
class ApiVlmOptions(BaseVlmOptions):
|
class ApiVlmOptions(BaseVlmOptions):
|
||||||
kind: Literal["api_model_options"] = "api_model_options"
|
kind: Literal["api_model_options"] = "api_model_options"
|
||||||
|
|
||||||
@ -415,10 +420,10 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
|
|||||||
smoldocling_vlm_conversion_options
|
smoldocling_vlm_conversion_options
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AsrPipelineOptions(PaginatedPipelineOptions):
|
class AsrPipelineOptions(PaginatedPipelineOptions):
|
||||||
asr_options: Union[HuggingFaceAsrOptions] = (
|
asr_options: Union[HuggingFaceAsrOptions] = asr_nemo_conversion_options
|
||||||
asr_nemo_conversion_options
|
|
||||||
)
|
|
||||||
|
|
||||||
class PdfPipelineOptions(PaginatedPipelineOptions):
|
class PdfPipelineOptions(PaginatedPipelineOptions):
|
||||||
"""Options for the PDF pipeline."""
|
"""Options for the PDF pipeline."""
|
||||||
|
@ -19,9 +19,9 @@ from docling.backend.md_backend import MarkdownDocumentBackend
|
|||||||
from docling.backend.msexcel_backend import MsExcelDocumentBackend
|
from docling.backend.msexcel_backend import MsExcelDocumentBackend
|
||||||
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
|
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
|
||||||
from docling.backend.msword_backend import MsWordDocumentBackend
|
from docling.backend.msword_backend import MsWordDocumentBackend
|
||||||
|
from docling.backend.wav_backend import WavDocumentBackend
|
||||||
from docling.backend.xml.jats_backend import JatsDocumentBackend
|
from docling.backend.xml.jats_backend import JatsDocumentBackend
|
||||||
from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
|
from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
|
||||||
from docling.backend.wav_backend import WavDocumentBackend
|
|
||||||
from docling.datamodel.base_models import (
|
from docling.datamodel.base_models import (
|
||||||
ConversionStatus,
|
ConversionStatus,
|
||||||
DoclingComponentType,
|
DoclingComponentType,
|
||||||
@ -34,7 +34,7 @@ from docling.datamodel.document import (
|
|||||||
InputDocument,
|
InputDocument,
|
||||||
_DocumentConversionInput,
|
_DocumentConversionInput,
|
||||||
)
|
)
|
||||||
from docling.datamodel.pipeline_options import PipelineOptions, AsrPipelineOptions
|
from docling.datamodel.pipeline_options import AsrPipelineOptions, PipelineOptions
|
||||||
from docling.datamodel.settings import (
|
from docling.datamodel.settings import (
|
||||||
DEFAULT_PAGE_RANGE,
|
DEFAULT_PAGE_RANGE,
|
||||||
DocumentLimits,
|
DocumentLimits,
|
||||||
@ -42,10 +42,10 @@ from docling.datamodel.settings import (
|
|||||||
settings,
|
settings,
|
||||||
)
|
)
|
||||||
from docling.exceptions import ConversionError
|
from docling.exceptions import ConversionError
|
||||||
|
from docling.pipeline.asr_pipeline import AsrPipeline
|
||||||
from docling.pipeline.base_pipeline import BasePipeline
|
from docling.pipeline.base_pipeline import BasePipeline
|
||||||
from docling.pipeline.simple_pipeline import SimplePipeline
|
from docling.pipeline.simple_pipeline import SimplePipeline
|
||||||
from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
|
from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
|
||||||
from docling.pipeline.asr_pipeline import AsrPipeline
|
|
||||||
from docling.utils.utils import chunkify
|
from docling.utils.utils import chunkify
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
@ -119,9 +119,11 @@ class PdfFormatOption(FormatOption):
|
|||||||
pipeline_cls: Type = StandardPdfPipeline
|
pipeline_cls: Type = StandardPdfPipeline
|
||||||
backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend
|
backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend
|
||||||
|
|
||||||
|
|
||||||
class AsrFormatOption(FormatOption):
|
class AsrFormatOption(FormatOption):
|
||||||
pipeline_cls: Type = AsrPipeline
|
pipeline_cls: Type = AsrPipeline
|
||||||
|
|
||||||
|
|
||||||
def _get_default_option(format: InputFormat) -> FormatOption:
|
def _get_default_option(format: InputFormat) -> FormatOption:
|
||||||
format_to_default_options = {
|
format_to_default_options = {
|
||||||
InputFormat.CSV: FormatOption(
|
InputFormat.CSV: FormatOption(
|
||||||
|
@ -10,13 +10,13 @@ from docling.datamodel.pipeline_options import (
|
|||||||
AcceleratorOptions,
|
AcceleratorOptions,
|
||||||
HuggingFaceAsrOptions,
|
HuggingFaceAsrOptions,
|
||||||
)
|
)
|
||||||
|
|
||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_model import BasePageModel
|
||||||
from docling.utils.accelerator_utils import decide_device
|
from docling.utils.accelerator_utils import decide_device
|
||||||
from docling.utils.profiling import TimeRecorder
|
from docling.utils.profiling import TimeRecorder
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class AsrNemoModel(BasePageModel):
|
class AsrNemoModel(BasePageModel):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@ -45,7 +45,6 @@ class AsrNemoModel(BasePageModel):
|
|||||||
elif (artifacts_path / repo_cache_folder).exists():
|
elif (artifacts_path / repo_cache_folder).exists():
|
||||||
artifacts_path = artifacts_path / repo_cache_folder
|
artifacts_path = artifacts_path / repo_cache_folder
|
||||||
|
|
||||||
self.model = nemo_asr.models.ASRModel.from_pretrained("nvidia/parakeet-tdt-0.6b-v2")
|
self.model = nemo_asr.models.ASRModel.from_pretrained(
|
||||||
|
"nvidia/parakeet-tdt-0.6b-v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
@ -9,18 +9,16 @@ from docling.backend.abstract_backend import (
|
|||||||
)
|
)
|
||||||
from docling.datamodel.base_models import ConversionStatus
|
from docling.datamodel.base_models import ConversionStatus
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import PipelineOptions
|
|
||||||
from docling.pipeline.base_pipeline import BasePipeline
|
|
||||||
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
|
||||||
|
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
|
AsrPipelineOptions,
|
||||||
HuggingFaceAsrOptions,
|
HuggingFaceAsrOptions,
|
||||||
InferenceFramework,
|
InferenceFramework,
|
||||||
|
PipelineOptions,
|
||||||
ResponseFormat,
|
ResponseFormat,
|
||||||
AsrPipelineOptions,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
from docling.models.hf_asr_models.asr_nemo import AsrNemoModel
|
from docling.models.hf_asr_models.asr_nemo import AsrNemoModel
|
||||||
|
from docling.pipeline.base_pipeline import BasePipeline
|
||||||
|
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -59,7 +57,7 @@ class AsrPipeline(BasePipeline):
|
|||||||
_log.error(f"{asr_options.inference_framework} is not supported")
|
_log.error(f"{asr_options.inference_framework} is not supported")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
_log.error(f"ASR is not supported")
|
_log.error("ASR is not supported")
|
||||||
|
|
||||||
def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
|
def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
|
||||||
pass
|
pass
|
||||||
|
Loading…
Reference in New Issue
Block a user