fixing styling issues

Signed-off-by: Swaymaw <swaymaw@gmail.com>
This commit is contained in:
Swaymaw 2024-11-22 14:38:13 +05:30
parent 86d9a2ca00
commit a00940f918
6 changed files with 17 additions and 12 deletions

View File

@ -26,11 +26,11 @@ from docling.datamodel.pipeline_options import (
EasyOcrOptions, EasyOcrOptions,
OcrMacOptions, OcrMacOptions,
OcrOptions, OcrOptions,
PaddleOcrOptions,
PdfPipelineOptions, PdfPipelineOptions,
TableFormerMode, TableFormerMode,
TesseractCliOcrOptions, TesseractCliOcrOptions,
TesseractOcrOptions, TesseractOcrOptions,
PaddleOcrOptions
) )
from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption

View File

@ -1,6 +1,6 @@
from enum import Enum from enum import Enum
from pathlib import Path from pathlib import Path
from typing import List, Literal, Optional, Union, Annotated from typing import Annotated, List, Literal, Optional, Union
from pydantic import BaseModel, ConfigDict, Field from pydantic import BaseModel, ConfigDict, Field
@ -41,11 +41,12 @@ class EasyOcrOptions(OcrOptions):
protected_namespaces=(), protected_namespaces=(),
) )
class PaddleOcrOptions(OcrOptions): class PaddleOcrOptions(OcrOptions):
kind: Literal["paddleocr"] = "paddleocr" kind: Literal["paddleocr"] = "paddleocr"
lang: Annotated[ lang: Annotated[
list[str], list[str],
Field(min_items=1, max_items=1) # Limits the list length to exactly 1 item Field(min_items=1, max_items=1), # Limits the list length to exactly 1 item
] = ["en"] ] = ["en"]
use_gpu: bool = True # same default as paddleocr.ocr use_gpu: bool = True # same default as paddleocr.ocr
use_angle_cls: bool = True use_angle_cls: bool = True
@ -102,7 +103,11 @@ class PdfPipelineOptions(PipelineOptions):
table_structure_options: TableStructureOptions = TableStructureOptions() table_structure_options: TableStructureOptions = TableStructureOptions()
ocr_options: Union[ ocr_options: Union[
EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, PaddleOcrOptions, OcrMacOptions EasyOcrOptions,
TesseractCliOcrOptions,
TesseractOcrOptions,
PaddleOcrOptions,
OcrMacOptions,
] = Field(EasyOcrOptions(), discriminator="kind") ] = Field(EasyOcrOptions(), discriminator="kind")
images_scale: float = 1.0 images_scale: float = 1.0

View File

@ -23,7 +23,7 @@ class PaddleOcrModel(BaseOcrModel):
if self.enabled: if self.enabled:
try: try:
from paddleocr import PaddleOCR, draw_ocr from paddleocr import PaddleOCR, draw_ocr # type: ignore
except ImportError: except ImportError:
raise ImportError( raise ImportError(
"PaddleOCR is not installed. Please install it via `pip install paddlepaddle` and `pip install paddleocr` to use this OCR engine. " "PaddleOCR is not installed. Please install it via `pip install paddlepaddle` and `pip install paddleocr` to use this OCR engine. "

View File

@ -12,17 +12,17 @@ from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import ( from docling.datamodel.pipeline_options import (
EasyOcrOptions, EasyOcrOptions,
OcrMacOptions, OcrMacOptions,
PaddleOcrOptions,
PdfPipelineOptions, PdfPipelineOptions,
TesseractCliOcrOptions, TesseractCliOcrOptions,
TesseractOcrOptions, TesseractOcrOptions,
PaddleOcrOptions,
) )
from docling.models.base_ocr_model import BaseOcrModel from docling.models.base_ocr_model import BaseOcrModel
from docling.models.ds_glm_model import GlmModel, GlmOptions from docling.models.ds_glm_model import GlmModel, GlmOptions
from docling.models.easyocr_model import EasyOcrModel from docling.models.easyocr_model import EasyOcrModel
from docling.models.paddle_ocr_model import PaddleOcrModel
from docling.models.layout_model import LayoutModel from docling.models.layout_model import LayoutModel
from docling.models.ocr_mac_model import OcrMacModel from docling.models.ocr_mac_model import OcrMacModel
from docling.models.paddle_ocr_model import PaddleOcrModel
from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
from docling.models.page_preprocessing_model import ( from docling.models.page_preprocessing_model import (
PagePreprocessingModel, PagePreprocessingModel,

View File

@ -5,10 +5,10 @@ from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import ( from docling.datamodel.pipeline_options import (
EasyOcrOptions, EasyOcrOptions,
OcrMacOptions, OcrMacOptions,
PaddleOcrOptions,
PdfPipelineOptions, PdfPipelineOptions,
TesseractCliOcrOptions, TesseractCliOcrOptions,
TesseractOcrOptions, TesseractOcrOptions,
PaddleOcrOptions
) )
from docling.document_converter import DocumentConverter, PdfFormatOption from docling.document_converter import DocumentConverter, PdfFormatOption

View File

@ -9,10 +9,10 @@ from docling.datamodel.pipeline_options import (
EasyOcrOptions, EasyOcrOptions,
OcrMacOptions, OcrMacOptions,
OcrOptions, OcrOptions,
PaddleOcrOptions,
PdfPipelineOptions, PdfPipelineOptions,
TesseractCliOcrOptions, TesseractCliOcrOptions,
TesseractOcrOptions, TesseractOcrOptions,
PaddleOcrOptions
) )
from docling.document_converter import DocumentConverter, PdfFormatOption from docling.document_converter import DocumentConverter, PdfFormatOption