fixing styling issues

Signed-off-by: Swaymaw <swaymaw@gmail.com>
This commit is contained in:
Swaymaw 2024-11-22 14:38:13 +05:30
parent 86d9a2ca00
commit a00940f918
6 changed files with 17 additions and 12 deletions

View File

@ -26,11 +26,11 @@ from docling.datamodel.pipeline_options import (
EasyOcrOptions,
OcrMacOptions,
OcrOptions,
PaddleOcrOptions,
PdfPipelineOptions,
TableFormerMode,
TesseractCliOcrOptions,
TesseractOcrOptions,
PaddleOcrOptions
)
from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
@ -264,7 +264,7 @@ def convert(
ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr)
elif ocr_engine == OcrEngine.OCRMAC:
ocr_options = OcrMacOptions(force_full_page_ocr=force_ocr)
elif ocr_engine == OcrEngine.PADDLEOCR:
elif ocr_engine == OcrEngine.PADDLEOCR:
ocr_options = PaddleOcrOptions(force_full_page_ocr=force_ocr)
else:
raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")

View File

@ -1,6 +1,6 @@
from enum import Enum
from pathlib import Path
from typing import List, Literal, Optional, Union, Annotated
from typing import Annotated, List, Literal, Optional, Union
from pydantic import BaseModel, ConfigDict, Field
@ -41,11 +41,12 @@ class EasyOcrOptions(OcrOptions):
protected_namespaces=(),
)
class PaddleOcrOptions(OcrOptions):
kind: Literal["paddleocr"] = "paddleocr"
lang: Annotated[
list[str],
Field(min_items=1, max_items=1) # Limits the list length to 0 or 1 items
Field(min_items=1, max_items=1), # Requires the list to contain exactly 1 item
] = ["en"]
use_gpu: bool = True # same default as paddleocr.ocr
use_angle_cls: bool = True
@ -102,7 +103,11 @@ class PdfPipelineOptions(PipelineOptions):
table_structure_options: TableStructureOptions = TableStructureOptions()
ocr_options: Union[
EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, PaddleOcrOptions, OcrMacOptions
EasyOcrOptions,
TesseractCliOcrOptions,
TesseractOcrOptions,
PaddleOcrOptions,
OcrMacOptions,
] = Field(EasyOcrOptions(), discriminator="kind")
images_scale: float = 1.0

View File

@ -23,7 +23,7 @@ class PaddleOcrModel(BaseOcrModel):
if self.enabled:
try:
from paddleocr import PaddleOCR, draw_ocr
from paddleocr import PaddleOCR, draw_ocr # type: ignore
except ImportError:
raise ImportError(
"PaddleOCR is not installed. Please install it via `pip install paddlepaddle` and `pip install paddleocr` to use this OCR engine. "

View File

@ -12,17 +12,17 @@ from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import (
EasyOcrOptions,
OcrMacOptions,
PaddleOcrOptions,
PdfPipelineOptions,
TesseractCliOcrOptions,
TesseractOcrOptions,
PaddleOcrOptions,
)
from docling.models.base_ocr_model import BaseOcrModel
from docling.models.ds_glm_model import GlmModel, GlmOptions
from docling.models.easyocr_model import EasyOcrModel
from docling.models.paddle_ocr_model import PaddleOcrModel
from docling.models.layout_model import LayoutModel
from docling.models.ocr_mac_model import OcrMacModel
from docling.models.paddle_ocr_model import PaddleOcrModel
from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
from docling.models.page_preprocessing_model import (
PagePreprocessingModel,

View File

@ -5,10 +5,10 @@ from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
EasyOcrOptions,
OcrMacOptions,
PaddleOcrOptions,
PdfPipelineOptions,
TesseractCliOcrOptions,
TesseractOcrOptions,
PaddleOcrOptions
)
from docling.document_converter import DocumentConverter, PdfFormatOption

View File

@ -9,10 +9,10 @@ from docling.datamodel.pipeline_options import (
EasyOcrOptions,
OcrMacOptions,
OcrOptions,
PaddleOcrOptions,
PdfPipelineOptions,
TesseractCliOcrOptions,
TesseractOcrOptions,
PaddleOcrOptions
)
from docling.document_converter import DocumentConverter, PdfFormatOption