mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
fixing styling issues
Signed-off-by: Swaymaw <swaymaw@gmail.com>
This commit is contained in:
parent
86d9a2ca00
commit
a00940f918
@ -26,11 +26,11 @@ from docling.datamodel.pipeline_options import (
|
||||
EasyOcrOptions,
|
||||
OcrMacOptions,
|
||||
OcrOptions,
|
||||
PaddleOcrOptions,
|
||||
PdfPipelineOptions,
|
||||
TableFormerMode,
|
||||
TesseractCliOcrOptions,
|
||||
TesseractOcrOptions,
|
||||
PaddleOcrOptions
|
||||
)
|
||||
from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
|
||||
|
||||
@ -264,7 +264,7 @@ def convert(
|
||||
ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr)
|
||||
elif ocr_engine == OcrEngine.OCRMAC:
|
||||
ocr_options = OcrMacOptions(force_full_page_ocr=force_ocr)
|
||||
elif ocr_engine == OcrEngine.PADDLEOCR:
|
||||
elif ocr_engine == OcrEngine.PADDLEOCR:
|
||||
ocr_options = PaddleOcrOptions(force_full_page_ocr=force_ocr)
|
||||
else:
|
||||
raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")
|
||||
|
@ -1,6 +1,6 @@
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import List, Literal, Optional, Union, Annotated
|
||||
from typing import Annotated, List, Literal, Optional, Union
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
@ -41,11 +41,12 @@ class EasyOcrOptions(OcrOptions):
|
||||
protected_namespaces=(),
|
||||
)
|
||||
|
||||
|
||||
class PaddleOcrOptions(OcrOptions):
|
||||
kind: Literal["paddleocr"] = "paddleocr"
|
||||
lang: Annotated[
|
||||
list[str],
|
||||
Field(min_items=1, max_items=1) # Requires the list to contain exactly 1 item
|
||||
Field(min_items=1, max_items=1), # Requires the list to contain exactly 1 item
|
||||
] = ["en"]
|
||||
use_gpu: bool = True # same default as paddleocr.ocr
|
||||
use_angle_cls: bool = True
|
||||
@ -102,7 +103,11 @@ class PdfPipelineOptions(PipelineOptions):
|
||||
|
||||
table_structure_options: TableStructureOptions = TableStructureOptions()
|
||||
ocr_options: Union[
|
||||
EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, PaddleOcrOptions, OcrMacOptions
|
||||
EasyOcrOptions,
|
||||
TesseractCliOcrOptions,
|
||||
TesseractOcrOptions,
|
||||
PaddleOcrOptions,
|
||||
OcrMacOptions,
|
||||
] = Field(EasyOcrOptions(), discriminator="kind")
|
||||
|
||||
images_scale: float = 1.0
|
||||
|
@ -23,7 +23,7 @@ class PaddleOcrModel(BaseOcrModel):
|
||||
|
||||
if self.enabled:
|
||||
try:
|
||||
from paddleocr import PaddleOCR, draw_ocr
|
||||
from paddleocr import PaddleOCR, draw_ocr # type: ignore
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"PaddleOCR is not installed. Please install it via `pip install paddlepaddle` and `pip install paddleocr` to use this OCR engine. "
|
||||
|
@ -12,17 +12,17 @@ from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import (
|
||||
EasyOcrOptions,
|
||||
OcrMacOptions,
|
||||
PaddleOcrOptions,
|
||||
PdfPipelineOptions,
|
||||
TesseractCliOcrOptions,
|
||||
TesseractOcrOptions,
|
||||
PaddleOcrOptions,
|
||||
)
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
from docling.models.ds_glm_model import GlmModel, GlmOptions
|
||||
from docling.models.easyocr_model import EasyOcrModel
|
||||
from docling.models.paddle_ocr_model import PaddleOcrModel
|
||||
from docling.models.layout_model import LayoutModel
|
||||
from docling.models.ocr_mac_model import OcrMacModel
|
||||
from docling.models.paddle_ocr_model import PaddleOcrModel
|
||||
from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
|
||||
from docling.models.page_preprocessing_model import (
|
||||
PagePreprocessingModel,
|
||||
|
@ -5,10 +5,10 @@ from docling.datamodel.base_models import InputFormat
|
||||
from docling.datamodel.pipeline_options import (
|
||||
EasyOcrOptions,
|
||||
OcrMacOptions,
|
||||
PaddleOcrOptions,
|
||||
PdfPipelineOptions,
|
||||
TesseractCliOcrOptions,
|
||||
TesseractOcrOptions,
|
||||
PaddleOcrOptions
|
||||
)
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||
|
||||
|
@ -9,10 +9,10 @@ from docling.datamodel.pipeline_options import (
|
||||
EasyOcrOptions,
|
||||
OcrMacOptions,
|
||||
OcrOptions,
|
||||
PaddleOcrOptions,
|
||||
PdfPipelineOptions,
|
||||
TesseractCliOcrOptions,
|
||||
TesseractOcrOptions,
|
||||
PaddleOcrOptions
|
||||
)
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user