mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-15 16:18:22 +00:00
feat: add options for choosing OCR engines (#118)
---------
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
Co-authored-by: Nikos Livathinos <nli@zurich.ibm.com>
Co-authored-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
@@ -110,7 +110,10 @@ class BoundingBox(BaseModel):
|
||||
return BoundingBox(l=l, t=t, r=r, b=b, coord_origin=origin)
|
||||
|
||||
def area(self) -> float:
    """Return the area enclosed by this bounding box.

    The raw product ``(r - l) * (b - t)`` is negated when ``coord_origin``
    is ``CoordOrigin.BOTTOMLEFT``.

    Returns:
        The (sign-adjusted) area as a float.
    """
    # NOTE(review): the negation presumably compensates for ``t > b`` under
    # a bottom-left origin, where the raw product comes out negative --
    # confirm against the coordinate conventions used by callers.
    area = (self.r - self.l) * (self.b - self.t)
    if self.coord_origin == CoordOrigin.BOTTOMLEFT:
        area = -area
    return area
|
||||
|
||||
def intersection_area_with(self, other: "BoundingBox") -> float:
|
||||
# Calculate intersection coordinates
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from enum import Enum, auto
|
||||
from typing import List, Literal, Optional, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class TableFormerMode(str, Enum):
|
||||
@@ -18,8 +19,49 @@ class TableStructureOptions(BaseModel):
|
||||
mode: TableFormerMode = TableFormerMode.FAST
|
||||
|
||||
|
||||
class OcrOptions(BaseModel):
    """Common base for the per-engine OCR option models."""

    # Engine identifier; each subclass pins it to a single literal value.
    kind: str
|
||||
|
||||
|
||||
class EasyOcrOptions(OcrOptions):
    """OCR options whose ``kind`` is fixed to ``"easyocr"``."""

    # Unknown keys are rejected. ``protected_namespaces=()`` disables
    # pydantic's reserved ``model_`` prefix check, which the
    # ``model_storage_directory`` field name would otherwise trigger.
    model_config = ConfigDict(extra="forbid", protected_namespaces=())

    kind: Literal["easyocr"] = "easyocr"
    lang: List[str] = ["fr", "de", "es", "en"]
    # Same default as easyocr.Reader.
    use_gpu: bool = True
    model_storage_directory: Optional[str] = None
    # Same default as easyocr.Reader.
    download_enabled: bool = True
|
||||
|
||||
|
||||
class TesseractCliOcrOptions(OcrOptions):
    """OCR options whose ``kind`` is fixed to ``"tesseract"``."""

    # Reject any key that is not declared below.
    model_config = ConfigDict(extra="forbid")

    kind: Literal["tesseract"] = "tesseract"
    lang: List[str] = ["fra", "deu", "spa", "eng"]
    # NOTE(review): presumably the executable name or path to invoke -- confirm.
    tesseract_cmd: str = "tesseract"
    path: Optional[str] = None
|
||||
|
||||
|
||||
class TesseractOcrOptions(OcrOptions):
    """OCR options whose ``kind`` is fixed to ``"tesserocr"``."""

    # Reject any key that is not declared below.
    model_config = ConfigDict(extra="forbid")

    kind: Literal["tesserocr"] = "tesserocr"
    lang: List[str] = ["fra", "deu", "spa", "eng"]
    path: Optional[str] = None
|
||||
|
||||
|
||||
class PipelineOptions(BaseModel):
    """Top-level switches and nested option models for the pipeline."""

    # True: perform table structure extraction
    do_table_structure: bool = True
    # True: perform OCR, replace programmatic PDF text
    do_ocr: bool = True

    table_structure_options: TableStructureOptions = TableStructureOptions()
    # The concrete OCR option model is selected by its ``kind`` field;
    # EasyOCR is the default.
    ocr_options: Union[
        EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions
    ] = Field(default=EasyOcrOptions(), discriminator="kind")
|
||||
|
||||
Reference in New Issue
Block a user