mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
fixed conflicts
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
parent
0b76211eed
commit
a3e2cf5473
@ -30,7 +30,7 @@ class EasyOcrOptions(OcrOptions):
|
||||
|
||||
class TesseractOcrOptions(OcrOptions):
|
||||
kind: Literal["tesseract"] = "tesseract"
|
||||
|
||||
lang: List[str] = ["fr", "de", "es", "en"]
|
||||
|
||||
class TesserOcrOptions(OcrOptions):
|
||||
kind: Literal["tesseract"] = "tesserocr"
|
||||
|
@ -10,7 +10,6 @@ from docling.models.base_ocr_model import BaseOcrModel
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TesseractOcrModel(BaseOcrModel):
|
||||
|
||||
def __init__(self, enabled: bool, options: TesseractOcrOptions):
|
||||
|
@ -8,6 +8,7 @@ from docling.datamodel.pipeline_options import (
|
||||
)
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
from docling.models.easyocr_model import EasyOcrModel
|
||||
from docling.models.tesseract_model import TesseractOCRModel
|
||||
from docling.models.layout_model import LayoutModel
|
||||
from docling.models.table_structure_model import TableStructureModel
|
||||
from docling.models.tesseract_model import TesseractOcrModel
|
||||
|
@ -47,7 +47,28 @@ def get_pdf_paths():
|
||||
return pdf_files
|
||||
|
||||
|
||||
def get_converter():
|
||||
def get_easyocr_converter():
|
||||
|
||||
ocr_options = EasyOcrOptions(
|
||||
|
||||
)
|
||||
|
||||
pipeline_options = PipelineOptions()
|
||||
# Debug
|
||||
pipeline_options.do_ocr = True
|
||||
pipeline_options.do_table_structure = True
|
||||
pipeline_options.table_structure_options.do_cell_matching = True
|
||||
|
||||
|
||||
|
||||
converter = DocumentConverter(
|
||||
pipeline_options=pipeline_options,
|
||||
pdf_backend=DoclingParseDocumentBackend,
|
||||
)
|
||||
|
||||
return converter
|
||||
|
||||
def get_tesseract_converter():
|
||||
|
||||
pipeline_options = PipelineOptions()
|
||||
# Debug
|
||||
@ -63,6 +84,7 @@ def get_converter():
|
||||
return converter
|
||||
|
||||
|
||||
|
||||
def test_e2e_conversions():
|
||||
|
||||
pdf_paths = get_pdf_paths()
|
||||
|
Loading…
Reference in New Issue
Block a user