From 2576e657534e6a387b2879841adf36f8345fe406 Mon Sep 17 00:00:00 2001
From: Suhwan Seo <nuridol@gmail.com>
Date: Wed, 20 Nov 2024 17:31:57 +0900
Subject: [PATCH] docs: update examples and installation for ocrmac support

- Added `OcrMacOptions` to `custom_convert.py` and `full_page_ocr.py` examples.
- Included usage comments and examples for `OcrMacOptions` in OCR pipelines.
- Updated installation guide to include instructions for installing `ocrmac`, noting macOS version requirements (10.15+).
- Highlighted that `ocrmac` leverages Apple's Vision framework as an OCR backend.

This enhances documentation for users working on macOS to leverage `ocrmac` effectively.

Signed-off-by: Suhwan Seo <nuridol@gmail.com>
---
 docs/examples/custom_convert.py | 15 +++++++++++++++
 docs/examples/full_page_ocr.py  |  4 +++-
 docs/installation.md            | 11 +++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/docs/examples/custom_convert.py b/docs/examples/custom_convert.py
index 7631848b..2d300904 100644
--- a/docs/examples/custom_convert.py
+++ b/docs/examples/custom_convert.py
@@ -7,6 +7,7 @@ from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.pipeline_options import PdfPipelineOptions
 from docling.document_converter import DocumentConverter, PdfFormatOption
+from docling.models.ocr_mac_model import OcrMacOptions
 from docling.models.tesseract_ocr_cli_model import TesseractCliOcrOptions
 from docling.models.tesseract_ocr_model import TesseractOcrOptions
 
@@ -122,6 +123,20 @@ def main():
     #     }
     # )
 
+    # Docling Parse with ocrmac (macOS only)
+    # ----------------------
+    # pipeline_options = PdfPipelineOptions()
+    # pipeline_options.do_ocr = True
+    # pipeline_options.do_table_structure = True
+    # pipeline_options.table_structure_options.do_cell_matching = True
+    # pipeline_options.ocr_options = OcrMacOptions()
+
+    # doc_converter = DocumentConverter(
+    #     format_options={
+    #         InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
+    #     }
+    # )
+
     ###########################################################################
 
     start_time = time.time()
diff --git a/docs/examples/full_page_ocr.py b/docs/examples/full_page_ocr.py
index 35c2ba6b..bbb7e122 100644
--- a/docs/examples/full_page_ocr.py
+++ b/docs/examples/full_page_ocr.py
@@ -4,6 +4,7 @@ from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.pipeline_options import (
     EasyOcrOptions,
+    OcrMacOptions,
     PdfPipelineOptions,
     TesseractCliOcrOptions,
     TesseractOcrOptions,
@@ -19,9 +20,10 @@ def main():
     pipeline_options.do_table_structure = True
     pipeline_options.table_structure_options.do_cell_matching = True
 
-    # Any of the OCR options can be used:EasyOcrOptions, TesseractOcrOptions, TesseractCliOcrOptions
+    # Any of the OCR options can be used: EasyOcrOptions, TesseractOcrOptions, TesseractCliOcrOptions, OcrMacOptions (macOS only)
     # ocr_options = EasyOcrOptions(force_full_page_ocr=True)
     # ocr_options = TesseractOcrOptions(force_full_page_ocr=True)
+    # ocr_options = OcrMacOptions(force_full_page_ocr=True)
     ocr_options = TesseractCliOcrOptions(force_full_page_ocr=True)
     pipeline_options.ocr_options = ocr_options
 
diff --git a/docs/installation.md b/docs/installation.md
index 7701543b..addae382 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -92,6 +92,17 @@ Works on macOS, Linux, and Windows, with support for both x86_64 and arm64 archi
     pip install --no-binary :all: tesserocr
     ```
 
+    <h3>ocrmac installation</h3>
+
+    [ocrmac](https://github.com/straussmaximilian/ocrmac) uses
+    Apple's Vision (or Live Text) framework as its OCR backend.
+    To use this engine with Docling, ocrmac must be installed on your system.
+    It works only on macOS 10.15 or later.
+
+    ```console
+    pip install ocrmac
+    ```
+
 ## Development setup
 
 To develop Docling features, bugfixes etc., install as follows from your local clone's root dir: