Updated the mkdocs navigation and the translation example

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter Staar 2025-01-14 13:00:50 +01:00
parent 5127b31083
commit 1c970b7613
2 changed files with 7 additions and 1 deletions

View File

@ -2,7 +2,7 @@ import logging
import time import time
from pathlib import Path from pathlib import Path
from docling_core.types.doc import ImageRefMode, PictureItem, TableItem from docling_core.types.doc import ImageRefMode, PictureItem, TableItem, TextItem
from docling.datamodel.base_models import FigureElement, InputFormat, Table from docling.datamodel.base_models import FigureElement, InputFormat, Table
from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.datamodel.pipeline_options import PdfPipelineOptions
@ -16,6 +16,7 @@ IMAGE_RESOLUTION_SCALE = 2.0
# FIXME: put in your favorite translation code .... # FIXME: put in your favorite translation code ....
def translate(text: str, src: str = "en", dest: str = "de"): def translate(text: str, src: str = "en", dest: str = "de"):
_log.warning("!!! IMPLEMENT HERE YOUR FAVORITE TRANSLATION CODE!!!")
# from googletrans import Translator # from googletrans import Translator
# Initialize the translator # Initialize the translator
@ -65,6 +66,10 @@ def main():
element.orig = element.text element.orig = element.text
element.text = translate(text=element.text) element.text = translate(text=element.text)
elif isinstance(element, TableItem):
for cell in element.data.table_cells:
cell.text = translate(text=cell.text)
# Save markdown with embedded pictures in translated text # Save markdown with embedded pictures in translated text
md_filename = output_dir / f"{doc_filename}-with-images-translated.md" md_filename = output_dir / f"{doc_filename}-with-images-translated.md"
conv_doc.save_as_markdown(md_filename, image_mode=ImageRefMode.EMBEDDED) conv_doc.save_as_markdown(md_filename, image_mode=ImageRefMode.EMBEDDED)

View File

@ -76,6 +76,7 @@ nav:
- "Multimodal export": examples/export_multimodal.py - "Multimodal export": examples/export_multimodal.py
- "Force full page OCR": examples/full_page_ocr.py - "Force full page OCR": examples/full_page_ocr.py
- "Accelerator options": examples/run_with_accelerator.py - "Accelerator options": examples/run_with_accelerator.py
- "Simple translation": examples/translate.py
- ✂️ Chunking: - ✂️ Chunking:
- "Hybrid chunking": examples/hybrid_chunking.ipynb - "Hybrid chunking": examples/hybrid_chunking.ipynb
- 💬 RAG / QA: - 💬 RAG / QA: