diff --git a/docs/examples/translate.py b/docs/examples/translate.py
index 1adb1c75..907f3c05 100644
--- a/docs/examples/translate.py
+++ b/docs/examples/translate.py
@@ -2,7 +2,7 @@ import logging
 import time
 from pathlib import Path
 
-from docling_core.types.doc import ImageRefMode, PictureItem, TableItem
+from docling_core.types.doc import ImageRefMode, PictureItem, TableItem, TextItem
 
 from docling.datamodel.base_models import FigureElement, InputFormat, Table
 from docling.datamodel.pipeline_options import PdfPipelineOptions
@@ -16,6 +16,7 @@ IMAGE_RESOLUTION_SCALE = 2.0
 
 # FIXME: put in your favorite translation code ....
 def translate(text: str, src: str = "en", dest: str = "de"):
+    _log.warning("!!! IMPLEMENT HERE YOUR FAVORITE TRANSLATION CODE!!!")
     # from googletrans import Translator
 
     # Initialize the translator
@@ -65,6 +66,11 @@ def main():
             element.orig = element.text
             element.text = translate(text=element.text)
 
+        elif isinstance(element, TableItem):
+            for cell in element.data.table_cells:
+                # Use the cell's own text as input, not the table element's.
+                cell.text = translate(text=cell.text)
+
     # Save markdown with embedded pictures in translated text
     md_filename = output_dir / f"{doc_filename}-with-images-translated.md"
     conv_doc.save_as_markdown(md_filename, image_mode=ImageRefMode.EMBEDDED)
diff --git a/mkdocs.yml b/mkdocs.yml
index 8d9f6591..8f8d86d9 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -76,6 +76,7 @@ nav:
       - "Multimodal export": examples/export_multimodal.py
       - "Force full page OCR": examples/full_page_ocr.py
       - "Accelerator options": examples/run_with_accelerator.py
+      - "Simple translation": examples/translate.py
     - ✂️ Chunking:
       - "Hybrid chunking": examples/hybrid_chunking.ipynb
     - 💬 RAG / QA: