mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 07:22:14 +00:00
updated the mkdocs
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
parent
5127b31083
commit
1c970b7613
@ -2,7 +2,7 @@ import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from docling_core.types.doc import ImageRefMode, PictureItem, TableItem
|
||||
from docling_core.types.doc import ImageRefMode, PictureItem, TableItem, TextItem
|
||||
|
||||
from docling.datamodel.base_models import FigureElement, InputFormat, Table
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||
@ -16,6 +16,7 @@ IMAGE_RESOLUTION_SCALE = 2.0
|
||||
# FIXME: replace this placeholder with your preferred translation code.
|
||||
def translate(text: str, src: str = "en", dest: str = "de"):
|
||||
|
||||
_log.warning("!!! IMPLEMENT HERE YOUR FAVORITE TRANSLATION CODE!!!")
|
||||
# from googletrans import Translator
|
||||
|
||||
# Initialize the translator
|
||||
@ -65,6 +66,10 @@ def main():
|
||||
element.orig = element.text
|
||||
element.text = translate(text=element.text)
|
||||
|
||||
elif isinstance(element, TableItem):
|
||||
for cell in element.data.table_cells:
|
||||
cell.text = translate(text=element.text)
|
||||
|
||||
# Save markdown with embedded pictures in translated text
|
||||
md_filename = output_dir / f"{doc_filename}-with-images-translated.md"
|
||||
conv_doc.save_as_markdown(md_filename, image_mode=ImageRefMode.EMBEDDED)
|
||||
|
@ -76,6 +76,7 @@ nav:
|
||||
- "Multimodal export": examples/export_multimodal.py
|
||||
- "Force full page OCR": examples/full_page_ocr.py
|
||||
- "Accelerator options": examples/run_with_accelerator.py
|
||||
- "Simple translation": examples/translate.py
|
||||
- ✂️ Chunking:
|
||||
- "Hybrid chunking": examples/hybrid_chunking.ipynb
|
||||
- 💬 RAG / QA:
|
||||
|
Loading…
Reference in New Issue
Block a user