mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 15:32:30 +00:00
updated the mkdocs
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
parent
5127b31083
commit
1c970b7613
@ -2,7 +2,7 @@ import logging
|
|||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from docling_core.types.doc import ImageRefMode, PictureItem, TableItem
|
from docling_core.types.doc import ImageRefMode, PictureItem, TableItem, TextItem
|
||||||
|
|
||||||
from docling.datamodel.base_models import FigureElement, InputFormat, Table
|
from docling.datamodel.base_models import FigureElement, InputFormat, Table
|
||||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
@ -16,6 +16,7 @@ IMAGE_RESOLUTION_SCALE = 2.0
|
|||||||
# FIXME: put in your favorite translation code ....
|
# FIXME: put in your favorite translation code ....
|
||||||
def translate(text: str, src: str = "en", dest: str = "de"):
|
def translate(text: str, src: str = "en", dest: str = "de"):
|
||||||
|
|
||||||
|
_log.warning("!!! IMPLEMENT HERE YOUR FAVORITE TRANSLATION CODE!!!")
|
||||||
# from googletrans import Translator
|
# from googletrans import Translator
|
||||||
|
|
||||||
# Initialize the translator
|
# Initialize the translator
|
||||||
@ -65,6 +66,10 @@ def main():
|
|||||||
element.orig = element.text
|
element.orig = element.text
|
||||||
element.text = translate(text=element.text)
|
element.text = translate(text=element.text)
|
||||||
|
|
||||||
|
elif isinstance(element, TableItem):
|
||||||
|
for cell in element.data.table_cells:
|
||||||
|
# Translate each table cell from its own text (not the enclosing table element's).
cell.text = translate(text=cell.text)
|
||||||
|
|
||||||
# Save markdown with embedded pictures in translated text
|
# Save markdown with embedded pictures in translated text
|
||||||
md_filename = output_dir / f"{doc_filename}-with-images-translated.md"
|
md_filename = output_dir / f"{doc_filename}-with-images-translated.md"
|
||||||
conv_doc.save_as_markdown(md_filename, image_mode=ImageRefMode.EMBEDDED)
|
conv_doc.save_as_markdown(md_filename, image_mode=ImageRefMode.EMBEDDED)
|
||||||
|
@ -76,6 +76,7 @@ nav:
|
|||||||
- "Multimodal export": examples/export_multimodal.py
|
- "Multimodal export": examples/export_multimodal.py
|
||||||
- "Force full page OCR": examples/full_page_ocr.py
|
- "Force full page OCR": examples/full_page_ocr.py
|
||||||
- "Accelerator options": examples/run_with_accelerator.py
|
- "Accelerator options": examples/run_with_accelerator.py
|
||||||
|
- "Simple translation": examples/translate.py
|
||||||
- ✂️ Chunking:
|
- ✂️ Chunking:
|
||||||
- "Hybrid chunking": examples/hybrid_chunking.ipynb
|
- "Hybrid chunking": examples/hybrid_chunking.ipynb
|
||||||
- 💬 RAG / QA:
|
- 💬 RAG / QA:
|
||||||
|
Loading…
Reference in New Issue
Block a user