diff --git a/docs/examples/rag_langchain.ipynb b/docs/examples/rag_langchain.ipynb index f2464f29..31ff009a 100644 --- a/docs/examples/rag_langchain.ipynb +++ b/docs/examples/rag_langchain.ipynb @@ -49,18 +49,6 @@ "load_dotenv()" ] }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import warnings\n", - "\n", - "warnings.filterwarnings(action=\"ignore\", category=UserWarning, module=\"pydantic|torch\")\n", - "warnings.filterwarnings(action=\"ignore\", category=FutureWarning, module=\"easyocr\")" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -86,54 +74,37 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "from enum import Enum\n", "from typing import Iterator\n", "\n", "from langchain_core.document_loaders import BaseLoader\n", "from langchain_core.documents import Document as LCDocument\n", - "from pydantic import BaseModel\n", "\n", "from docling.document_converter import DocumentConverter\n", "\n", - "\n", - "class DocumentMetadata(BaseModel):\n", - " dl_doc_hash: str\n", - " # source: str\n", - "\n", - "\n", "class DoclingPDFLoader(BaseLoader):\n", - " class ParseType(str, Enum):\n", - " MARKDOWN = \"markdown\"\n", - " # JSON = \"json\"\n", "\n", - " def __init__(self, file_path: str | list[str], parse_type: ParseType) -> None:\n", + " def __init__(self, file_path: str | list[str]) -> None:\n", " self._file_paths = file_path if isinstance(file_path, list) else [file_path]\n", - " self._parse_type = parse_type\n", " self._converter = DocumentConverter()\n", "\n", " def lazy_load(self) -> Iterator[LCDocument]:\n", " for source in self._file_paths:\n", - " dl_doc = self._converter.convert_single(source).output\n", - " match self._parse_type:\n", - " case self.ParseType.MARKDOWN:\n", - " text = dl_doc.export_to_markdown()\n", - " # case self.ParseType.JSON:\n", - " # text = dl_doc.model_dump_json()\n", - " case _:\n", - " raise 
RuntimeError(\n", - " f\"Unexpected parse type encountered: {self._parse_type}\"\n", - " )\n", - " lc_doc = LCDocument(\n", - " page_content=text,\n", - " metadata=DocumentMetadata(\n", - " dl_doc_hash=dl_doc.file_info.document_hash,\n", - " ).model_dump(),\n", - " )\n", - " yield lc_doc" + " dl_doc = self._converter.convert(source).document\n", + " text = dl_doc.export_to_markdown()\n", + " yield LCDocument(page_content=text)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "FILE_PATH = \"https://raw.githubusercontent.com/DS4SD/docling/main/tests/data/2206.01062.pdf\" # DocLayNet paper" ] }, { @@ -141,37 +112,10 @@ "execution_count": 5, "metadata": {}, "outputs": [], - "source": [ - "FILE_PATH = \"https://arxiv.org/pdf/2206.01062\" # DocLayNet paper" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1b38d07d5fed4618a44ecf261e1e5c44", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Fetching 7 files: 0%| | 0/7 [00:00 â„šī¸ 👉 **The LlamaIndex Docling extension update to Docling v2 is ongoing; in the meanwhile, this notebook is showing current extension output, based on Docling v1.**" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/docs/integrations/llamaindex.md b/docs/integrations/llamaindex.md index d3c0f46e..af82da31 100644 --- a/docs/integrations/llamaindex.md +++ b/docs/integrations/llamaindex.md @@ -4,6 +4,10 @@ Docling is available as an official LlamaIndex extension! To get started, check out the [step-by-step guide in LlamaIndex \[↗\]](https://docs.llamaindex.ai/en/stable/examples/data_connectors/DoclingReaderDemo/). +!!! info "Docling v2" + + The LlamaIndex Docling extension update to Docling v2 is ongoing. 
+ ## Components ### Docling Reader diff --git a/docs/overrides/main.html b/docs/overrides/main.html index 9071c5ff..195acaf1 100644 --- a/docs/overrides/main.html +++ b/docs/overrides/main.html @@ -1,5 +1,5 @@ {% extends "base.html" %} {% block announce %} -

🎉 Docling is going v2, check out what's new and how to get started!

+

🎉 Docling has gone v2! Check out what's new and how to get started!

{% endblock %}