diff --git a/examples/rag_langchain.ipynb b/examples/rag_langchain.ipynb index d99ab976..874d3e4e 100644 --- a/examples/rag_langchain.ipynb +++ b/examples/rag_langchain.ipynb @@ -74,7 +74,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Loader and splitter" + "### Helpers" ] }, { @@ -225,6 +225,13 @@ "FILE_PATH = \"https://arxiv.org/pdf/2206.01062\" # DocLayNet paper" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loader and splitter" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -420,10 +427,10 @@ "output_type": "stream", "text": [ "Question:\n", - "How many pages were human annotated for DocLayNet?\n", + "How many pages were annotated by humans for DocLayNet?\n", "\n", "Answer:\n", - "\"80863 pages were human annotated for DocLayNet.\\nExplanation:\\nFrom the context, it is clear that DocL...\"\n", + "\"80863 pages were annotated by humans in DocLayNet.\"\n", "\n", "Source 1:\n", " text: \"DocLayNet contains 80863 PDF pages. Among these, 7059 carry two instances of human annotations, and ...\"\n", @@ -442,29 +449,36 @@ " path: $.main-text[23]\n", "\n", "Source 3:\n", - " text: \"Phase 4: Production annotation. 
The previously selected 80K pages were annotated with the defined 11...\"\n", - " bbox: [317.3695373535156, 82.78482818603516, 559.7149047851562, 244.83221435546875]\n", - " dl_doc_hash: 5dfbd8c115a15fd3396b68409124cfee29fc8efac7b5c846634ff924e635e0dc\n", - " heading: 4 ANNOTATION CAMPAIGN\n", - " page: 5\n", - " path: $.main-text[80]\n", - "\n", - "Source 4:\n", " text: \"DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis\"\n", " bbox: [53.60108947753906, 723.3781127929688, 347.139892578125, 731.6909790039062]\n", " dl_doc_hash: 5dfbd8c115a15fd3396b68409124cfee29fc8efac7b5c846634ff924e635e0dc\n", " heading: REFERENCES\n", " page: 9\n", - " path: $.main-text[133]\n" + " path: $.main-text[133]\n", + "\n", + "Source 4:\n", + " text: \"DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis\"\n", + " bbox: [53.542964935302734, 723.3500366210938, 347.0172424316406, 731.6931762695312]\n", + " dl_doc_hash: 5dfbd8c115a15fd3396b68409124cfee29fc8efac7b5c846634ff924e635e0dc\n", + " heading: 4 ANNOTATION CAMPAIGN\n", + " page: 5\n", + " path: $.main-text[64]\n" ] } ], "source": [ "resp_dict = rag_chain.invoke(\n", - " {\"input\": \"How many pages were human annotated for DocLayNet?\"}\n", + " {\"input\": \"How many pages were annotated by humans for DocLayNet?\"}\n", ")\n", "print_qa(resp_dict=resp_dict)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/examples/rag_llamaindex.ipynb b/examples/rag_llamaindex.ipynb index 3a3381a6..fb3a9c43 100644 --- a/examples/rag_llamaindex.ipynb +++ b/examples/rag_llamaindex.ipynb @@ -43,11 +43,8 @@ ], "source": [ "import os\n", - "from tempfile import TemporaryDirectory\n", "\n", "from dotenv import load_dotenv\n", - "from pydantic import TypeAdapter\n", - "from rich.pretty import pprint\n", "\n", "load_dotenv()" ] @@ -386,6 +383,7 @@ "metadata": {}, "outputs": [], "source": [ + "from tempfile import 
TemporaryDirectory\n", "from llama_index.vector_stores.milvus import MilvusVectorStore\n", "\n", "MILVUS_URI = os.environ.get(\n",