diff --git a/docs/examples/rag_mongodb.ipynb b/docs/examples/rag_mongodb.ipynb index 189c66b8..acc1aa14 100644 --- a/docs/examples/rag_mongodb.ipynb +++ b/docs/examples/rag_mongodb.ipynb @@ -68,7 +68,7 @@ "import warnings\n", "\n", "warnings.filterwarnings(\"ignore\")\n", - "logging.getLogger(\"pymongo\").setLevel(logging.ERROR)\n" + "logging.getLogger(\"pymongo\").setLevel(logging.ERROR)" ] }, { @@ -134,8 +134,8 @@ "source": [ "# Influential machine learning papers\n", "source_urls = [\n", - " \"https://arxiv.org/pdf/1706.03762\" # Attention is All You Need\n", - " ]" + " \"https://arxiv.org/pdf/1706.03762\" # Attention is All You Need\n", + "]" ] }, { @@ -185,8 +185,10 @@ } ], "source": [ - "from docling.document_converter import DocumentConverter\n", "from pprint import pprint\n", + "\n", + "from docling.document_converter import DocumentConverter\n", + "\n", "# Instantiate the doc converter\n", "doc_converter = DocumentConverter()\n", "\n", @@ -288,11 +290,11 @@ }, "outputs": [], "source": [ - "# Voyage API key\n", - "VOYAGE_API_KEY=\"**********************\" \n", - "\n", "import voyageai\n", "\n", + "# Voyage API key\n", + "VOYAGE_API_KEY = \"**********************\"\n", + "\n", "# Initialize the VoyageAI client\n", "vo = voyageai.Client(VOYAGE_API_KEY)\n", "result = vo.contextualized_embed(inputs=[chunk_texts], model=\"voyage-context-3\")\n", @@ -326,7 +328,10 @@ "outputs": [], "source": [ "# Combine chunks with their embeddings\n", - "chunk_data = [{\"text\": text, \"embedding\": emb} for text, emb in zip(chunk_texts, contextualized_chunk_embds)]" + "chunk_data = [\n", + " {\"text\": text, \"embedding\": emb}\n", + " for text, emb in zip(chunk_texts, contextualized_chunk_embds)\n", + "]" ] }, { @@ -362,9 +367,11 @@ "# Insert to MongoDB\n", "from pymongo import MongoClient\n", "\n", - "client = MongoClient(\"mongodb+srv://*******.mongodb.net/\") # Replace with your MongoDB connection string\n", - "db = client[\"rag_db\"] # Database name\n", - "collection = db[\"documents\"] # Collection name\n", + "client = MongoClient(\n", + " \"mongodb+srv://*******.mongodb.net/\"\n", + ") # Replace with your MongoDB connection string\n", + "db = client[\"rag_db\"] # Database name\n", + "collection = db[\"documents\"] # Collection name\n", "\n", "# Insert chunk data into MongoDB\n", "response = collection.insert_many(chunk_data)\n", @@ -399,21 +406,21 @@ "\n", "# Create your index model, then create the search index\n", "search_index_model = SearchIndexModel(\n", - " definition={\n", - " \"fields\": [\n", - " {\n", - " \"type\": \"vector\",\n", - " \"path\": \"embedding\",\n", - " \"numDimensions\": 1024,\n", - " \"similarity\": \"dotProduct\"\n", - " }\n", - " ]\n", - " },\n", - " name=\"vector_index\",\n", - " type=\"vectorSearch\"\n", + " definition={\n", + " \"fields\": [\n", + " {\n", + " \"type\": \"vector\",\n", + " \"path\": \"embedding\",\n", + " \"numDimensions\": 1024,\n", + " \"similarity\": \"dotProduct\",\n", + " }\n", + " ]\n", + " },\n", + " name=\"vector_index\",\n", + " type=\"vectorSearch\",\n", ")\n", "result = collection.create_search_index(model=search_index_model)\n", - "print(\"New search index named \" + result + \" is building.\")\n" + "print(\"New search index named \" + result + \" is building.\")" ] }, { @@ -524,21 +531,24 @@ } ], "source": [ + "import os\n", + "\n", + "from openai import AzureOpenAI\n", "from rich.console import Console\n", "from rich.panel import Panel\n", - "from openai import AzureOpenAI\n", - "import os\n", "\n", "# Create MongoDB vector search query for \"Attention is All You Need\"\n", "# (prompt already defined above, reuse if present; else keep this definition)\n", "prompt = \"Give me top 3 learning points from `Attention is All You Need`, using only the retrieved context.\"\n", "\n", "# Generate embedding for the query using VoyageAI (vo already initialized earlier)\n", - "query_embd_context = vo.contextualized_embed(\n", - " inputs=[[prompt]],\n", - " model=\"voyage-context-3\",\n", - " input_type=\"query\"\n", - ").results[0].embeddings[0]\n", + "query_embd_context = (\n", + " vo.contextualized_embed(\n", + " inputs=[[prompt]], model=\"voyage-context-3\", input_type=\"query\"\n", + " )\n", + " .results[0]\n", + " .embeddings[0]\n", + ")\n", "\n", "# Vector search pipeline\n", "search_pipeline = [\n", @@ -548,21 +558,17 @@ " \"path\": \"embedding\",\n", " \"queryVector\": query_embd_context,\n", " \"numCandidates\": 10,\n", - " \"limit\": 10\n", + " \"limit\": 10,\n", " }\n", " },\n", - " {\n", - " \"$project\": {\n", - " \"text\": 1,\n", - " \"_id\": 0,\n", - " \"score\": {\"$meta\": \"vectorSearchScore\"}\n", - " }\n", - " }\n", + " {\"$project\": {\"text\": 1, \"_id\": 0, \"score\": {\"$meta\": \"vectorSearchScore\"}}},\n", "]\n", "\n", "results = list(collection.aggregate(search_pipeline))\n", "if not results:\n", - " raise ValueError(\"No vector search results returned. Verify the index is built before querying.\")\n", + " raise ValueError(\n", + " \"No vector search results returned. Verify the index is built before querying.\"\n", + " )\n", "\n", "context_texts = [doc[\"text\"] for doc in results]\n", "combined_context = \"\\n\\n\".join(context_texts)\n", @@ -579,7 +585,7 @@ "client = AzureOpenAI(\n", " api_key=AZURE_OPENAI_API_KEY,\n", " azure_endpoint=AZURE_OPENAI_ENDPOINT.rstrip(\"/\"),\n", - " api_version=AZURE_OPENAI_API_VERSION\n", + " api_version=AZURE_OPENAI_API_VERSION,\n", ")\n", "\n", "# Chat completion using retrieved context\n", @@ -588,21 +594,23 @@ " messages=[\n", " {\n", " \"role\": \"system\",\n", - " \"content\": \"You are a helpful assistant. Use only the provided context to answer questions. If the context is insufficient, say so.\"\n", + " \"content\": \"You are a helpful assistant. Use only the provided context to answer questions. If the context is insufficient, say so.\",\n", " },\n", " {\n", " \"role\": \"user\",\n", - " \"content\": f\"Context:\\n{combined_context}\\n\\nQuestion: {prompt}\"\n", - " }\n", + " \"content\": f\"Context:\\n{combined_context}\\n\\nQuestion: {prompt}\",\n", + " },\n", " ],\n", - " temperature=0.2\n", + " temperature=0.2,\n", ")\n", "\n", "response_text = response.choices[0].message.content\n", "\n", "console = Console()\n", "console.print(Panel(f\"{prompt}\", title=\"Prompt\", border_style=\"bold red\"))\n", - "console.print(Panel(response_text, title=\"Generated Content\", border_style=\"bold green\"))" + "console.print(\n", + " Panel(response_text, title=\"Generated Content\", border_style=\"bold green\")\n", + ")" ] }, { diff --git a/mkdocs.yml b/mkdocs.yml index 4d375b7c..dd83136e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -123,6 +123,7 @@ nav: - examples/rag_opensearch.ipynb - examples/rag_weaviate.ipynb - examples/retrieval_qdrant.ipynb + - examples/rag_mongodb.ipynb - Integrations: - Integrations: integrations/index.md - 🤖 Agentic / AI dev frameworks: