docs: Add example with MongoDB

utsavMongoDB
2025-10-03 15:11:01 +05:30
parent 5c7a249291
commit 25436e543c
2 changed files with 55 additions and 46 deletions


@@ -68,7 +68,7 @@
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"logging.getLogger(\"pymongo\").setLevel(logging.ERROR)\n"
"logging.getLogger(\"pymongo\").setLevel(logging.ERROR)"
]
},
{
@@ -185,8 +185,10 @@
}
],
"source": [
"from docling.document_converter import DocumentConverter\n",
"from pprint import pprint\n",
"\n",
"from docling.document_converter import DocumentConverter\n",
"\n",
"# Instantiate the doc converter\n",
"doc_converter = DocumentConverter()\n",
"\n",
@@ -288,11 +290,11 @@
},
"outputs": [],
"source": [
"import voyageai\n",
"\n",
"# Voyage API key\n",
"VOYAGE_API_KEY = \"**********************\"\n",
"\n",
"import voyageai\n",
"\n",
"# Initialize the VoyageAI client\n",
"vo = voyageai.Client(VOYAGE_API_KEY)\n",
"result = vo.contextualized_embed(inputs=[chunk_texts], model=\"voyage-context-3\")\n",
@@ -326,7 +328,10 @@
"outputs": [],
"source": [
"# Combine chunks with their embeddings\n",
"chunk_data = [{\"text\": text, \"embedding\": emb} for text, emb in zip(chunk_texts, contextualized_chunk_embds)]"
"chunk_data = [\n",
" {\"text\": text, \"embedding\": emb}\n",
" for text, emb in zip(chunk_texts, contextualized_chunk_embds)\n",
"]"
]
},
{
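
The vector index created below is declared with numDimensions set to 1024, which must match the length of the stored embeddings. An optional check, assuming chunk_data as built above:

    # Optional sanity check: embedding length must equal the index's numDimensions.
    assert all(
        len(item["embedding"]) == 1024 for item in chunk_data
    ), "embedding length does not match the index's numDimensions"
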
@@ -362,7 +367,9 @@
"# Insert to MongoDB\n",
"from pymongo import MongoClient\n",
"\n",
"client = MongoClient(\"mongodb+srv://*******.mongodb.net/\") # Replace with your MongoDB connection string\n",
"client = MongoClient(\n",
" \"mongodb+srv://*******.mongodb.net/\"\n",
") # Replace with your MongoDB connection string\n",
"db = client[\"rag_db\"] # Database name\n",
"collection = db[\"documents\"] # Collection name\n",
"\n",
@@ -405,15 +412,15 @@
" \"type\": \"vector\",\n",
" \"path\": \"embedding\",\n",
" \"numDimensions\": 1024,\n",
" \"similarity\": \"dotProduct\"\n",
" \"similarity\": \"dotProduct\",\n",
" }\n",
" ]\n",
" },\n",
" name=\"vector_index\",\n",
" type=\"vectorSearch\"\n",
" type=\"vectorSearch\",\n",
")\n",
"result = collection.create_search_index(model=search_index_model)\n",
"print(\"New search index named \" + result + \" is building.\")\n"
"print(\"New search index named \" + result + \" is building.\")"
]
},
{
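
Atlas builds the search index asynchronously, so queries issued immediately after create_search_index can come back empty. A common pattern, assuming pymongo 4.5+ and the field names reported by $listSearchIndexes, is to poll until the index is queryable:

    import time

    # Wait for the index returned by create_search_index to become queryable before searching.
    while True:
        info = list(collection.list_search_indexes(result))
        if info and info[0].get("queryable"):
            break
        time.sleep(5)
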
@@ -524,21 +531,24 @@
}
],
"source": [
"import os\n",
"\n",
"from openai import AzureOpenAI\n",
"from rich.console import Console\n",
"from rich.panel import Panel\n",
"from openai import AzureOpenAI\n",
"import os\n",
"\n",
"# Create MongoDB vector search query for \"Attention is All You Need\"\n",
"# (prompt already defined above, reuse if present; else keep this definition)\n",
"prompt = \"Give me top 3 learning points from `Attention is All You Need`, using only the retrieved context.\"\n",
"\n",
"# Generate embedding for the query using VoyageAI (vo already initialized earlier)\n",
"query_embd_context = vo.contextualized_embed(\n",
" inputs=[[prompt]],\n",
" model=\"voyage-context-3\",\n",
" input_type=\"query\"\n",
").results[0].embeddings[0]\n",
"query_embd_context = (\n",
" vo.contextualized_embed(\n",
" inputs=[[prompt]], model=\"voyage-context-3\", input_type=\"query\"\n",
" )\n",
" .results[0]\n",
" .embeddings[0]\n",
")\n",
"\n",
"# Vector search pipeline\n",
"search_pipeline = [\n",
@@ -548,21 +558,17 @@
" \"path\": \"embedding\",\n",
" \"queryVector\": query_embd_context,\n",
" \"numCandidates\": 10,\n",
" \"limit\": 10\n",
" \"limit\": 10,\n",
" }\n",
" },\n",
" {\n",
" \"$project\": {\n",
" \"text\": 1,\n",
" \"_id\": 0,\n",
" \"score\": {\"$meta\": \"vectorSearchScore\"}\n",
" }\n",
" }\n",
" {\"$project\": {\"text\": 1, \"_id\": 0, \"score\": {\"$meta\": \"vectorSearchScore\"}}},\n",
"]\n",
"\n",
"results = list(collection.aggregate(search_pipeline))\n",
"if not results:\n",
" raise ValueError(\"No vector search results returned. Verify the index is built before querying.\")\n",
" raise ValueError(\n",
" \"No vector search results returned. Verify the index is built before querying.\"\n",
" )\n",
"\n",
"context_texts = [doc[\"text\"] for doc in results]\n",
"combined_context = \"\\n\\n\".join(context_texts)\n",
@@ -579,7 +585,7 @@
"client = AzureOpenAI(\n",
" api_key=AZURE_OPENAI_API_KEY,\n",
" azure_endpoint=AZURE_OPENAI_ENDPOINT.rstrip(\"/\"),\n",
" api_version=AZURE_OPENAI_API_VERSION\n",
" api_version=AZURE_OPENAI_API_VERSION,\n",
")\n",
"\n",
"# Chat completion using retrieved context\n",
@@ -588,21 +594,23 @@
" messages=[\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"You are a helpful assistant. Use only the provided context to answer questions. If the context is insufficient, say so.\"\n",
" \"content\": \"You are a helpful assistant. Use only the provided context to answer questions. If the context is insufficient, say so.\",\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": f\"Context:\\n{combined_context}\\n\\nQuestion: {prompt}\"\n",
" }\n",
" \"content\": f\"Context:\\n{combined_context}\\n\\nQuestion: {prompt}\",\n",
" },\n",
" ],\n",
" temperature=0.2\n",
" temperature=0.2,\n",
")\n",
"\n",
"response_text = response.choices[0].message.content\n",
"\n",
"console = Console()\n",
"console.print(Panel(f\"{prompt}\", title=\"Prompt\", border_style=\"bold red\"))\n",
"console.print(Panel(response_text, title=\"Generated Content\", border_style=\"bold green\"))"
"console.print(\n",
" Panel(response_text, title=\"Generated Content\", border_style=\"bold green\")\n",
")"
]
},
{


@@ -123,6 +123,7 @@ nav:
- examples/rag_opensearch.ipynb
- examples/rag_weaviate.ipynb
- examples/retrieval_qdrant.ipynb
- examples/rag_mongodb.ipynb
- Integrations:
- Integrations: integrations/index.md
- 🤖 Agentic / AI dev frameworks: