docs: Add example with MongoDB

This commit is contained in:
utsavMongoDB
2025-10-03 15:11:01 +05:30
parent 5c7a249291
commit 25436e543c
2 changed files with 55 additions and 46 deletions

View File

@@ -68,7 +68,7 @@
"import warnings\n", "import warnings\n",
"\n", "\n",
"warnings.filterwarnings(\"ignore\")\n", "warnings.filterwarnings(\"ignore\")\n",
"logging.getLogger(\"pymongo\").setLevel(logging.ERROR)\n" "logging.getLogger(\"pymongo\").setLevel(logging.ERROR)"
] ]
}, },
{ {
@@ -134,8 +134,8 @@
"source": [ "source": [
"# Influential machine learning papers\n", "# Influential machine learning papers\n",
"source_urls = [\n", "source_urls = [\n",
" \"https://arxiv.org/pdf/1706.03762\" # Attention is All You Need\n", " \"https://arxiv.org/pdf/1706.03762\" # Attention is All You Need\n",
" ]" "]"
] ]
}, },
{ {
@@ -185,8 +185,10 @@
} }
], ],
"source": [ "source": [
"from docling.document_converter import DocumentConverter\n",
"from pprint import pprint\n", "from pprint import pprint\n",
"\n",
"from docling.document_converter import DocumentConverter\n",
"\n",
"# Instantiate the doc converter\n", "# Instantiate the doc converter\n",
"doc_converter = DocumentConverter()\n", "doc_converter = DocumentConverter()\n",
"\n", "\n",
@@ -288,11 +290,11 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"# Voyage API key\n",
"VOYAGE_API_KEY=\"**********************\" \n",
"\n",
"import voyageai\n", "import voyageai\n",
"\n", "\n",
"# Voyage API key\n",
"VOYAGE_API_KEY = \"**********************\"\n",
"\n",
"# Initialize the VoyageAI client\n", "# Initialize the VoyageAI client\n",
"vo = voyageai.Client(VOYAGE_API_KEY)\n", "vo = voyageai.Client(VOYAGE_API_KEY)\n",
"result = vo.contextualized_embed(inputs=[chunk_texts], model=\"voyage-context-3\")\n", "result = vo.contextualized_embed(inputs=[chunk_texts], model=\"voyage-context-3\")\n",
@@ -326,7 +328,10 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Combine chunks with their embeddings\n", "# Combine chunks with their embeddings\n",
"chunk_data = [{\"text\": text, \"embedding\": emb} for text, emb in zip(chunk_texts, contextualized_chunk_embds)]" "chunk_data = [\n",
" {\"text\": text, \"embedding\": emb}\n",
" for text, emb in zip(chunk_texts, contextualized_chunk_embds)\n",
"]"
] ]
}, },
{ {
@@ -362,9 +367,11 @@
"# Insert to MongoDB\n", "# Insert to MongoDB\n",
"from pymongo import MongoClient\n", "from pymongo import MongoClient\n",
"\n", "\n",
"client = MongoClient(\"mongodb+srv://*******.mongodb.net/\") # Replace with your MongoDB connection string\n", "client = MongoClient(\n",
"db = client[\"rag_db\"] # Database name\n", " \"mongodb+srv://*******.mongodb.net/\"\n",
"collection = db[\"documents\"] # Collection name\n", ") # Replace with your MongoDB connection string\n",
"db = client[\"rag_db\"] # Database name\n",
"collection = db[\"documents\"] # Collection name\n",
"\n", "\n",
"# Insert chunk data into MongoDB\n", "# Insert chunk data into MongoDB\n",
"response = collection.insert_many(chunk_data)\n", "response = collection.insert_many(chunk_data)\n",
@@ -399,21 +406,21 @@
"\n", "\n",
"# Create your index model, then create the search index\n", "# Create your index model, then create the search index\n",
"search_index_model = SearchIndexModel(\n", "search_index_model = SearchIndexModel(\n",
" definition={\n", " definition={\n",
" \"fields\": [\n", " \"fields\": [\n",
" {\n", " {\n",
" \"type\": \"vector\",\n", " \"type\": \"vector\",\n",
" \"path\": \"embedding\",\n", " \"path\": \"embedding\",\n",
" \"numDimensions\": 1024,\n", " \"numDimensions\": 1024,\n",
" \"similarity\": \"dotProduct\"\n", " \"similarity\": \"dotProduct\",\n",
" }\n", " }\n",
" ]\n", " ]\n",
" },\n", " },\n",
" name=\"vector_index\",\n", " name=\"vector_index\",\n",
" type=\"vectorSearch\"\n", " type=\"vectorSearch\",\n",
")\n", ")\n",
"result = collection.create_search_index(model=search_index_model)\n", "result = collection.create_search_index(model=search_index_model)\n",
"print(\"New search index named \" + result + \" is building.\")\n" "print(\"New search index named \" + result + \" is building.\")"
] ]
}, },
{ {
@@ -524,21 +531,24 @@
} }
], ],
"source": [ "source": [
"import os\n",
"\n",
"from openai import AzureOpenAI\n",
"from rich.console import Console\n", "from rich.console import Console\n",
"from rich.panel import Panel\n", "from rich.panel import Panel\n",
"from openai import AzureOpenAI\n",
"import os\n",
"\n", "\n",
"# Create MongoDB vector search query for \"Attention is All You Need\"\n", "# Create MongoDB vector search query for \"Attention is All You Need\"\n",
"# (prompt already defined above, reuse if present; else keep this definition)\n", "# (prompt already defined above, reuse if present; else keep this definition)\n",
"prompt = \"Give me top 3 learning points from `Attention is All You Need`, using only the retrieved context.\"\n", "prompt = \"Give me top 3 learning points from `Attention is All You Need`, using only the retrieved context.\"\n",
"\n", "\n",
"# Generate embedding for the query using VoyageAI (vo already initialized earlier)\n", "# Generate embedding for the query using VoyageAI (vo already initialized earlier)\n",
"query_embd_context = vo.contextualized_embed(\n", "query_embd_context = (\n",
" inputs=[[prompt]],\n", " vo.contextualized_embed(\n",
" model=\"voyage-context-3\",\n", " inputs=[[prompt]], model=\"voyage-context-3\", input_type=\"query\"\n",
" input_type=\"query\"\n", " )\n",
").results[0].embeddings[0]\n", " .results[0]\n",
" .embeddings[0]\n",
")\n",
"\n", "\n",
"# Vector search pipeline\n", "# Vector search pipeline\n",
"search_pipeline = [\n", "search_pipeline = [\n",
@@ -548,21 +558,17 @@
" \"path\": \"embedding\",\n", " \"path\": \"embedding\",\n",
" \"queryVector\": query_embd_context,\n", " \"queryVector\": query_embd_context,\n",
" \"numCandidates\": 10,\n", " \"numCandidates\": 10,\n",
" \"limit\": 10\n", " \"limit\": 10,\n",
" }\n", " }\n",
" },\n", " },\n",
" {\n", " {\"$project\": {\"text\": 1, \"_id\": 0, \"score\": {\"$meta\": \"vectorSearchScore\"}}},\n",
" \"$project\": {\n",
" \"text\": 1,\n",
" \"_id\": 0,\n",
" \"score\": {\"$meta\": \"vectorSearchScore\"}\n",
" }\n",
" }\n",
"]\n", "]\n",
"\n", "\n",
"results = list(collection.aggregate(search_pipeline))\n", "results = list(collection.aggregate(search_pipeline))\n",
"if not results:\n", "if not results:\n",
" raise ValueError(\"No vector search results returned. Verify the index is built before querying.\")\n", " raise ValueError(\n",
" \"No vector search results returned. Verify the index is built before querying.\"\n",
" )\n",
"\n", "\n",
"context_texts = [doc[\"text\"] for doc in results]\n", "context_texts = [doc[\"text\"] for doc in results]\n",
"combined_context = \"\\n\\n\".join(context_texts)\n", "combined_context = \"\\n\\n\".join(context_texts)\n",
@@ -579,7 +585,7 @@
"client = AzureOpenAI(\n", "client = AzureOpenAI(\n",
" api_key=AZURE_OPENAI_API_KEY,\n", " api_key=AZURE_OPENAI_API_KEY,\n",
" azure_endpoint=AZURE_OPENAI_ENDPOINT.rstrip(\"/\"),\n", " azure_endpoint=AZURE_OPENAI_ENDPOINT.rstrip(\"/\"),\n",
" api_version=AZURE_OPENAI_API_VERSION\n", " api_version=AZURE_OPENAI_API_VERSION,\n",
")\n", ")\n",
"\n", "\n",
"# Chat completion using retrieved context\n", "# Chat completion using retrieved context\n",
@@ -588,21 +594,23 @@
" messages=[\n", " messages=[\n",
" {\n", " {\n",
" \"role\": \"system\",\n", " \"role\": \"system\",\n",
" \"content\": \"You are a helpful assistant. Use only the provided context to answer questions. If the context is insufficient, say so.\"\n", " \"content\": \"You are a helpful assistant. Use only the provided context to answer questions. If the context is insufficient, say so.\",\n",
" },\n", " },\n",
" {\n", " {\n",
" \"role\": \"user\",\n", " \"role\": \"user\",\n",
" \"content\": f\"Context:\\n{combined_context}\\n\\nQuestion: {prompt}\"\n", " \"content\": f\"Context:\\n{combined_context}\\n\\nQuestion: {prompt}\",\n",
" }\n", " },\n",
" ],\n", " ],\n",
" temperature=0.2\n", " temperature=0.2,\n",
")\n", ")\n",
"\n", "\n",
"response_text = response.choices[0].message.content\n", "response_text = response.choices[0].message.content\n",
"\n", "\n",
"console = Console()\n", "console = Console()\n",
"console.print(Panel(f\"{prompt}\", title=\"Prompt\", border_style=\"bold red\"))\n", "console.print(Panel(f\"{prompt}\", title=\"Prompt\", border_style=\"bold red\"))\n",
"console.print(Panel(response_text, title=\"Generated Content\", border_style=\"bold green\"))" "console.print(\n",
" Panel(response_text, title=\"Generated Content\", border_style=\"bold green\")\n",
")"
] ]
}, },
{ {

View File

@@ -123,6 +123,7 @@ nav:
- examples/rag_opensearch.ipynb - examples/rag_opensearch.ipynb
- examples/rag_weaviate.ipynb - examples/rag_weaviate.ipynb
- examples/retrieval_qdrant.ipynb - examples/retrieval_qdrant.ipynb
- examples/rag_mongodb.ipynb
- Integrations: - Integrations:
- Integrations: integrations/index.md - Integrations: integrations/index.md
- 🤖 Agentic / AI dev frameworks: - 🤖 Agentic / AI dev frameworks: