Files
KG_RAG/.test_notebooks/llama_index_test.ipynb
2024-03-22 16:18:59 -07:00

255 lines
5.9 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e98fb0c6",
"metadata": {},
"outputs": [],
"source": [
"'''\n",
"Ref:\n",
" https://docs.llamaindex.ai/en/stable/examples/query_engine/knowledge_graph_rag_query_engine.html\n",
" https://docs.llamaindex.ai/en/stable/examples/index_structs/knowledge_graph/Neo4jKGIndexDemo.html\n",
"'''\n",
"\n",
"from llama_index.core import StorageContext\n",
"from llama_index.graph_stores.neo4j import Neo4jGraphStore\n",
"from llama_index.core.query_engine import RetrieverQueryEngine\n",
"from llama_index.core.retrievers import KnowledgeGraphRAGRetriever\n",
"from llama_index.llms.openai import OpenAI\n",
"from llama_index.core import Settings\n",
"from llama_index.llms.azure_openai import AzureOpenAI\n",
"from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding\n",
"\n",
"import os\n",
"from dotenv import load_dotenv, find_dotenv\n",
"\n",
"from IPython.display import display, Markdown\n"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "54aa26f3",
"metadata": {},
"outputs": [],
"source": [
"# Read the Azure OpenAI connection settings from ~/.gpt_config.env.\n",
"gpt_env_path = os.path.join(os.path.expanduser('~'), '.gpt_config.env')\n",
"load_dotenv(gpt_env_path)\n",
"\n",
"api_key = os.getenv('API_KEY')\n",
"azure_endpoint = os.getenv('RESOURCE_ENDPOINT')\n",
"api_version = os.getenv('API_VERSION')\n",
"model = \"gpt-35-turbo\"\n",
"\n",
"# Chat model: the deployment name is the same string as the model name.\n",
"llm = AzureOpenAI(\n",
"    azure_endpoint=azure_endpoint,\n",
"    api_key=api_key,\n",
"    api_version=api_version,\n",
"    model=model,\n",
"    deployment_name=model,\n",
")\n",
"Settings.llm = llm\n",
"\n",
"# Embedding model, reached through the same endpoint and key.\n",
"embed_model = AzureOpenAIEmbedding(\n",
"    azure_endpoint=azure_endpoint,\n",
"    api_key=api_key,\n",
"    api_version=api_version,\n",
"    model=\"text-embedding-ada-002\",\n",
"    deployment_name=\"text-embedding-ada-002\",\n",
")\n",
"Settings.embed_model = embed_model\n",
"Settings.chunk_size = 512\n"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "cc0e4e67",
"metadata": {},
"outputs": [],
"source": [
"# Read the Neo4j connection settings from ~/.spoke_neo4j_config.env.\n",
"neo4j_env_path = os.path.join(os.path.expanduser('~'), '.spoke_neo4j_config.env')\n",
"load_dotenv(neo4j_env_path)\n",
"\n",
"# Each value is None when its variable is not set.\n",
"username, password, url, database = (\n",
"    os.getenv(key) for key in ('NEO4J_USER', 'NEO4J_PSW', 'NEO4J_URL', 'NEO4J_DB')\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "5b8e6b05",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 78.5 ms, sys: 8.57 ms, total: 87.1 ms\n",
"Wall time: 14.3 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# Build the Neo4j graph store from the connection settings read from the environment.\n",
"graph_store = Neo4jGraphStore(\n",
"    url=url,\n",
"    database=database,\n",
"    username=username,\n",
"    password=password,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "5c26c852",
"metadata": {},
"outputs": [],
"source": [
"# Default storage context with the Neo4j graph store plugged in.\n",
"storage_context = StorageContext.from_defaults(\n",
"    graph_store=graph_store,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "29ee8d5e",
"metadata": {},
"outputs": [],
"source": [
"# Optional check, disabled by default: uncomment to inspect the graph store's schema.\n",
"# storage_context.graph_store.get_schema()\n"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "b3bf11d7",
"metadata": {},
"outputs": [],
"source": [
"# Knowledge-graph retriever in keyword mode, reading from the storage context.\n",
"graph_rag_retriever = KnowledgeGraphRAGRetriever(\n",
"    retriever_mode='keyword',\n",
"    storage_context=storage_context,\n",
"    verbose=True,\n",
"    # with_nl2graphquery=True,  # left disabled\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "146ece63",
"metadata": {},
"outputs": [],
"source": [
"# Query engine built on top of the knowledge-graph retriever.\n",
"query_engine = RetrieverQueryEngine.from_args(graph_rag_retriever)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "d71cc63a",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"<b>Empty Response</b>"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 57.6 ms, sys: 5.54 ms, total: 63.2 ms\n",
"Wall time: 1.66 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# Alternative question tried earlier:\n",
"#   \"What are the genes associated with hypochondrogenesis?\"\n",
"# An async call was also sketched: `await query_engine.aquery(...)`.\n",
"question = \"What do you know about hypochondrogenesis?\"\n",
"\n",
"response = query_engine.query(question)\n",
"display(Markdown(f\"<b>{response}</b>\"))"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "b76767a0",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"<b>Empty Response</b>"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Render the current `response` again, in bold, without re-running the query.\n",
"display(Markdown(f\"<b>{response}</b>\"))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "de5794ed",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}