Mirror of https://github.com/pinecone-io/examples.git
synced 2023-10-11 20:04:54 +03:00
added delete index at end
@@ -25,11 +25,13 @@
"source": [
"import pinecone\n",
"\n",
"index_name = \"fine-tune-vector-search\"\n",
"\n",
"pinecone.init(\n",
" api_key=\"YOUR_API_KEY\",\n",
" environment=\"YOUR_ENV\" # find next to API key in console\n",
")\n",
"index = pinecone.Index(\"imagenet-query-trainer-clip\")"
"index = pinecone.Index(index_name)"
]
},
{
@@ -1363,6 +1365,24 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Once we've finished training the classifier we can delete our Pinecone index to save resources."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pinecone.delete_index(index_name)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
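Taken together, the hunks above parameterize the index name and add a cleanup cell at the end of the notebook. A minimal sketch of the full lifecycle this commit standardizes, using the same pre-v3 `pinecone-client` API (the dimension and metric shown here are illustrative assumptions, not values from the notebook):

```python
import pinecone

index_name = "fine-tune-vector-search"

pinecone.init(
    api_key="YOUR_API_KEY",   # app.pinecone.io
    environment="YOUR_ENV"    # find next to API key in console
)

# create the index only if it does not already exist (dimension/metric are placeholders)
if index_name not in pinecone.list_indexes():
    pinecone.create_index(index_name, dimension=512, metric="cosine")

# connect and use the index...
index = pinecone.Index(index_name)

# ...then delete it at the end to free resources
pinecone.delete_index(index_name)
```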
@@ -57,29 +57,30 @@
"source": [
"import pinecone # pip install pinecone-client\n",
"\n",
"API_KEY = \"YOUR_API_KEY\" # get api key app.pinecone.io\n",
"index_name = \"negative-mining\"\n",
"\n",
"pinecone.init(\n",
" api_key=API_KEY,\n",
" api_key=\"YOUR_API_KEY\", # app.pinecone.io\n",
" environment=\"YOUR_ENV\" # find next to API key in console\n",
")\n",
"# create a new negative mining index if does not already exist\n",
"if 'negative-mine' not in pinecone.list_indexes():\n",
"if index_name not in pinecone.list_indexes():\n",
" pinecone.create_index(\n",
" 'negative-mine',\n",
" index_name,\n",
" dimension=model.get_sentence_embedding_dimension(),\n",
" metric='dotproduct',\n",
" pods=1\n",
" )\n",
"# connect\n",
"index = pinecone.Index('negative-mine')"
"index = pinecone.Index(index_name)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we encode the passages and store in the `negative-mine` index."
"Now we encode the passages and store in the `negative-mining` index."
]
},
{
@@ -227,7 +228,7 @@
"metadata": {},
"outputs": [],
"source": [
"pinecone.delete_index('negative-mine') # delete the index when done to avoid higher charges (if using multiple pods)"
"pinecone.delete_index(index_name) # delete the index when done to avoid higher charges (if using multiple pods)"
]
},
{
@@ -260,7 +261,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]"
"version": "3.10.9"
},
"vscode": {
"interpreter": {
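For context, this notebook builds a hard-negative-mining index over encoded passages. A rough sketch of how the index created above is typically populated and queried for negative candidates; the encoder model, the placeholder passages, and the query text are assumptions for illustration:

```python
from sentence_transformers import SentenceTransformer
import pinecone

pinecone.init(api_key="YOUR_API_KEY", environment="YOUR_ENV")
index = pinecone.Index("negative-mining")

model = SentenceTransformer("all-MiniLM-L6-v2")            # assumed encoder
passages = ["first passage text", "second passage text"]   # placeholder data

# encode the passages and upsert them as (id, vector) pairs
embeds = model.encode(passages).tolist()
index.upsert(vectors=[(str(i), emb) for i, emb in enumerate(embeds)])

# query with a training example; high-scoring but non-relevant matches
# are candidates for hard negatives
xq = model.encode("some training query").tolist()
res = index.query(xq, top_k=10)
candidate_ids = [match["id"] for match in res["matches"]]
```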
@@ -431,8 +431,8 @@
"import pinecone\n",
"\n",
"pinecone.init(\n",
" api_key='<<YOUR_API_KEY>>', # app.pinecone.io\n",
" environment='us-west1-gcp'\n",
" api_key='YOUR_API_KEY', # app.pinecone.io\n",
" environment='YOUR_ENV' # find next to API key in console\n",
")"
]
},
@@ -543,7 +543,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]"
"version": "3.10.9"
},
"vscode": {
"interpreter": {
@@ -42,8 +42,8 @@
"import pinecone\n",
"\n",
"pinecone.init(\n",
" api_key='<<YOUR_API_KEY>>', # app.pinecone.io\n",
" environment='us-west1-gcp'\n",
" api_key='YOUR_API_KEY', # app.pinecone.io\n",
" environment='YOUR_ENV' # find next to api key in console\n",
")\n",
"\n",
"index_id = 'arxiv-search'\n",
@@ -833,6 +833,34 @@
"source": [
"In this case it seems the best result is in position *5* (still not bad out of 2M+ abstracts) and the top result seems relevant but might not quite answer our query."
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "67dfc5ea",
"metadata": {},
"source": [
"Once we're finished, we delete the index to save resources."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "144fd999",
"metadata": {},
"outputs": [],
"source": [
"pinecone.delete_index(index_id)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "14fa79c2",
"metadata": {},
"source": [
"---"
]
}
],
"metadata": {
@@ -857,7 +885,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]"
"version": "3.10.9"
},
"vscode": {
"interpreter": {
@@ -1028,6 +1028,31 @@
"source": [
"The advantage of Tensorflow.js could have been framed better and the fact that PyTorch has no equivalent explicitly stated. However, the answer is good and gives a nice summary and answer to our question — using information pulled from multiple sources."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Once we're finished with the index we delete it to save resources:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pinecone.delete_index(index_name)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"---"
]
}
],
"metadata": {
@@ -988,6 +988,31 @@
"source": [
"And we get a pretty great answer straight away, specifying to use _multiple-rankings loss_ (also called _multiple negatives ranking loss_)."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Once we're done with the index we delete it to save resources:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pinecone.delete_index(index_name)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"---"
]
}
],
"metadata": {
@@ -1009,7 +1034,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12 (main, Apr 5 2022, 01:52:34) \n[Clang 12.0.0 ]"
"version": "3.9.12"
},
"vscode": {
"interpreter": {
@@ -946,13 +946,31 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Looks great, our semantic search pipeline is clearly able to identify the meaning between each of our queries and return the most semantically similar questions from the already indexed questions.\n",
"\n",
"Once we're done with the index we delete it to save resources:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pinecone.delete_index(index_name)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "gR-oj6C9NA1W"
},
"source": [
"Looks great, our semantic search pipeline is clearly able to identify the meaning between each of our queries and return the most semantically similar questions from the already indexed questions.\n",
"\n",
"---"
]
}
@@ -1020,6 +1020,31 @@
"for match in res['results'][0]['matches']:\n",
" print(f\"{match['score']:.2f}: {match['metadata']['title']} ({match['metadata']['link_flair_text']})\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Once we're finished with the index we delete it to save resources."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pinecone.delete_index(index_name)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"---"
]
}
],
"metadata": {
@@ -1044,7 +1069,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]"
"version": "3.10.9"
},
"orig_nbformat": 4,
"vscode": {
@@ -404,11 +404,11 @@
"import pinecone\n",
"\n",
"pinecone.init(\n",
" api_key='<<PINECONE_API_KEY>>', # app.pinecone.io\n",
" api_key='PINECONE_API_KEY', # app.pinecone.io\n",
" environment=\"YOUR_ENV\" # find next to API key in console\n",
")\n",
"\n",
"index_name = 'apr-demo'\n",
"index_name = 'beyond-search-openai'\n",
"\n",
"if not index_name in pinecone.list_indexes():\n",
" pinecone.create_index(\n",
@@ -515,7 +515,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]"
"version": "3.10.9"
},
"orig_nbformat": 4,
"vscode": {
@@ -29,7 +29,7 @@
"import openai\n",
"from openai.embeddings_utils import get_embedding\n",
"\n",
"openai.api_key = '<<OPENAI_API_KEY>>' # beta.openai.com/login/"
"openai.api_key = 'OPENAI_API_KEY' # platform.openai.com/login/"
]
},
{
@@ -63,11 +63,11 @@
"\n",
"def load_index():\n",
" pinecone.init(\n",
" api_key='<<PINECONE_API_KEY>>', # app.pinecone.io\n",
" api_key='PINECONE_API_KEY', # app.pinecone.io\n",
" environment=\"YOUR_ENV\" # find next to API key in console\n",
" )\n",
"\n",
" index_name = 'apr-demo'\n",
" index_name = 'beyond-search-openai'\n",
"\n",
" if not index_name in pinecone.list_indexes():\n",
" raise KeyError(f\"Index '{index_name}' does not exist.\")\n",
@@ -456,6 +456,23 @@
"print(answer_question(index, question=\"How can I use embeddings to visualize my data?\"))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Once you're finished with the index, delete it to save resources:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pinecone.delete_index(index_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
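The `load_index` helper in the hunk above initializes the connection and raises if the index is missing; the rest of the function is truncated by the diff. A sketch of the complete check-then-connect pattern, where the final `return` is an assumption based on how `index` is used later in the notebook:

```python
import pinecone

def load_index():
    pinecone.init(
        api_key='PINECONE_API_KEY',  # app.pinecone.io
        environment='YOUR_ENV'       # find next to API key in console
    )

    index_name = 'beyond-search-openai'

    # fail fast if the index was never built
    if index_name not in pinecone.list_indexes():
        raise KeyError(f"Index '{index_name}' does not exist.")

    # otherwise hand back a connection to the existing index (assumed final step)
    return pinecone.Index(index_name)
```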
@@ -3854,16 +3854,18 @@
"source": [
"import pinecone\n",
"\n",
"index_name = 'semantic-search-openai'\n",
"\n",
"# initialize connection to pinecone (get API key at app.pinecone.io)\n",
"pinecone.init(\n",
" api_key=\"<<PINECONE API KEY>>\",\n",
" environment=\"us-west1-gcp\"\n",
" api_key=\"PINECONE_API_KEY\",\n",
" environment=\"YOUR_ENV\" # find next to api key in console\n",
")\n",
"# check if 'openai' index already exists (only create index if not)\n",
"if 'openai' not in pinecone.list_indexes():\n",
" pinecone.create_index('openai', dimension=len(embeds[0]))\n",
"if index_name not in pinecone.list_indexes():\n",
" pinecone.create_index(index_name, dimension=len(embeds[0]))\n",
"# connect to index\n",
"index = pinecone.Index('openai')"
"index = pinecone.Index(index_name)"
]
},
{
@@ -4233,13 +4235,31 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Looks great, our semantic search pipeline is clearly able to identify the meaning between each of our queries and return the most semantically similar questions from the already indexed questions.\n",
"\n",
"Once we're finished with the index we delete it to save resources."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pinecone.delete_index(index_name)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "ItsvY1lej6dz"
},
"source": [
"Looks great, our semantic search pipeline is clearly able to identify the meaning between each of our queries and return the most semantically similar questions from the already indexed questions.\n",
"\n",
"---"
]
}
@@ -4263,7 +4283,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]"
"version": "3.10.9"
},
"orig_nbformat": 4,
"vscode": {
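The `embeds` that size the index above come from OpenAI's embedding endpoint earlier in that notebook. A hedged sketch of how such embeddings are typically generated and upserted with the pre-1.0 `openai` client; the model name and sample texts are assumptions:

```python
import openai
import pinecone

openai.api_key = 'OPENAI_API_KEY'                   # platform.openai.com
texts = ["first text chunk", "second text chunk"]   # placeholder data

# embed the texts (pre-1.0 openai client call style)
res = openai.Embedding.create(input=texts, engine='text-embedding-ada-002')
embeds = [record['embedding'] for record in res['data']]

# create the index sized to the embedding dimension, then upsert
pinecone.init(api_key="PINECONE_API_KEY", environment="YOUR_ENV")
index_name = 'semantic-search-openai'
if index_name not in pinecone.list_indexes():
    pinecone.create_index(index_name, dimension=len(embeds[0]))
index = pinecone.Index(index_name)
index.upsert(vectors=list(zip([str(i) for i in range(len(texts))], embeds)))
```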
@@ -1556,6 +1556,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "KxQuX6BaMWEa"
@@ -1568,15 +1569,15 @@
"To build the vector DB we will need to index everything, for this we will need to initialize our connection to Pinecone, create an index, and insert everything in the format:\n",
"\n",
"```python\n",
"pinecone.Vector(\n",
" id=\"id\",\n",
" values=[0.1, 0.2, ...], # dense vec\n",
" sparse_values=pinecone.SparseValues(\n",
" indices=[\"23\", \"718\"],\n",
" values=[0.25, 0.77]\n",
" ), # sparse vec\n",
" metadata=(<Struct object>, {\"context\": \"some text here\"}) # metadata dict\n",
")\n",
"{\n",
" 'id': 'id-123',\n",
" 'values': [0.1, 0.2, ...], # dense vec\n",
" 'sparse_values': {\n",
" 'indices': [23, 718],\n",
" 'values': [0.25, 0.77]\n",
" }, # sparse vec\n",
" 'metadata': {\"context\": \"some text here\"} # metadata dict\n",
"}\n",
"```\n",
"\n",
"To make things easier we can create a helper function to transform a list of records from `data` into this format, we'll call it `builder`:"
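The hunk above replaces the object-based `pinecone.Vector`/`SparseValues` records with plain dictionaries. As a small illustration of how records in that dictionary format are then upserted and queried as sparse-dense (hybrid) vectors; the toy values and the `index` handle are placeholders rather than the notebook's own data:

```python
# `index` is assumed to be the Pinecone index created earlier in the notebook
upserts = [
    {
        'id': 'id-123',
        'values': [0.1, 0.2, 0.3],                                        # dense vector (toy)
        'sparse_values': {'indices': [23, 718], 'values': [0.25, 0.77]},  # sparse vector
        'metadata': {'context': 'some text here'}
    }
]
index.upsert(vectors=upserts)

# hybrid query: a dense vector plus a sparse component
res = index.query(
    vector=[0.15, 0.18, 0.25],
    sparse_vector={'indices': [23], 'values': [0.9]},
    top_k=3,
    include_metadata=True
)
```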
@@ -1590,7 +1591,6 @@
},
"outputs": [],
"source": [
"from google.protobuf.struct_pb2 import Struct\n",
"import pinecone\n",
"\n",
"\n",
@@ -1608,29 +1608,26 @@
" sparse_vecs = sparse_model(\n",
" d_kwargs=input_ids.to(device)\n",
" )['d_rep'].squeeze()\n",
" # convert to SparseValues format\n",
" # convert to upsert format\n",
" upserts = []\n",
" for _id, dense_vec, sparse_vec, context in zip(ids, dense_vecs, sparse_vecs, contexts):\n",
" # extract columns where there are non-zero weights\n",
" indices = sparse_vec.nonzero().squeeze().cpu().tolist() # positions\n",
" values = sparse_vec[indices].cpu().tolist() # weights/scores\n",
" # build sparse dictionary\n",
" sparse_dict = {\n",
" # build sparse values dictionary\n",
" sparse_values = {\n",
" \"indices\": indices,\n",
" \"values\": values\n",
" }\n",
" # create SparseValues (or GRPCSparseValues)\n",
" sparse_values = pinecone.GRPCSparseValues(**sparse_dict)\n",
" # build metadata struct\n",
" metadata = Struct()\n",
" metadata.update({'context': context})\n",
" metadata = {'context': context}\n",
" # append all to upserts list as pinecone.Vector (or GRPCVector)\n",
" upserts.append(\n",
" pinecone.GRPCVector(\n",
" id=_id, values=dense_vec, sparse_values=sparse_values,\n",
" metadata=metadata\n",
" )\n",
" )\n",
" upserts.append({\n",
" 'id': _id,\n",
" 'values': dense_vec,\n",
" 'sparse_values': sparse_values,\n",
" 'metadata': metadata\n",
" })\n",
" return upserts"
]
},
@@ -4910,7 +4907,7 @@
},
"outputs": [],
"source": [
"index_name = 'splade-pubmed'\n",
"index_name = 'pubmed-splade'\n",
"\n",
"pinecone.create_index(\n",
" index_name,\n",
@@ -2026,6 +2026,25 @@
"We leave this to the reader to assess, as it is subjective. One thing to notice is that the similarity scores are a bit lower when retrieving from the top news sources. This is not surprising, since one might expect relevant results to come from more scientific sources such as _climatecentral.org_ and _energylivenews.com_, like in the non-filtered query."
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "f90ff64f",
"metadata": {},
"source": [
"After we've finished, delete the index to save resources."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "98a648b4",
"metadata": {},
"outputs": [],
"source": [
"pinecone.delete_index(INDEX_NAME)"
]
},
{
"cell_type": "markdown",
"id": "d3a42e36",
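The passage above refers to restricting retrieval to a set of top news sources via metadata filtering. A hedged sketch of what such a filtered query looks like with this client; the `source` field name, the source list, and the query vector `xq` are assumptions for illustration:

```python
# `index` and the query vector `xq` are assumed to exist from earlier in the notebook
top_sources = ['nytimes.com', 'bbc.com']  # hypothetical "top news sources"

res = index.query(
    xq,
    top_k=5,
    include_metadata=True,
    filter={'source': {'$in': top_sources}}  # restrict matches by metadata field
)
for match in res['matches']:
    print(f"{match['score']:.2f}: {match['metadata'].get('source')}")
```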