From 14382ef8d3597d0ffe273350f49ed009160fee3c Mon Sep 17 00:00:00 2001
From: Roy Miara <miararoy@gmail.com>
Date: Mon, 7 Aug 2023 17:45:54 +0300
Subject: [PATCH 1/3] add how to notebook for ds

---
 .../how_to_create_pinecone_datasets.ipynb     | 560 ++++++++++++++++++
 1 file changed, 560 insertions(+)
 create mode 100644 docs/assets/how_to_create_pinecone_datasets.ipynb
diff --git a/docs/assets/how_to_create_pinecone_datasets.ipynb b/docs/assets/how_to_create_pinecone_datasets.ipynb
new file mode 100644
index 0000000..e637c83
--- /dev/null
+++ b/docs/assets/how_to_create_pinecone_datasets.ipynb
@@ -0,0 +1,560 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/docs/assets/how_to_create_pinecone_datasets.ipynb) \n",
+    "[![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/docs/assets/how_to_create_pinecone_datasets.ipynb)\n",
+    "\n",
+    "# Creaeting Pinecone Datasets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook will walk you through the process of creating a Pinecone dataset from a pandas Dataframe."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 1: create a simple sample dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Defaulting to user installation because normal site-packages is not writeable\n",
+      "Requirement already satisfied: pandas in /Users/roymiara/Library/Python/3.9/lib/python/site-packages (2.0.2)\n",
+      "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/roymiara/Library/Python/3.9/lib/python/site-packages (from pandas) (2.8.2)\n",
+      "Requirement already satisfied: pytz>=2020.1 in /Users/roymiara/Library/Python/3.9/lib/python/site-packages (from pandas) (2022.2.1)\n",
+      "Requirement already satisfied: tzdata>=2022.1 in /Users/roymiara/Library/Python/3.9/lib/python/site-packages (from pandas) (2022.7)\n",
+      "Requirement already satisfied: numpy>=1.20.3 in /Users/roymiara/Library/Python/3.9/lib/python/site-packages (from pandas) (1.24.2)\n",
+      "Requirement already satisfied: six>=1.5 in /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/site-packages (from python-dateutil>=2.8.2->pandas) (1.15.0)\n",
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>values</th>\n",
+       "      <th>sparse_values</th>\n",
+       "      <th>metadata</th>\n",
+       "      <th>blob</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>[0.1, 0.2, 0.3]</td>\n",
+       "      <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
+       "      <td>{'title': 'title1', 'url': 'url1'}</td>\n",
+       "      <td>{'extra_field': 'extra_value'}</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>[0.4, 0.5, 0.6]</td>\n",
+       "      <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
+       "      <td>{'title': 'title2', 'url': 'url2'}</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>[0.7, 0.8, 0.9]</td>\n",
+       "      <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
+       "      <td>{'title': 'title3', 'url': 'url3'}</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>[1.0, 1.1, 1.2]</td>\n",
+       "      <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
+       "      <td>{'title': 'title4', 'url': 'url4'}</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>[1.3, 1.4, 1.5]</td>\n",
+       "      <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
+       "      <td>{'title': 'title5', 'url': 'url5'}</td>\n",
+       "      <td>{'another_field': 'another_value'}</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  id           values                                      sparse_values  \\\n",
+       "0  1  [0.1, 0.2, 0.3]  {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}   \n",
+       "1  2  [0.4, 0.5, 0.6]  {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}   \n",
+       "2  3  [0.7, 0.8, 0.9]  {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}   \n",
+       "3  4  [1.0, 1.1, 1.2]  {'indices': [10, 11, 12], 'values': [1.0, 1.1,...   \n",
+       "4  5  [1.3, 1.4, 1.5]  {'indices': [13, 14, 15], 'values': [1.3, 1.4,...   \n",
+       "\n",
+       "                             metadata                                blob  \n",
+       "0  {'title': 'title1', 'url': 'url1'}      {'extra_field': 'extra_value'}  \n",
+       "1  {'title': 'title2', 'url': 'url2'}                                None  \n",
+       "2  {'title': 'title3', 'url': 'url3'}                                None  \n",
+       "3  {'title': 'title4', 'url': 'url4'}                                None  \n",
+       "4  {'title': 'title5', 'url': 'url5'}  {'another_field': 'another_value'}  "
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "documents = [\n",
+    "    {\n",
+    "        \"id\": \"1\",\n",
+    "        \"values\": [0.1, 0.2, 0.3],\n",
+    "        \"sparse_values\": {\"indices\": [1, 2, 3], \"values\": [0.1, 0.2, 0.3]},\n",
+    "        \"metadata\": {\"title\": \"title1\", \"url\": \"url1\"},\n",
+    "        \"blob\": {\"extra_field\": \"extra_value\"},\n",
+    "    },\n",
+    "    {\n",
+    "        \"id\": \"2\",\n",
+    "        \"values\": [0.4, 0.5, 0.6],\n",
+    "        \"sparse_values\": {\"indices\": [4, 5, 6], \"values\": [0.4, 0.5, 0.6]},\n",
+    "        \"metadata\": {\"title\": \"title2\", \"url\": \"url2\"},\n",
+    "        \"blob\": None,\n",
+    "    },\n",
+    "    {\n",
+    "        \"id\": \"3\",\n",
+    "        \"values\": [0.7, 0.8, 0.9],\n",
+    "        \"sparse_values\": {\"indices\": [7, 8, 9], \"values\": [0.7, 0.8, 0.9]},\n",
+    "        \"metadata\": {\"title\": \"title3\", \"url\": \"url3\"},\n",
+    "        \"blob\": None,\n",
+    "    },\n",
+    "    {\n",
+    "        \"id\": \"4\",\n",
+    "        \"values\": [1.0, 1.1, 1.2],\n",
+    "        \"sparse_values\": {\"indices\": [10, 11, 12], \"values\": [1.0, 1.1, 1.2]},\n",
+    "        \"metadata\": {\"title\": \"title4\", \"url\": \"url4\"},\n",
+    "        \"blob\": None,\n",
+    "    },\n",
+    "    {\n",
+    "        \"id\": \"5\",\n",
+    "        \"values\": [1.3, 1.4, 1.5],\n",
+    "        \"sparse_values\": {\"indices\": [13, 14, 15], \"values\": [1.3, 1.4, 1.5]},\n",
+    "        \"metadata\": {\"title\": \"title5\", \"url\": \"url5\"},\n",
+    "        \"blob\": {\"another_field\": \"another_value\"},\n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "df = pd.DataFrame(documents)\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Some notes:\n",
+    "1. Note that we have both metadata field and 'blob' field, the metadata field is the acutal pinecone metadata we will use in our index, blob, is an additional field that we can use to store any additional information we want to store along with the Dataset.\n",
+    "2. here we used both 'values' and 'sparse_values', however, sparse_values is not a mandatory field, if you don't have sparse values keep it empty."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Pinecone Dataset\n",
+    "\n",
+    "Now that we have our data Ready, we can create a Pinecone Dataset. A Pinecone Dataset is a collection of documtents, queries and Metadata. We can create a Pinecone \n",
+    "* Documents: a collection of records with Id, Vectors (dense, sparse) and metadata\n",
+    "* Queries: a collection of queries with Vectors (dense, sparse), metadata filter and top_k\n",
+    "* Metadata: a defintion of the dataset: Name, dimension, metric, embedding models, etc."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install pinecone-datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pinecone_datasets import Dataset, DatasetMetadata"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'name': '',\n",
+       " 'created_at': '2023-08-07 17:13:11.949042',\n",
+       " 'documents': 0,\n",
+       " 'queries': 0,\n",
+       " 'source': None,\n",
+       " 'license': None,\n",
+       " 'bucket': None,\n",
+       " 'task': None,\n",
+       " 'dense_model': {'name': '', 'tokenizer': None, 'dimension': 0},\n",
+       " 'sparse_model': None,\n",
+       " 'description': None,\n",
+       " 'tags': None,\n",
+       " 'args': None}"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# creating a new empty metadata\n",
+    "metadata = DatasetMetadata.empty()\n",
+    "metadata.dict()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>values</th>\n",
+       "      <th>sparse_values</th>\n",
+       "      <th>metadata</th>\n",
+       "      <th>blob</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>[0.1, 0.2, 0.3]</td>\n",
+       "      <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
+       "      <td>{'title': 'title1', 'url': 'url1'}</td>\n",
+       "      <td>{'extra_field': 'extra_value'}</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>[0.4, 0.5, 0.6]</td>\n",
+       "      <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
+       "      <td>{'title': 'title2', 'url': 'url2'}</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>[0.7, 0.8, 0.9]</td>\n",
+       "      <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
+       "      <td>{'title': 'title3', 'url': 'url3'}</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>[1.0, 1.1, 1.2]</td>\n",
+       "      <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
+       "      <td>{'title': 'title4', 'url': 'url4'}</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>[1.3, 1.4, 1.5]</td>\n",
+       "      <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
+       "      <td>{'title': 'title5', 'url': 'url5'}</td>\n",
+       "      <td>{'another_field': 'another_value'}</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  id           values                                      sparse_values  \\\n",
+       "0  1  [0.1, 0.2, 0.3]  {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}   \n",
+       "1  2  [0.4, 0.5, 0.6]  {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}   \n",
+       "2  3  [0.7, 0.8, 0.9]  {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}   \n",
+       "3  4  [1.0, 1.1, 1.2]  {'indices': [10, 11, 12], 'values': [1.0, 1.1,...   \n",
+       "4  5  [1.3, 1.4, 1.5]  {'indices': [13, 14, 15], 'values': [1.3, 1.4,...   \n",
+       "\n",
+       "                             metadata                                blob  \n",
+       "0  {'title': 'title1', 'url': 'url1'}      {'extra_field': 'extra_value'}  \n",
+       "1  {'title': 'title2', 'url': 'url2'}                                None  \n",
+       "2  {'title': 'title3', 'url': 'url3'}                                None  \n",
+       "3  {'title': 'title4', 'url': 'url4'}                                None  \n",
+       "4  {'title': 'title5', 'url': 'url5'}  {'another_field': 'another_value'}  "
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds = Dataset.from_pandas(documents=df, q=None, metadata=metadata)\n",
+    "ds.documents"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Save dataset to local path\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/roymiara/Library/Python/3.9/lib/python/site-packages/pinecone_datasets/dataset.py:433: UserWarning: Queries are empty, not saving queries\n",
+      "  warnings.warn(\"Queries are empty, not saving queries\")\n"
+     ]
+    }
+   ],
+   "source": [
+    "ds.to_path('/tmp/ds')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Re-load dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "new_ds = Dataset.from_path('/tmp/ds')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>values</th>\n",
+       "      <th>sparse_values</th>\n",
+       "      <th>metadata</th>\n",
+       "      <th>blob</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>[0.1, 0.2, 0.3]</td>\n",
+       "      <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
+       "      <td>{'title': 'title1', 'url': 'url1'}</td>\n",
+       "      <td>{'another_field': None, 'extra_field': 'extra_...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>[0.4, 0.5, 0.6]</td>\n",
+       "      <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
+       "      <td>{'title': 'title2', 'url': 'url2'}</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>[0.7, 0.8, 0.9]</td>\n",
+       "      <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
+       "      <td>{'title': 'title3', 'url': 'url3'}</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>[1.0, 1.1, 1.2]</td>\n",
+       "      <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
+       "      <td>{'title': 'title4', 'url': 'url4'}</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>[1.3, 1.4, 1.5]</td>\n",
+       "      <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
+       "      <td>{'title': 'title5', 'url': 'url5'}</td>\n",
+       "      <td>{'another_field': 'another_value', 'extra_fiel...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  id           values                                      sparse_values  \\\n",
+       "0  1  [0.1, 0.2, 0.3]  {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}   \n",
+       "1  2  [0.4, 0.5, 0.6]  {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}   \n",
+       "2  3  [0.7, 0.8, 0.9]  {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}   \n",
+       "3  4  [1.0, 1.1, 1.2]  {'indices': [10, 11, 12], 'values': [1.0, 1.1,...   \n",
+       "4  5  [1.3, 1.4, 1.5]  {'indices': [13, 14, 15], 'values': [1.3, 1.4,...   \n",
+       "\n",
+       "                             metadata  \\\n",
+       "0  {'title': 'title1', 'url': 'url1'}   \n",
+       "1  {'title': 'title2', 'url': 'url2'}   \n",
+       "2  {'title': 'title3', 'url': 'url3'}   \n",
+       "3  {'title': 'title4', 'url': 'url4'}   \n",
+       "4  {'title': 'title5', 'url': 'url5'}   \n",
+       "\n",
+       "                                                blob  \n",
+       "0  {'another_field': None, 'extra_field': 'extra_...  \n",
+       "1                                               None  \n",
+       "2                                               None  \n",
+       "3                                               None  \n",
+       "4  {'another_field': 'another_value', 'extra_fiel...  "
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "new_ds.documents"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From 1a1334e7908de0220bcc6b77ec5c1b24b518f9c2 Mon Sep 17 00:00:00 2001
From: Roy Miara <miararoy@gmail.com>
Date: Mon, 7 Aug 2023 17:53:44 +0300
Subject: [PATCH 2/3] some edits

---
 .../how_to_create_pinecone_datasets.ipynb     | 37 ++++++++-----------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/docs/assets/how_to_create_pinecone_datasets.ipynb b/docs/assets/how_to_create_pinecone_datasets.ipynb
index e637c83..75e7210 100644
--- a/docs/assets/how_to_create_pinecone_datasets.ipynb
+++ b/docs/assets/how_to_create_pinecone_datasets.ipynb
@@ -26,7 +26,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -52,7 +52,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -61,7 +61,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
@@ -153,7 +153,7 @@
        "4  {'title': 'title5', 'url': 'url5'}  {'another_field': 'another_value'}  "
       ]
      },
-     "execution_count": 9,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -206,8 +206,8 @@
    "metadata": {},
    "source": [
     "Some notes:\n",
-    "1. Note that we have both metadata field and 'blob' field, the metadata field is the acutal pinecone metadata we will use in our index, blob, is an additional field that we can use to store any additional information we want to store along with the Dataset.\n",
-    "2. here we used both 'values' and 'sparse_values', however, sparse_values is not a mandatory field, if you don't have sparse values keep it empty."
+    "* Note that we have both metadata field and 'blob' field, the metadata field is the acutal pinecone metadata we will use in our index, blob, is an additional field that we can use to store any additional information we want to store along with the Dataset.\n",
+    "* here we used both 'values' and 'sparse_values', however, sparse_values is not a mandatory field, if you don't have sparse values keep it empty."
    ]
   },
   {
@@ -233,7 +233,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -242,14 +242,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "{'name': '',\n",
-       " 'created_at': '2023-08-07 17:13:11.949042',\n",
+       " 'created_at': '2023-08-07 17:52:49.166878',\n",
        " 'documents': 0,\n",
        " 'queries': 0,\n",
        " 'source': None,\n",
@@ -263,7 +263,7 @@
        " 'args': None}"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -276,7 +276,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -368,7 +368,7 @@
        "4  {'title': 'title5', 'url': 'url5'}  {'another_field': 'another_value'}  "
       ]
      },
-     "execution_count": 20,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -387,7 +387,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [
     {
@@ -412,7 +412,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -421,7 +421,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
@@ -520,7 +520,7 @@
        "4  {'another_field': 'another_value', 'extra_fiel...  "
       ]
      },
-     "execution_count": 24,
+     "execution_count": 34,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -528,11 +528,6 @@
    "source": [
     "new_ds.documents"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
   }
  ],
  "metadata": {

From c2b9f3dbcb4a71f6ccc95b26edac22539a16e609 Mon Sep 17 00:00:00 2001
From: James Briggs <35938317+jamescalam@users.noreply.github.com>
Date: Mon, 14 Aug 2023 17:21:24 +0800
Subject: [PATCH 3/3] minor tweaks

---
 .../how-to-create-pinecone-datasets.ipynb     | 1042 +++++++++++++++++
 .../how_to_create_pinecone_datasets.ipynb     |  555 ---------
 2 files changed, 1042 insertions(+), 555 deletions(-)
 create mode 100644 docs/assets/how-to-create-pinecone-datasets.ipynb
 delete mode 100644 docs/assets/how_to_create_pinecone_datasets.ipynb

diff --git a/docs/assets/how-to-create-pinecone-datasets.ipynb b/docs/assets/how-to-create-pinecone-datasets.ipynb
new file mode 100644
index 0000000..8f53364
--- /dev/null
+++ b/docs/assets/how-to-create-pinecone-datasets.ipynb
@@ -0,0 +1,1042 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "cdN6QOXIUaUq"
+      },
+      "source": [
+        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/docs/assets/how-to-create-pinecone-datasets.ipynb)\n",
+        "[![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/docs/assets/how-to-create-pinecone-datasets.ipynb)\n",
+        "\n",
+        "# Creating Pinecone Datasets"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "8Fiobs_oUaUr"
+      },
+      "source": [
+        "This notebook will walk you through the process of creating a Pinecone dataset from a pandas DataFrame."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "DLuQirtzUaUs"
+      },
+      "source": [
+        "## Step 1: create a simple sample dataset"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {
+        "id": "bVW2DlVQUaUs",
+        "outputId": "bd3c9438-7c67-4097-b580-4bfdd695ab92",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
+      },
+      "outputs": [],
+      "source": [
+        "!pip install -qU pandas==2.0.2"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "id": "fPebr9XNUaUs"
+      },
+      "outputs": [],
+      "source": [
+        "import pandas as pd"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {
+        "id": "I_WRSqY8UaUs",
+        "outputId": "36348ad8-38ef-40b2-8b0c-fc7e34e12575",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        }
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "  id           values                                      sparse_values  \\\n",
+              "0  1  [0.1, 0.2, 0.3]  {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}   \n",
+              "1  2  [0.4, 0.5, 0.6]  {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}   \n",
+              "2  3  [0.7, 0.8, 0.9]  {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}   \n",
+              "3  4  [1.0, 1.1, 1.2]  {'indices': [10, 11, 12], 'values': [1.0, 1.1,...   \n",
+              "4  5  [1.3, 1.4, 1.5]  {'indices': [13, 14, 15], 'values': [1.3, 1.4,...   \n",
+              "\n",
+              "                             metadata                                blob  \n",
+              "0  {'title': 'title1', 'url': 'url1'}      {'extra_field': 'extra_value'}  \n",
+              "1  {'title': 'title2', 'url': 'url2'}                                None  \n",
+              "2  {'title': 'title3', 'url': 'url3'}                                None  \n",
+              "3  {'title': 'title4', 'url': 'url4'}                                None  \n",
+              "4  {'title': 'title5', 'url': 'url5'}  {'another_field': 'another_value'}  "
+            ],
+            "text/html": [
+              "\n",
+              "\n",
+              "  <div id=\"df-ee9831ef-5516-44bf-8080-3a2a74e6f00c\">\n",
+              "    <div class=\"colab-df-container\">\n",
+              "      <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>values</th>\n",
+              "      <th>sparse_values</th>\n",
+              "      <th>metadata</th>\n",
+              "      <th>blob</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>1</td>\n",
+              "      <td>[0.1, 0.2, 0.3]</td>\n",
+              "      <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
+              "      <td>{'title': 'title1', 'url': 'url1'}</td>\n",
+              "      <td>{'extra_field': 'extra_value'}</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>2</td>\n",
+              "      <td>[0.4, 0.5, 0.6]</td>\n",
+              "      <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
+              "      <td>{'title': 'title2', 'url': 'url2'}</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>3</td>\n",
+              "      <td>[0.7, 0.8, 0.9]</td>\n",
+              "      <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
+              "      <td>{'title': 'title3', 'url': 'url3'}</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>4</td>\n",
+              "      <td>[1.0, 1.1, 1.2]</td>\n",
+              "      <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
+              "      <td>{'title': 'title4', 'url': 'url4'}</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>5</td>\n",
+              "      <td>[1.3, 1.4, 1.5]</td>\n",
+              "      <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
+              "      <td>{'title': 'title5', 'url': 'url5'}</td>\n",
+              "      <td>{'another_field': 'another_value'}</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ee9831ef-5516-44bf-8080-3a2a74e6f00c')\"\n",
+              "              title=\"Convert this dataframe to an interactive table.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
+              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
+              "  </svg>\n",
+              "      </button>\n",
+              "\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-2dbf45c1-fcbc-44df-ae34-b177e0482493\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-2dbf45c1-fcbc-44df-ae34-b177e0482493')\"\n",
+              "              title=\"Suggest charts.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "    </div>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: #E8F0FE;\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: #1967D2;\n",
+              "    height: 32px;\n",
+              "    padding: 0 0 0 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: #E2EBFA;\n",
+              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: #174EA6;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "    background-color: #3B4455;\n",
+              "    fill: #D2E3FC;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart:hover {\n",
+              "    background-color: #434B5C;\n",
+              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "    fill: #FFFFFF;\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "    <script>\n",
+              "      async function quickchart(key) {\n",
+              "        const containerElement = document.querySelector('#' + key);\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      }\n",
+              "    </script>\n",
+              "\n",
+              "      <script>\n",
+              "\n",
+              "function displayQuickchartButton(domScope) {\n",
+              "  let quickchartButtonEl =\n",
+              "    domScope.querySelector('#df-2dbf45c1-fcbc-44df-ae34-b177e0482493 button.colab-df-quickchart');\n",
+              "  quickchartButtonEl.style.display =\n",
+              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "}\n",
+              "\n",
+              "        displayQuickchartButton(document);\n",
+              "      </script>\n",
+              "      <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      flex-wrap:wrap;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "      <script>\n",
+              "        const buttonEl =\n",
+              "          document.querySelector('#df-ee9831ef-5516-44bf-8080-3a2a74e6f00c button.colab-df-convert');\n",
+              "        buttonEl.style.display =\n",
+              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "        async function convertToInteractive(key) {\n",
+              "          const element = document.querySelector('#df-ee9831ef-5516-44bf-8080-3a2a74e6f00c');\n",
+              "          const dataTable =\n",
+              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                     [key], {});\n",
+              "          if (!dataTable) return;\n",
+              "\n",
+              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "            + ' to learn more about interactive tables.';\n",
+              "          element.innerHTML = '';\n",
+              "          dataTable['output_type'] = 'display_data';\n",
+              "          await google.colab.output.renderOutput(dataTable, element);\n",
+              "          const docLink = document.createElement('div');\n",
+              "          docLink.innerHTML = docLinkHtml;\n",
+              "          element.appendChild(docLink);\n",
+              "        }\n",
+              "      </script>\n",
+              "    </div>\n",
+              "  </div>\n"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 3
+        }
+      ],
+      "source": [
+        "documents = [\n",
+        "    {\n",
+        "        \"id\": \"1\",\n",
+        "        \"values\": [0.1, 0.2, 0.3],\n",
+        "        \"sparse_values\": {\"indices\": [1, 2, 3], \"values\": [0.1, 0.2, 0.3]},\n",
+        "        \"metadata\": {\"title\": \"title1\", \"url\": \"url1\"},\n",
+        "        \"blob\": {\"extra_field\": \"extra_value\"},\n",
+        "    },\n",
+        "    {\n",
+        "        \"id\": \"2\",\n",
+        "        \"values\": [0.4, 0.5, 0.6],\n",
+        "        \"sparse_values\": {\"indices\": [4, 5, 6], \"values\": [0.4, 0.5, 0.6]},\n",
+        "        \"metadata\": {\"title\": \"title2\", \"url\": \"url2\"},\n",
+        "        \"blob\": None,\n",
+        "    },\n",
+        "    {\n",
+        "        \"id\": \"3\",\n",
+        "        \"values\": [0.7, 0.8, 0.9],\n",
+        "        \"sparse_values\": {\"indices\": [7, 8, 9], \"values\": [0.7, 0.8, 0.9]},\n",
+        "        \"metadata\": {\"title\": \"title3\", \"url\": \"url3\"},\n",
+        "        \"blob\": None,\n",
+        "    },\n",
+        "    {\n",
+        "        \"id\": \"4\",\n",
+        "        \"values\": [1.0, 1.1, 1.2],\n",
+        "        \"sparse_values\": {\"indices\": [10, 11, 12], \"values\": [1.0, 1.1, 1.2]},\n",
+        "        \"metadata\": {\"title\": \"title4\", \"url\": \"url4\"},\n",
+        "        \"blob\": None,\n",
+        "    },\n",
+        "    {\n",
+        "        \"id\": \"5\",\n",
+        "        \"values\": [1.3, 1.4, 1.5],\n",
+        "        \"sparse_values\": {\"indices\": [13, 14, 15], \"values\": [1.3, 1.4, 1.5]},\n",
+        "        \"metadata\": {\"title\": \"title5\", \"url\": \"url5\"},\n",
+        "        \"blob\": {\"another_field\": \"another_value\"},\n",
+        "    }\n",
+        "]\n",
+        "\n",
+        "df = pd.DataFrame(documents)\n",
+        "df"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "c_zwxJ_OUaUt"
+      },
+      "source": [
+        "Some notes:\n",
+        "* We have both a 'metadata' field and a 'blob' field: 'metadata' is the actual Pinecone metadata we will use in our index, while 'blob' is an additional field that we can use to store any additional information we want to keep along with the Dataset.\n",
+        "* Here we used both 'values' and 'sparse_values'; however, 'sparse_values' is not a mandatory field. If you don't have sparse values, keep it empty."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "BcFx1wFqUaUt"
+      },
+      "source": [
+        "## Pinecone Dataset\n",
+        "\n",
+        "Now that we have our data ready, we can create a Pinecone Dataset. A Pinecone Dataset is a collection of documents, queries and metadata:\n",
+        "* Documents: a collection of records with Id, Vectors (dense, sparse) and metadata\n",
+        "* Queries: a collection of queries with Vectors (dense, sparse), metadata filter and top_k\n",
+        "* Metadata: a definition of the dataset: name, dimension, metric, embedding models, etc."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "metadata": {
+        "id": "DCGFhTtyUaUt"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install -qU \\\n",
+        "  pinecone-client==2.2.2 \\\n",
+        "  pinecone-datasets==0.6.0"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {
+        "id": "S9NCQyTqUaUt"
+      },
+      "outputs": [],
+      "source": [
+        "from pinecone_datasets import Dataset, DatasetMetadata"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 6,
+      "metadata": {
+        "id": "Eaiy3IjIUaUt",
+        "outputId": "4ff727bd-1a56-42bb-8cd2-e645b5ab390c",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "{'name': '',\n",
+              " 'created_at': '2023-08-14 09:18:50.196514',\n",
+              " 'documents': 0,\n",
+              " 'queries': 0,\n",
+              " 'source': None,\n",
+              " 'license': None,\n",
+              " 'bucket': None,\n",
+              " 'task': None,\n",
+              " 'dense_model': {'name': '', 'tokenizer': None, 'dimension': 0},\n",
+              " 'sparse_model': None,\n",
+              " 'description': None,\n",
+              " 'tags': None,\n",
+              " 'args': None}"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 6
+        }
+      ],
+      "source": [
+        "# creating a new empty metadata\n",
+        "metadata = DatasetMetadata.empty()\n",
+        "metadata.dict()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 7,
+      "metadata": {
+        "id": "g_ACjKDOUaUt",
+        "outputId": "bc47c7d1-a3ef-4cf1-9e4b-7da6f82e111c",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        }
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "  id           values                                      sparse_values  \\\n",
+              "0  1  [0.1, 0.2, 0.3]  {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}   \n",
+              "1  2  [0.4, 0.5, 0.6]  {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}   \n",
+              "2  3  [0.7, 0.8, 0.9]  {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}   \n",
+              "3  4  [1.0, 1.1, 1.2]  {'indices': [10, 11, 12], 'values': [1.0, 1.1,...   \n",
+              "4  5  [1.3, 1.4, 1.5]  {'indices': [13, 14, 15], 'values': [1.3, 1.4,...   \n",
+              "\n",
+              "                             metadata                                blob  \n",
+              "0  {'title': 'title1', 'url': 'url1'}      {'extra_field': 'extra_value'}  \n",
+              "1  {'title': 'title2', 'url': 'url2'}                                None  \n",
+              "2  {'title': 'title3', 'url': 'url3'}                                None  \n",
+              "3  {'title': 'title4', 'url': 'url4'}                                None  \n",
+              "4  {'title': 'title5', 'url': 'url5'}  {'another_field': 'another_value'}  "
+            ],
+            "text/html": [
+              "\n",
+              "\n",
+              "  <div id=\"df-7047aefe-be6d-423c-b810-75e31017f008\">\n",
+              "    <div class=\"colab-df-container\">\n",
+              "      <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>values</th>\n",
+              "      <th>sparse_values</th>\n",
+              "      <th>metadata</th>\n",
+              "      <th>blob</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>1</td>\n",
+              "      <td>[0.1, 0.2, 0.3]</td>\n",
+              "      <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
+              "      <td>{'title': 'title1', 'url': 'url1'}</td>\n",
+              "      <td>{'extra_field': 'extra_value'}</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>2</td>\n",
+              "      <td>[0.4, 0.5, 0.6]</td>\n",
+              "      <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
+              "      <td>{'title': 'title2', 'url': 'url2'}</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>3</td>\n",
+              "      <td>[0.7, 0.8, 0.9]</td>\n",
+              "      <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
+              "      <td>{'title': 'title3', 'url': 'url3'}</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>4</td>\n",
+              "      <td>[1.0, 1.1, 1.2]</td>\n",
+              "      <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
+              "      <td>{'title': 'title4', 'url': 'url4'}</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>5</td>\n",
+              "      <td>[1.3, 1.4, 1.5]</td>\n",
+              "      <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
+              "      <td>{'title': 'title5', 'url': 'url5'}</td>\n",
+              "      <td>{'another_field': 'another_value'}</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7047aefe-be6d-423c-b810-75e31017f008')\"\n",
+              "              title=\"Convert this dataframe to an interactive table.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
+              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
+              "  </svg>\n",
+              "      </button>\n",
+              "\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-ba273d30-2d9c-43f2-b7d1-7b27125abb98\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-ba273d30-2d9c-43f2-b7d1-7b27125abb98')\"\n",
+              "              title=\"Suggest charts.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "    </div>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: #E8F0FE;\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: #1967D2;\n",
+              "    height: 32px;\n",
+              "    padding: 0 0 0 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: #E2EBFA;\n",
+              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: #174EA6;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "    background-color: #3B4455;\n",
+              "    fill: #D2E3FC;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart:hover {\n",
+              "    background-color: #434B5C;\n",
+              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "    fill: #FFFFFF;\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "    <script>\n",
+              "      async function quickchart(key) {\n",
+              "        const containerElement = document.querySelector('#' + key);\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      }\n",
+              "    </script>\n",
+              "\n",
+              "      <script>\n",
+              "\n",
+              "function displayQuickchartButton(domScope) {\n",
+              "  let quickchartButtonEl =\n",
+              "    domScope.querySelector('#df-ba273d30-2d9c-43f2-b7d1-7b27125abb98 button.colab-df-quickchart');\n",
+              "  quickchartButtonEl.style.display =\n",
+              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "}\n",
+              "\n",
+              "        displayQuickchartButton(document);\n",
+              "      </script>\n",
+              "      <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      flex-wrap:wrap;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "      <script>\n",
+              "        const buttonEl =\n",
+              "          document.querySelector('#df-7047aefe-be6d-423c-b810-75e31017f008 button.colab-df-convert');\n",
+              "        buttonEl.style.display =\n",
+              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "        async function convertToInteractive(key) {\n",
+              "          const element = document.querySelector('#df-7047aefe-be6d-423c-b810-75e31017f008');\n",
+              "          const dataTable =\n",
+              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                     [key], {});\n",
+              "          if (!dataTable) return;\n",
+              "\n",
+              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "            + ' to learn more about interactive tables.';\n",
+              "          element.innerHTML = '';\n",
+              "          dataTable['output_type'] = 'display_data';\n",
+              "          await google.colab.output.renderOutput(dataTable, element);\n",
+              "          const docLink = document.createElement('div');\n",
+              "          docLink.innerHTML = docLinkHtml;\n",
+              "          element.appendChild(docLink);\n",
+              "        }\n",
+              "      </script>\n",
+              "    </div>\n",
+              "  </div>\n"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 7
+        }
+      ],
+      "source": [
+        "ds = Dataset.from_pandas(documents=df, q=None, metadata=metadata)\n",
+        "ds.documents"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "CGzdg2sZUaUt"
+      },
+      "source": [
+        "## Save dataset to local path\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 8,
+      "metadata": {
+        "id": "IVkK6fJUUaUt",
+        "outputId": "943ff58d-91d6-4a75-e218-d833214fee1b",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/pinecone_datasets/dataset.py:433: UserWarning: Queries are empty, not saving queries\n",
+            "  warnings.warn(\"Queries are empty, not saving queries\")\n"
+          ]
+        }
+      ],
+      "source": [
+        "ds.to_path('/tmp/ds')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "B5tvJlnSUaUu"
+      },
+      "source": [
+        "### Reload dataset"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 9,
+      "metadata": {
+        "id": "pLEhwSaRUaUu"
+      },
+      "outputs": [],
+      "source": [
+        "new_ds = Dataset.from_path('/tmp/ds')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 10,
+      "metadata": {
+        "id": "J5LJGYqxUaUu",
+        "outputId": "120f1ebf-e30a-4913-a84f-727e52e2add8",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        }
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "  id           values                                      sparse_values  \\\n",
+              "0  1  [0.1, 0.2, 0.3]  {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}   \n",
+              "1  2  [0.4, 0.5, 0.6]  {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}   \n",
+              "2  3  [0.7, 0.8, 0.9]  {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}   \n",
+              "3  4  [1.0, 1.1, 1.2]  {'indices': [10, 11, 12], 'values': [1.0, 1.1,...   \n",
+              "4  5  [1.3, 1.4, 1.5]  {'indices': [13, 14, 15], 'values': [1.3, 1.4,...   \n",
+              "\n",
+              "                             metadata  \\\n",
+              "0  {'title': 'title1', 'url': 'url1'}   \n",
+              "1  {'title': 'title2', 'url': 'url2'}   \n",
+              "2  {'title': 'title3', 'url': 'url3'}   \n",
+              "3  {'title': 'title4', 'url': 'url4'}   \n",
+              "4  {'title': 'title5', 'url': 'url5'}   \n",
+              "\n",
+              "                                                blob  \n",
+              "0  {'another_field': None, 'extra_field': 'extra_...  \n",
+              "1                                               None  \n",
+              "2                                               None  \n",
+              "3                                               None  \n",
+              "4  {'another_field': 'another_value', 'extra_fiel...  "
+            ],
+            "text/html": [
+              "\n",
+              "\n",
+              "  <div id=\"df-924edd68-488a-465d-825a-1743d1db0e66\">\n",
+              "    <div class=\"colab-df-container\">\n",
+              "      <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>values</th>\n",
+              "      <th>sparse_values</th>\n",
+              "      <th>metadata</th>\n",
+              "      <th>blob</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>1</td>\n",
+              "      <td>[0.1, 0.2, 0.3]</td>\n",
+              "      <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
+              "      <td>{'title': 'title1', 'url': 'url1'}</td>\n",
+              "      <td>{'another_field': None, 'extra_field': 'extra_...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>2</td>\n",
+              "      <td>[0.4, 0.5, 0.6]</td>\n",
+              "      <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
+              "      <td>{'title': 'title2', 'url': 'url2'}</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>3</td>\n",
+              "      <td>[0.7, 0.8, 0.9]</td>\n",
+              "      <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
+              "      <td>{'title': 'title3', 'url': 'url3'}</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>4</td>\n",
+              "      <td>[1.0, 1.1, 1.2]</td>\n",
+              "      <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
+              "      <td>{'title': 'title4', 'url': 'url4'}</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>5</td>\n",
+              "      <td>[1.3, 1.4, 1.5]</td>\n",
+              "      <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
+              "      <td>{'title': 'title5', 'url': 'url5'}</td>\n",
+              "      <td>{'another_field': 'another_value', 'extra_fiel...</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-924edd68-488a-465d-825a-1743d1db0e66')\"\n",
+              "              title=\"Convert this dataframe to an interactive table.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
+              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
+              "  </svg>\n",
+              "      </button>\n",
+              "\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-42148e02-48b4-4818-94f5-46b211ba40a7\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-42148e02-48b4-4818-94f5-46b211ba40a7')\"\n",
+              "              title=\"Suggest charts.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "    </div>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: #E8F0FE;\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: #1967D2;\n",
+              "    height: 32px;\n",
+              "    padding: 0 0 0 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: #E2EBFA;\n",
+              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: #174EA6;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "    background-color: #3B4455;\n",
+              "    fill: #D2E3FC;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart:hover {\n",
+              "    background-color: #434B5C;\n",
+              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "    fill: #FFFFFF;\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "    <script>\n",
+              "      async function quickchart(key) {\n",
+              "        const containerElement = document.querySelector('#' + key);\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      }\n",
+              "    </script>\n",
+              "\n",
+              "      <script>\n",
+              "\n",
+              "function displayQuickchartButton(domScope) {\n",
+              "  let quickchartButtonEl =\n",
+              "    domScope.querySelector('#df-42148e02-48b4-4818-94f5-46b211ba40a7 button.colab-df-quickchart');\n",
+              "  quickchartButtonEl.style.display =\n",
+              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "}\n",
+              "\n",
+              "        displayQuickchartButton(document);\n",
+              "      </script>\n",
+              "      <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      flex-wrap:wrap;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "      <script>\n",
+              "        const buttonEl =\n",
+              "          document.querySelector('#df-924edd68-488a-465d-825a-1743d1db0e66 button.colab-df-convert');\n",
+              "        buttonEl.style.display =\n",
+              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "        async function convertToInteractive(key) {\n",
+              "          const element = document.querySelector('#df-924edd68-488a-465d-825a-1743d1db0e66');\n",
+              "          const dataTable =\n",
+              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                     [key], {});\n",
+              "          if (!dataTable) return;\n",
+              "\n",
+              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "            + ' to learn more about interactive tables.';\n",
+              "          element.innerHTML = '';\n",
+              "          dataTable['output_type'] = 'display_data';\n",
+              "          await google.colab.output.renderOutput(dataTable, element);\n",
+              "          const docLink = document.createElement('div');\n",
+              "          docLink.innerHTML = docLinkHtml;\n",
+              "          element.appendChild(docLink);\n",
+              "        }\n",
+              "      </script>\n",
+              "    </div>\n",
+              "  </div>\n"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 10
+        }
+      ],
+      "source": [
+        "new_ds.documents"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.9.6"
+    },
+    "orig_nbformat": 4,
+    "colab": {
+      "provenance": []
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/docs/assets/how_to_create_pinecone_datasets.ipynb b/docs/assets/how_to_create_pinecone_datasets.ipynb
deleted file mode 100644
index 75e7210..0000000
--- a/docs/assets/how_to_create_pinecone_datasets.ipynb
+++ /dev/null
@@ -1,555 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/docs/assets/how_to_create_pinecone_datasets.ipynb) \n",
-    "[![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/docs/assets/how_to_create_pinecone_datasets.ipynb)\n",
-    "\n",
-    "# Creaeting Pinecone Datasets"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This notebook will walk you through the process of creating a Pinecone dataset from a pandas Dataframe."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 1: create a simple sample dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Defaulting to user installation because normal site-packages is not writeable\n",
-      "Requirement already satisfied: pandas in /Users/roymiara/Library/Python/3.9/lib/python/site-packages (2.0.2)\n",
-      "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/roymiara/Library/Python/3.9/lib/python/site-packages (from pandas) (2.8.2)\n",
-      "Requirement already satisfied: pytz>=2020.1 in /Users/roymiara/Library/Python/3.9/lib/python/site-packages (from pandas) (2022.2.1)\n",
-      "Requirement already satisfied: tzdata>=2022.1 in /Users/roymiara/Library/Python/3.9/lib/python/site-packages (from pandas) (2022.7)\n",
-      "Requirement already satisfied: numpy>=1.20.3 in /Users/roymiara/Library/Python/3.9/lib/python/site-packages (from pandas) (1.24.2)\n",
-      "Requirement already satisfied: six>=1.5 in /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/site-packages (from python-dateutil>=2.8.2->pandas) (1.15.0)\n",
-      "\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n"
-     ]
-    }
-   ],
-   "source": [
-    "!pip install pandas"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>values</th>\n",
-       "      <th>sparse_values</th>\n",
-       "      <th>metadata</th>\n",
-       "      <th>blob</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>[0.1, 0.2, 0.3]</td>\n",
-       "      <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
-       "      <td>{'title': 'title1', 'url': 'url1'}</td>\n",
-       "      <td>{'extra_field': 'extra_value'}</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>[0.4, 0.5, 0.6]</td>\n",
-       "      <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
-       "      <td>{'title': 'title2', 'url': 'url2'}</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>[0.7, 0.8, 0.9]</td>\n",
-       "      <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
-       "      <td>{'title': 'title3', 'url': 'url3'}</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>[1.0, 1.1, 1.2]</td>\n",
-       "      <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
-       "      <td>{'title': 'title4', 'url': 'url4'}</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>5</td>\n",
-       "      <td>[1.3, 1.4, 1.5]</td>\n",
-       "      <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
-       "      <td>{'title': 'title5', 'url': 'url5'}</td>\n",
-       "      <td>{'another_field': 'another_value'}</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "  id           values                                      sparse_values  \\\n",
-       "0  1  [0.1, 0.2, 0.3]  {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}   \n",
-       "1  2  [0.4, 0.5, 0.6]  {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}   \n",
-       "2  3  [0.7, 0.8, 0.9]  {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}   \n",
-       "3  4  [1.0, 1.1, 1.2]  {'indices': [10, 11, 12], 'values': [1.0, 1.1,...   \n",
-       "4  5  [1.3, 1.4, 1.5]  {'indices': [13, 14, 15], 'values': [1.3, 1.4,...   \n",
-       "\n",
-       "                             metadata                                blob  \n",
-       "0  {'title': 'title1', 'url': 'url1'}      {'extra_field': 'extra_value'}  \n",
-       "1  {'title': 'title2', 'url': 'url2'}                                None  \n",
-       "2  {'title': 'title3', 'url': 'url3'}                                None  \n",
-       "3  {'title': 'title4', 'url': 'url4'}                                None  \n",
-       "4  {'title': 'title5', 'url': 'url5'}  {'another_field': 'another_value'}  "
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "documents = [\n",
-    "    {\n",
-    "        \"id\": \"1\",\n",
-    "        \"values\": [0.1, 0.2, 0.3],\n",
-    "        \"sparse_values\": {\"indices\": [1, 2, 3], \"values\": [0.1, 0.2, 0.3]},\n",
-    "        \"metadata\": {\"title\": \"title1\", \"url\": \"url1\"},\n",
-    "        \"blob\": {\"extra_field\": \"extra_value\"},\n",
-    "    },\n",
-    "    {\n",
-    "        \"id\": \"2\",\n",
-    "        \"values\": [0.4, 0.5, 0.6],\n",
-    "        \"sparse_values\": {\"indices\": [4, 5, 6], \"values\": [0.4, 0.5, 0.6]},\n",
-    "        \"metadata\": {\"title\": \"title2\", \"url\": \"url2\"},\n",
-    "        \"blob\": None,\n",
-    "    },\n",
-    "    {\n",
-    "        \"id\": \"3\",\n",
-    "        \"values\": [0.7, 0.8, 0.9],\n",
-    "        \"sparse_values\": {\"indices\": [7, 8, 9], \"values\": [0.7, 0.8, 0.9]},\n",
-    "        \"metadata\": {\"title\": \"title3\", \"url\": \"url3\"},\n",
-    "        \"blob\": None,\n",
-    "    },\n",
-    "    {\n",
-    "        \"id\": \"4\",\n",
-    "        \"values\": [1.0, 1.1, 1.2],\n",
-    "        \"sparse_values\": {\"indices\": [10, 11, 12], \"values\": [1.0, 1.1, 1.2]},\n",
-    "        \"metadata\": {\"title\": \"title4\", \"url\": \"url4\"},\n",
-    "        \"blob\": None,\n",
-    "    },\n",
-    "    {\n",
-    "        \"id\": \"5\",\n",
-    "        \"values\": [1.3, 1.4, 1.5],\n",
-    "        \"sparse_values\": {\"indices\": [13, 14, 15], \"values\": [1.3, 1.4, 1.5]},\n",
-    "        \"metadata\": {\"title\": \"title5\", \"url\": \"url5\"},\n",
-    "        \"blob\": {\"another_field\": \"another_value\"},\n",
-    "    }\n",
-    "]\n",
-    "\n",
-    "df = pd.DataFrame(documents)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Some notes:\n",
-    "* Note that we have both metadata field and 'blob' field, the metadata field is the acutal pinecone metadata we will use in our index, blob, is an additional field that we can use to store any additional information we want to store along with the Dataset.\n",
-    "* here we used both 'values' and 'sparse_values', however, sparse_values is not a mandatory field, if you don't have sparse values keep it empty."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Pinecone Dataset\n",
-    "\n",
-    "Now that we have our data Ready, we can create a Pinecone Dataset. A Pinecone Dataset is a collection of documtents, queries and Metadata. We can create a Pinecone \n",
-    "* Documents: a collection of records with Id, Vectors (dense, sparse) and metadata\n",
-    "* Queries: a collection of queries with Vectors (dense, sparse), metadata filter and top_k\n",
-    "* Metadata: a defintion of the dataset: Name, dimension, metric, embedding models, etc."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install pinecone-datasets"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pinecone_datasets import Dataset, DatasetMetadata"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'name': '',\n",
-       " 'created_at': '2023-08-07 17:52:49.166878',\n",
-       " 'documents': 0,\n",
-       " 'queries': 0,\n",
-       " 'source': None,\n",
-       " 'license': None,\n",
-       " 'bucket': None,\n",
-       " 'task': None,\n",
-       " 'dense_model': {'name': '', 'tokenizer': None, 'dimension': 0},\n",
-       " 'sparse_model': None,\n",
-       " 'description': None,\n",
-       " 'tags': None,\n",
-       " 'args': None}"
-      ]
-     },
-     "execution_count": 30,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# creating a new empty metadata\n",
-    "metadata = DatasetMetadata.empty()\n",
-    "metadata.dict()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>values</th>\n",
-       "      <th>sparse_values</th>\n",
-       "      <th>metadata</th>\n",
-       "      <th>blob</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>[0.1, 0.2, 0.3]</td>\n",
-       "      <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
-       "      <td>{'title': 'title1', 'url': 'url1'}</td>\n",
-       "      <td>{'extra_field': 'extra_value'}</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>[0.4, 0.5, 0.6]</td>\n",
-       "      <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
-       "      <td>{'title': 'title2', 'url': 'url2'}</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>[0.7, 0.8, 0.9]</td>\n",
-       "      <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
-       "      <td>{'title': 'title3', 'url': 'url3'}</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>[1.0, 1.1, 1.2]</td>\n",
-       "      <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
-       "      <td>{'title': 'title4', 'url': 'url4'}</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>5</td>\n",
-       "      <td>[1.3, 1.4, 1.5]</td>\n",
-       "      <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
-       "      <td>{'title': 'title5', 'url': 'url5'}</td>\n",
-       "      <td>{'another_field': 'another_value'}</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "  id           values                                      sparse_values  \\\n",
-       "0  1  [0.1, 0.2, 0.3]  {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}   \n",
-       "1  2  [0.4, 0.5, 0.6]  {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}   \n",
-       "2  3  [0.7, 0.8, 0.9]  {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}   \n",
-       "3  4  [1.0, 1.1, 1.2]  {'indices': [10, 11, 12], 'values': [1.0, 1.1,...   \n",
-       "4  5  [1.3, 1.4, 1.5]  {'indices': [13, 14, 15], 'values': [1.3, 1.4,...   \n",
-       "\n",
-       "                             metadata                                blob  \n",
-       "0  {'title': 'title1', 'url': 'url1'}      {'extra_field': 'extra_value'}  \n",
-       "1  {'title': 'title2', 'url': 'url2'}                                None  \n",
-       "2  {'title': 'title3', 'url': 'url3'}                                None  \n",
-       "3  {'title': 'title4', 'url': 'url4'}                                None  \n",
-       "4  {'title': 'title5', 'url': 'url5'}  {'another_field': 'another_value'}  "
-      ]
-     },
-     "execution_count": 31,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "ds = Dataset.from_pandas(documents=df, q=None, metadata=metadata)\n",
-    "ds.documents"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Save dataset to local path\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/roymiara/Library/Python/3.9/lib/python/site-packages/pinecone_datasets/dataset.py:433: UserWarning: Queries are empty, not saving queries\n",
-      "  warnings.warn(\"Queries are empty, not saving queries\")\n"
-     ]
-    }
-   ],
-   "source": [
-    "ds.to_path('/tmp/ds')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Re-load dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "new_ds = Dataset.from_path('/tmp/ds')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>values</th>\n",
-       "      <th>sparse_values</th>\n",
-       "      <th>metadata</th>\n",
-       "      <th>blob</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>[0.1, 0.2, 0.3]</td>\n",
-       "      <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
-       "      <td>{'title': 'title1', 'url': 'url1'}</td>\n",
-       "      <td>{'another_field': None, 'extra_field': 'extra_...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>[0.4, 0.5, 0.6]</td>\n",
-       "      <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
-       "      <td>{'title': 'title2', 'url': 'url2'}</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>[0.7, 0.8, 0.9]</td>\n",
-       "      <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
-       "      <td>{'title': 'title3', 'url': 'url3'}</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>[1.0, 1.1, 1.2]</td>\n",
-       "      <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
-       "      <td>{'title': 'title4', 'url': 'url4'}</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>5</td>\n",
-       "      <td>[1.3, 1.4, 1.5]</td>\n",
-       "      <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
-       "      <td>{'title': 'title5', 'url': 'url5'}</td>\n",
-       "      <td>{'another_field': 'another_value', 'extra_fiel...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "  id           values                                      sparse_values  \\\n",
-       "0  1  [0.1, 0.2, 0.3]  {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}   \n",
-       "1  2  [0.4, 0.5, 0.6]  {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}   \n",
-       "2  3  [0.7, 0.8, 0.9]  {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}   \n",
-       "3  4  [1.0, 1.1, 1.2]  {'indices': [10, 11, 12], 'values': [1.0, 1.1,...   \n",
-       "4  5  [1.3, 1.4, 1.5]  {'indices': [13, 14, 15], 'values': [1.3, 1.4,...   \n",
-       "\n",
-       "                             metadata  \\\n",
-       "0  {'title': 'title1', 'url': 'url1'}   \n",
-       "1  {'title': 'title2', 'url': 'url2'}   \n",
-       "2  {'title': 'title3', 'url': 'url3'}   \n",
-       "3  {'title': 'title4', 'url': 'url4'}   \n",
-       "4  {'title': 'title5', 'url': 'url5'}   \n",
-       "\n",
-       "                                                blob  \n",
-       "0  {'another_field': None, 'extra_field': 'extra_...  \n",
-       "1                                               None  \n",
-       "2                                               None  \n",
-       "3                                               None  \n",
-       "4  {'another_field': 'another_value', 'extra_fiel...  "
-      ]
-     },
-     "execution_count": 34,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "new_ds.documents"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.6"
-  },
-  "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

	id	values	sparse_values	metadata	blob
0	1	[0.1, 0.2, 0.3]	{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}	{'title': 'title1', 'url': 'url1'}	{'extra_field': 'extra_value'}
1	2	[0.4, 0.5, 0.6]	{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}	{'title': 'title2', 'url': 'url2'}	None
2	3	[0.7, 0.8, 0.9]	{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}	{'title': 'title3', 'url': 'url3'}	None
3	4	[1.0, 1.1, 1.2]	{'indices': [10, 11, 12], 'values': [1.0, 1.1,...	{'title': 'title4', 'url': 'url4'}	None
4	5	[1.3, 1.4, 1.5]	{'indices': [13, 14, 15], 'values': [1.3, 1.4,...	{'title': 'title5', 'url': 'url5'}	{'another_field': 'another_value'}