mirror of
https://github.com/pinecone-io/examples.git
synced 2023-10-11 20:04:54 +03:00
1042 lines
45 KiB
Plaintext
1042 lines
45 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "cdN6QOXIUaUq"
|
|
},
|
|
"source": [
|
|
"[Open in Colab](https://colab.research.google.com/github/pinecone-io/examples/blob/master/docs/assets/how-to-create-pinecone-datasets.ipynb)\n",
|
|
"[Open in nbviewer](https://nbviewer.org/github/pinecone-io/examples/blob/master/docs/assets/how-to-create-pinecone-datasets.ipynb)\n",
|
|
"\n",
|
|
"# Creating Pinecone Datasets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "8Fiobs_oUaUr"
|
|
},
|
|
"source": [
|
|
"This notebook will walk you through the process of creating a Pinecone dataset from a pandas DataFrame."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "DLuQirtzUaUs"
|
|
},
|
|
"source": [
|
|
"## Step 1: create a simple sample dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {
|
|
"id": "bVW2DlVQUaUs",
|
|
"outputId": "bd3c9438-7c67-4097-b580-4bfdd695ab92",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"!pip install -qU pandas==2.0.2"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {
|
|
"id": "fPebr9XNUaUs"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {
|
|
"id": "I_WRSqY8UaUs",
|
|
"outputId": "36348ad8-38ef-40b2-8b0c-fc7e34e12575",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 206
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": [
|
|
" id values sparse_values \\\n",
|
|
"0 1 [0.1, 0.2, 0.3] {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]} \n",
|
|
"1 2 [0.4, 0.5, 0.6] {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]} \n",
|
|
"2 3 [0.7, 0.8, 0.9] {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]} \n",
|
|
"3 4 [1.0, 1.1, 1.2] {'indices': [10, 11, 12], 'values': [1.0, 1.1,... \n",
|
|
"4 5 [1.3, 1.4, 1.5] {'indices': [13, 14, 15], 'values': [1.3, 1.4,... \n",
|
|
"\n",
|
|
" metadata blob \n",
|
|
"0 {'title': 'title1', 'url': 'url1'} {'extra_field': 'extra_value'} \n",
|
|
"1 {'title': 'title2', 'url': 'url2'} None \n",
|
|
"2 {'title': 'title3', 'url': 'url3'} None \n",
|
|
"3 {'title': 'title4', 'url': 'url4'} None \n",
|
|
"4 {'title': 'title5', 'url': 'url5'} {'another_field': 'another_value'} "
|
|
],
|
|
"text/html": [
|
|
"\n",
|
|
"\n",
|
|
" <div id=\"df-ee9831ef-5516-44bf-8080-3a2a74e6f00c\">\n",
|
|
" <div class=\"colab-df-container\">\n",
|
|
" <div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>id</th>\n",
|
|
" <th>values</th>\n",
|
|
" <th>sparse_values</th>\n",
|
|
" <th>metadata</th>\n",
|
|
" <th>blob</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>[0.1, 0.2, 0.3]</td>\n",
|
|
" <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
|
|
" <td>{'title': 'title1', 'url': 'url1'}</td>\n",
|
|
" <td>{'extra_field': 'extra_value'}</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2</td>\n",
|
|
" <td>[0.4, 0.5, 0.6]</td>\n",
|
|
" <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
|
|
" <td>{'title': 'title2', 'url': 'url2'}</td>\n",
|
|
" <td>None</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>[0.7, 0.8, 0.9]</td>\n",
|
|
" <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
|
|
" <td>{'title': 'title3', 'url': 'url3'}</td>\n",
|
|
" <td>None</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>4</td>\n",
|
|
" <td>[1.0, 1.1, 1.2]</td>\n",
|
|
" <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
|
|
" <td>{'title': 'title4', 'url': 'url4'}</td>\n",
|
|
" <td>None</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>5</td>\n",
|
|
" <td>[1.3, 1.4, 1.5]</td>\n",
|
|
" <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
|
|
" <td>{'title': 'title5', 'url': 'url5'}</td>\n",
|
|
" <td>{'another_field': 'another_value'}</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>\n",
|
|
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ee9831ef-5516-44bf-8080-3a2a74e6f00c')\"\n",
|
|
" title=\"Convert this dataframe to an interactive table.\"\n",
|
|
" style=\"display:none;\">\n",
|
|
"\n",
|
|
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
|
" width=\"24px\">\n",
|
|
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
|
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
|
" </svg>\n",
|
|
" </button>\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
" <div id=\"df-2dbf45c1-fcbc-44df-ae34-b177e0482493\">\n",
|
|
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-2dbf45c1-fcbc-44df-ae34-b177e0482493')\"\n",
|
|
" title=\"Suggest charts.\"\n",
|
|
" style=\"display:none;\">\n",
|
|
"\n",
|
|
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
|
" width=\"24px\">\n",
|
|
" <g>\n",
|
|
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
|
|
" </g>\n",
|
|
"</svg>\n",
|
|
" </button>\n",
|
|
" </div>\n",
|
|
"\n",
|
|
"<style>\n",
|
|
" .colab-df-quickchart {\n",
|
|
" background-color: #E8F0FE;\n",
|
|
" border: none;\n",
|
|
" border-radius: 50%;\n",
|
|
" cursor: pointer;\n",
|
|
" display: none;\n",
|
|
" fill: #1967D2;\n",
|
|
" height: 32px;\n",
|
|
" padding: 0 0 0 0;\n",
|
|
" width: 32px;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .colab-df-quickchart:hover {\n",
|
|
" background-color: #E2EBFA;\n",
|
|
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
|
" fill: #174EA6;\n",
|
|
" }\n",
|
|
"\n",
|
|
" [theme=dark] .colab-df-quickchart {\n",
|
|
" background-color: #3B4455;\n",
|
|
" fill: #D2E3FC;\n",
|
|
" }\n",
|
|
"\n",
|
|
" [theme=dark] .colab-df-quickchart:hover {\n",
|
|
" background-color: #434B5C;\n",
|
|
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
|
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
|
" fill: #FFFFFF;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"\n",
|
|
" <script>\n",
|
|
" async function quickchart(key) {\n",
|
|
" const containerElement = document.querySelector('#' + key);\n",
|
|
" const charts = await google.colab.kernel.invokeFunction(\n",
|
|
" 'suggestCharts', [key], {});\n",
|
|
" }\n",
|
|
" </script>\n",
|
|
"\n",
|
|
" <script>\n",
|
|
"\n",
|
|
"function displayQuickchartButton(domScope) {\n",
|
|
" let quickchartButtonEl =\n",
|
|
" domScope.querySelector('#df-2dbf45c1-fcbc-44df-ae34-b177e0482493 button.colab-df-quickchart');\n",
|
|
" quickchartButtonEl.style.display =\n",
|
|
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
|
"}\n",
|
|
"\n",
|
|
" displayQuickchartButton(document);\n",
|
|
" </script>\n",
|
|
" <style>\n",
|
|
" .colab-df-container {\n",
|
|
" display:flex;\n",
|
|
" flex-wrap:wrap;\n",
|
|
" gap: 12px;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .colab-df-convert {\n",
|
|
" background-color: #E8F0FE;\n",
|
|
" border: none;\n",
|
|
" border-radius: 50%;\n",
|
|
" cursor: pointer;\n",
|
|
" display: none;\n",
|
|
" fill: #1967D2;\n",
|
|
" height: 32px;\n",
|
|
" padding: 0 0 0 0;\n",
|
|
" width: 32px;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .colab-df-convert:hover {\n",
|
|
" background-color: #E2EBFA;\n",
|
|
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
|
" fill: #174EA6;\n",
|
|
" }\n",
|
|
"\n",
|
|
" [theme=dark] .colab-df-convert {\n",
|
|
" background-color: #3B4455;\n",
|
|
" fill: #D2E3FC;\n",
|
|
" }\n",
|
|
"\n",
|
|
" [theme=dark] .colab-df-convert:hover {\n",
|
|
" background-color: #434B5C;\n",
|
|
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
|
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
|
" fill: #FFFFFF;\n",
|
|
" }\n",
|
|
" </style>\n",
|
|
"\n",
|
|
" <script>\n",
|
|
" const buttonEl =\n",
|
|
" document.querySelector('#df-ee9831ef-5516-44bf-8080-3a2a74e6f00c button.colab-df-convert');\n",
|
|
" buttonEl.style.display =\n",
|
|
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
|
"\n",
|
|
" async function convertToInteractive(key) {\n",
|
|
" const element = document.querySelector('#df-ee9831ef-5516-44bf-8080-3a2a74e6f00c');\n",
|
|
" const dataTable =\n",
|
|
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
|
" [key], {});\n",
|
|
" if (!dataTable) return;\n",
|
|
"\n",
|
|
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
|
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
|
" + ' to learn more about interactive tables.';\n",
|
|
" element.innerHTML = '';\n",
|
|
" dataTable['output_type'] = 'display_data';\n",
|
|
" await google.colab.output.renderOutput(dataTable, element);\n",
|
|
" const docLink = document.createElement('div');\n",
|
|
" docLink.innerHTML = docLinkHtml;\n",
|
|
" element.appendChild(docLink);\n",
|
|
" }\n",
|
|
" </script>\n",
|
|
" </div>\n",
|
|
" </div>\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"execution_count": 3
|
|
}
|
|
],
|
|
"source": [
|
|
"documents = [\n",
|
|
" {\n",
|
|
" \"id\": \"1\",\n",
|
|
" \"values\": [0.1, 0.2, 0.3],\n",
|
|
" \"sparse_values\": {\"indices\": [1, 2, 3], \"values\": [0.1, 0.2, 0.3]},\n",
|
|
" \"metadata\": {\"title\": \"title1\", \"url\": \"url1\"},\n",
|
|
" \"blob\": {\"extra_field\": \"extra_value\"},\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"id\": \"2\",\n",
|
|
" \"values\": [0.4, 0.5, 0.6],\n",
|
|
" \"sparse_values\": {\"indices\": [4, 5, 6], \"values\": [0.4, 0.5, 0.6]},\n",
|
|
" \"metadata\": {\"title\": \"title2\", \"url\": \"url2\"},\n",
|
|
" \"blob\": None,\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"id\": \"3\",\n",
|
|
" \"values\": [0.7, 0.8, 0.9],\n",
|
|
" \"sparse_values\": {\"indices\": [7, 8, 9], \"values\": [0.7, 0.8, 0.9]},\n",
|
|
" \"metadata\": {\"title\": \"title3\", \"url\": \"url3\"},\n",
|
|
" \"blob\": None,\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"id\": \"4\",\n",
|
|
" \"values\": [1.0, 1.1, 1.2],\n",
|
|
" \"sparse_values\": {\"indices\": [10, 11, 12], \"values\": [1.0, 1.1, 1.2]},\n",
|
|
" \"metadata\": {\"title\": \"title4\", \"url\": \"url4\"},\n",
|
|
" \"blob\": None,\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"id\": \"5\",\n",
|
|
" \"values\": [1.3, 1.4, 1.5],\n",
|
|
" \"sparse_values\": {\"indices\": [13, 14, 15], \"values\": [1.3, 1.4, 1.5]},\n",
|
|
" \"metadata\": {\"title\": \"title5\", \"url\": \"url5\"},\n",
|
|
" \"blob\": {\"another_field\": \"another_value\"},\n",
|
|
" }\n",
|
|
"]\n",
|
|
"\n",
|
|
"df = pd.DataFrame(documents)\n",
|
|
"df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "c_zwxJ_OUaUt"
|
|
},
|
|
"source": [
|
|
"Some notes:\n",
|
|
"* Note that we have both a 'metadata' field and a 'blob' field. The 'metadata' field is the actual Pinecone metadata we will use in our index; 'blob' is an additional field that we can use to store any extra information we want to keep along with the Dataset.\n",
|
|
"* Here we used both 'values' and 'sparse_values'. However, 'sparse_values' is not a mandatory field; if you don't have sparse values, leave it empty."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "BcFx1wFqUaUt"
|
|
},
|
|
"source": [
|
|
"## Pinecone Dataset\n",
|
|
"\n",
|
|
"Now that we have our data ready, we can create a Pinecone Dataset. A Pinecone Dataset is a collection of documents, queries and metadata:\n",
|
|
"* Documents: a collection of records with Id, Vectors (dense, sparse) and metadata\n",
|
|
"* Queries: a collection of queries with Vectors (dense, sparse), metadata filter and top_k\n",
|
|
"* Metadata: a definition of the dataset: name, dimension, metric, embedding models, etc."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {
|
|
"id": "DCGFhTtyUaUt"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"!pip install -qU \\\n",
|
|
" pinecone-client==2.2.2 \\\n",
|
|
" pinecone-datasets==0.6.0"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {
|
|
"id": "S9NCQyTqUaUt"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from pinecone_datasets import Dataset, DatasetMetadata"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {
|
|
"id": "Eaiy3IjIUaUt",
|
|
"outputId": "4ff727bd-1a56-42bb-8cd2-e645b5ab390c",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": [
|
|
"{'name': '',\n",
|
|
" 'created_at': '2023-08-14 09:18:50.196514',\n",
|
|
" 'documents': 0,\n",
|
|
" 'queries': 0,\n",
|
|
" 'source': None,\n",
|
|
" 'license': None,\n",
|
|
" 'bucket': None,\n",
|
|
" 'task': None,\n",
|
|
" 'dense_model': {'name': '', 'tokenizer': None, 'dimension': 0},\n",
|
|
" 'sparse_model': None,\n",
|
|
" 'description': None,\n",
|
|
" 'tags': None,\n",
|
|
" 'args': None}"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"execution_count": 6
|
|
}
|
|
],
|
|
"source": [
|
|
"# creating a new empty metadata\n",
|
|
"metadata = DatasetMetadata.empty()\n",
|
|
"metadata.dict()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {
|
|
"id": "g_ACjKDOUaUt",
|
|
"outputId": "bc47c7d1-a3ef-4cf1-9e4b-7da6f82e111c",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 206
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": [
|
|
" id values sparse_values \\\n",
|
|
"0 1 [0.1, 0.2, 0.3] {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]} \n",
|
|
"1 2 [0.4, 0.5, 0.6] {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]} \n",
|
|
"2 3 [0.7, 0.8, 0.9] {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]} \n",
|
|
"3 4 [1.0, 1.1, 1.2] {'indices': [10, 11, 12], 'values': [1.0, 1.1,... \n",
|
|
"4 5 [1.3, 1.4, 1.5] {'indices': [13, 14, 15], 'values': [1.3, 1.4,... \n",
|
|
"\n",
|
|
" metadata blob \n",
|
|
"0 {'title': 'title1', 'url': 'url1'} {'extra_field': 'extra_value'} \n",
|
|
"1 {'title': 'title2', 'url': 'url2'} None \n",
|
|
"2 {'title': 'title3', 'url': 'url3'} None \n",
|
|
"3 {'title': 'title4', 'url': 'url4'} None \n",
|
|
"4 {'title': 'title5', 'url': 'url5'} {'another_field': 'another_value'} "
|
|
],
|
|
"text/html": [
|
|
"\n",
|
|
"\n",
|
|
" <div id=\"df-7047aefe-be6d-423c-b810-75e31017f008\">\n",
|
|
" <div class=\"colab-df-container\">\n",
|
|
" <div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>id</th>\n",
|
|
" <th>values</th>\n",
|
|
" <th>sparse_values</th>\n",
|
|
" <th>metadata</th>\n",
|
|
" <th>blob</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>[0.1, 0.2, 0.3]</td>\n",
|
|
" <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
|
|
" <td>{'title': 'title1', 'url': 'url1'}</td>\n",
|
|
" <td>{'extra_field': 'extra_value'}</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2</td>\n",
|
|
" <td>[0.4, 0.5, 0.6]</td>\n",
|
|
" <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
|
|
" <td>{'title': 'title2', 'url': 'url2'}</td>\n",
|
|
" <td>None</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>[0.7, 0.8, 0.9]</td>\n",
|
|
" <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
|
|
" <td>{'title': 'title3', 'url': 'url3'}</td>\n",
|
|
" <td>None</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>4</td>\n",
|
|
" <td>[1.0, 1.1, 1.2]</td>\n",
|
|
" <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
|
|
" <td>{'title': 'title4', 'url': 'url4'}</td>\n",
|
|
" <td>None</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>5</td>\n",
|
|
" <td>[1.3, 1.4, 1.5]</td>\n",
|
|
" <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
|
|
" <td>{'title': 'title5', 'url': 'url5'}</td>\n",
|
|
" <td>{'another_field': 'another_value'}</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>\n",
|
|
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7047aefe-be6d-423c-b810-75e31017f008')\"\n",
|
|
" title=\"Convert this dataframe to an interactive table.\"\n",
|
|
" style=\"display:none;\">\n",
|
|
"\n",
|
|
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
|
" width=\"24px\">\n",
|
|
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
|
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
|
" </svg>\n",
|
|
" </button>\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
" <div id=\"df-ba273d30-2d9c-43f2-b7d1-7b27125abb98\">\n",
|
|
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-ba273d30-2d9c-43f2-b7d1-7b27125abb98')\"\n",
|
|
" title=\"Suggest charts.\"\n",
|
|
" style=\"display:none;\">\n",
|
|
"\n",
|
|
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
|
" width=\"24px\">\n",
|
|
" <g>\n",
|
|
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
|
|
" </g>\n",
|
|
"</svg>\n",
|
|
" </button>\n",
|
|
" </div>\n",
|
|
"\n",
|
|
"<style>\n",
|
|
" .colab-df-quickchart {\n",
|
|
" background-color: #E8F0FE;\n",
|
|
" border: none;\n",
|
|
" border-radius: 50%;\n",
|
|
" cursor: pointer;\n",
|
|
" display: none;\n",
|
|
" fill: #1967D2;\n",
|
|
" height: 32px;\n",
|
|
" padding: 0 0 0 0;\n",
|
|
" width: 32px;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .colab-df-quickchart:hover {\n",
|
|
" background-color: #E2EBFA;\n",
|
|
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
|
" fill: #174EA6;\n",
|
|
" }\n",
|
|
"\n",
|
|
" [theme=dark] .colab-df-quickchart {\n",
|
|
" background-color: #3B4455;\n",
|
|
" fill: #D2E3FC;\n",
|
|
" }\n",
|
|
"\n",
|
|
" [theme=dark] .colab-df-quickchart:hover {\n",
|
|
" background-color: #434B5C;\n",
|
|
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
|
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
|
" fill: #FFFFFF;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"\n",
|
|
" <script>\n",
|
|
" async function quickchart(key) {\n",
|
|
" const containerElement = document.querySelector('#' + key);\n",
|
|
" const charts = await google.colab.kernel.invokeFunction(\n",
|
|
" 'suggestCharts', [key], {});\n",
|
|
" }\n",
|
|
" </script>\n",
|
|
"\n",
|
|
" <script>\n",
|
|
"\n",
|
|
"function displayQuickchartButton(domScope) {\n",
|
|
" let quickchartButtonEl =\n",
|
|
" domScope.querySelector('#df-ba273d30-2d9c-43f2-b7d1-7b27125abb98 button.colab-df-quickchart');\n",
|
|
" quickchartButtonEl.style.display =\n",
|
|
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
|
"}\n",
|
|
"\n",
|
|
" displayQuickchartButton(document);\n",
|
|
" </script>\n",
|
|
" <style>\n",
|
|
" .colab-df-container {\n",
|
|
" display:flex;\n",
|
|
" flex-wrap:wrap;\n",
|
|
" gap: 12px;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .colab-df-convert {\n",
|
|
" background-color: #E8F0FE;\n",
|
|
" border: none;\n",
|
|
" border-radius: 50%;\n",
|
|
" cursor: pointer;\n",
|
|
" display: none;\n",
|
|
" fill: #1967D2;\n",
|
|
" height: 32px;\n",
|
|
" padding: 0 0 0 0;\n",
|
|
" width: 32px;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .colab-df-convert:hover {\n",
|
|
" background-color: #E2EBFA;\n",
|
|
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
|
" fill: #174EA6;\n",
|
|
" }\n",
|
|
"\n",
|
|
" [theme=dark] .colab-df-convert {\n",
|
|
" background-color: #3B4455;\n",
|
|
" fill: #D2E3FC;\n",
|
|
" }\n",
|
|
"\n",
|
|
" [theme=dark] .colab-df-convert:hover {\n",
|
|
" background-color: #434B5C;\n",
|
|
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
|
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
|
" fill: #FFFFFF;\n",
|
|
" }\n",
|
|
" </style>\n",
|
|
"\n",
|
|
" <script>\n",
|
|
" const buttonEl =\n",
|
|
" document.querySelector('#df-7047aefe-be6d-423c-b810-75e31017f008 button.colab-df-convert');\n",
|
|
" buttonEl.style.display =\n",
|
|
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
|
"\n",
|
|
" async function convertToInteractive(key) {\n",
|
|
" const element = document.querySelector('#df-7047aefe-be6d-423c-b810-75e31017f008');\n",
|
|
" const dataTable =\n",
|
|
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
|
" [key], {});\n",
|
|
" if (!dataTable) return;\n",
|
|
"\n",
|
|
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
|
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
|
" + ' to learn more about interactive tables.';\n",
|
|
" element.innerHTML = '';\n",
|
|
" dataTable['output_type'] = 'display_data';\n",
|
|
" await google.colab.output.renderOutput(dataTable, element);\n",
|
|
" const docLink = document.createElement('div');\n",
|
|
" docLink.innerHTML = docLinkHtml;\n",
|
|
" element.appendChild(docLink);\n",
|
|
" }\n",
|
|
" </script>\n",
|
|
" </div>\n",
|
|
" </div>\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"execution_count": 7
|
|
}
|
|
],
|
|
"source": [
|
|
"ds = Dataset.from_pandas(documents=df, q=None, metadata=metadata)\n",
|
|
"ds.documents"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "CGzdg2sZUaUt"
|
|
},
|
|
"source": [
|
|
"## Save dataset to local path\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {
|
|
"id": "IVkK6fJUUaUt",
|
|
"outputId": "943ff58d-91d6-4a75-e218-d833214fee1b",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stderr",
|
|
"text": [
|
|
"/usr/local/lib/python3.10/dist-packages/pinecone_datasets/dataset.py:433: UserWarning: Queries are empty, not saving queries\n",
|
|
" warnings.warn(\"Queries are empty, not saving queries\")\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"ds.to_path('/tmp/ds')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "B5tvJlnSUaUu"
|
|
},
|
|
"source": [
|
|
"### Reload dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {
|
|
"id": "pLEhwSaRUaUu"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"new_ds = Dataset.from_path('/tmp/ds')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {
|
|
"id": "J5LJGYqxUaUu",
|
|
"outputId": "120f1ebf-e30a-4913-a84f-727e52e2add8",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 206
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": [
|
|
" id values sparse_values \\\n",
|
|
"0 1 [0.1, 0.2, 0.3] {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]} \n",
|
|
"1 2 [0.4, 0.5, 0.6] {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]} \n",
|
|
"2 3 [0.7, 0.8, 0.9] {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]} \n",
|
|
"3 4 [1.0, 1.1, 1.2] {'indices': [10, 11, 12], 'values': [1.0, 1.1,... \n",
|
|
"4 5 [1.3, 1.4, 1.5] {'indices': [13, 14, 15], 'values': [1.3, 1.4,... \n",
|
|
"\n",
|
|
" metadata \\\n",
|
|
"0 {'title': 'title1', 'url': 'url1'} \n",
|
|
"1 {'title': 'title2', 'url': 'url2'} \n",
|
|
"2 {'title': 'title3', 'url': 'url3'} \n",
|
|
"3 {'title': 'title4', 'url': 'url4'} \n",
|
|
"4 {'title': 'title5', 'url': 'url5'} \n",
|
|
"\n",
|
|
" blob \n",
|
|
"0 {'another_field': None, 'extra_field': 'extra_... \n",
|
|
"1 None \n",
|
|
"2 None \n",
|
|
"3 None \n",
|
|
"4 {'another_field': 'another_value', 'extra_fiel... "
|
|
],
|
|
"text/html": [
|
|
"\n",
|
|
"\n",
|
|
" <div id=\"df-924edd68-488a-465d-825a-1743d1db0e66\">\n",
|
|
" <div class=\"colab-df-container\">\n",
|
|
" <div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>id</th>\n",
|
|
" <th>values</th>\n",
|
|
" <th>sparse_values</th>\n",
|
|
" <th>metadata</th>\n",
|
|
" <th>blob</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>[0.1, 0.2, 0.3]</td>\n",
|
|
" <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
|
|
" <td>{'title': 'title1', 'url': 'url1'}</td>\n",
|
|
" <td>{'another_field': None, 'extra_field': 'extra_...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2</td>\n",
|
|
" <td>[0.4, 0.5, 0.6]</td>\n",
|
|
" <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
|
|
" <td>{'title': 'title2', 'url': 'url2'}</td>\n",
|
|
" <td>None</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>[0.7, 0.8, 0.9]</td>\n",
|
|
" <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
|
|
" <td>{'title': 'title3', 'url': 'url3'}</td>\n",
|
|
" <td>None</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>4</td>\n",
|
|
" <td>[1.0, 1.1, 1.2]</td>\n",
|
|
" <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
|
|
" <td>{'title': 'title4', 'url': 'url4'}</td>\n",
|
|
" <td>None</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>5</td>\n",
|
|
" <td>[1.3, 1.4, 1.5]</td>\n",
|
|
" <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
|
|
" <td>{'title': 'title5', 'url': 'url5'}</td>\n",
|
|
" <td>{'another_field': 'another_value', 'extra_fiel...</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>\n",
|
|
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-924edd68-488a-465d-825a-1743d1db0e66')\"\n",
|
|
" title=\"Convert this dataframe to an interactive table.\"\n",
|
|
" style=\"display:none;\">\n",
|
|
"\n",
|
|
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
|
" width=\"24px\">\n",
|
|
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
|
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
|
" </svg>\n",
|
|
" </button>\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
" <div id=\"df-42148e02-48b4-4818-94f5-46b211ba40a7\">\n",
|
|
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-42148e02-48b4-4818-94f5-46b211ba40a7')\"\n",
|
|
" title=\"Suggest charts.\"\n",
|
|
" style=\"display:none;\">\n",
|
|
"\n",
|
|
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
|
" width=\"24px\">\n",
|
|
" <g>\n",
|
|
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
|
|
" </g>\n",
|
|
"</svg>\n",
|
|
" </button>\n",
|
|
" </div>\n",
|
|
"\n",
|
|
"<style>\n",
|
|
" .colab-df-quickchart {\n",
|
|
" background-color: #E8F0FE;\n",
|
|
" border: none;\n",
|
|
" border-radius: 50%;\n",
|
|
" cursor: pointer;\n",
|
|
" display: none;\n",
|
|
" fill: #1967D2;\n",
|
|
" height: 32px;\n",
|
|
" padding: 0 0 0 0;\n",
|
|
" width: 32px;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .colab-df-quickchart:hover {\n",
|
|
" background-color: #E2EBFA;\n",
|
|
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
|
" fill: #174EA6;\n",
|
|
" }\n",
|
|
"\n",
|
|
" [theme=dark] .colab-df-quickchart {\n",
|
|
" background-color: #3B4455;\n",
|
|
" fill: #D2E3FC;\n",
|
|
" }\n",
|
|
"\n",
|
|
" [theme=dark] .colab-df-quickchart:hover {\n",
|
|
" background-color: #434B5C;\n",
|
|
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
|
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
|
" fill: #FFFFFF;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"\n",
|
|
" <script>\n",
|
|
" async function quickchart(key) {\n",
|
|
" const containerElement = document.querySelector('#' + key);\n",
|
|
" const charts = await google.colab.kernel.invokeFunction(\n",
|
|
" 'suggestCharts', [key], {});\n",
|
|
" }\n",
|
|
" </script>\n",
|
|
"\n",
|
|
" <script>\n",
|
|
"\n",
|
|
"function displayQuickchartButton(domScope) {\n",
|
|
" let quickchartButtonEl =\n",
|
|
" domScope.querySelector('#df-42148e02-48b4-4818-94f5-46b211ba40a7 button.colab-df-quickchart');\n",
|
|
" quickchartButtonEl.style.display =\n",
|
|
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
|
"}\n",
|
|
"\n",
|
|
" displayQuickchartButton(document);\n",
|
|
" </script>\n",
|
|
" <style>\n",
|
|
" .colab-df-container {\n",
|
|
" display:flex;\n",
|
|
" flex-wrap:wrap;\n",
|
|
" gap: 12px;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .colab-df-convert {\n",
|
|
" background-color: #E8F0FE;\n",
|
|
" border: none;\n",
|
|
" border-radius: 50%;\n",
|
|
" cursor: pointer;\n",
|
|
" display: none;\n",
|
|
" fill: #1967D2;\n",
|
|
" height: 32px;\n",
|
|
" padding: 0 0 0 0;\n",
|
|
" width: 32px;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .colab-df-convert:hover {\n",
|
|
" background-color: #E2EBFA;\n",
|
|
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
|
" fill: #174EA6;\n",
|
|
" }\n",
|
|
"\n",
|
|
" [theme=dark] .colab-df-convert {\n",
|
|
" background-color: #3B4455;\n",
|
|
" fill: #D2E3FC;\n",
|
|
" }\n",
|
|
"\n",
|
|
" [theme=dark] .colab-df-convert:hover {\n",
|
|
" background-color: #434B5C;\n",
|
|
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
|
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
|
" fill: #FFFFFF;\n",
|
|
" }\n",
|
|
" </style>\n",
|
|
"\n",
|
|
" <script>\n",
|
|
" const buttonEl =\n",
|
|
" document.querySelector('#df-924edd68-488a-465d-825a-1743d1db0e66 button.colab-df-convert');\n",
|
|
" buttonEl.style.display =\n",
|
|
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
|
"\n",
|
|
" async function convertToInteractive(key) {\n",
|
|
" const element = document.querySelector('#df-924edd68-488a-465d-825a-1743d1db0e66');\n",
|
|
" const dataTable =\n",
|
|
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
|
" [key], {});\n",
|
|
" if (!dataTable) return;\n",
|
|
"\n",
|
|
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
|
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
|
" + ' to learn more about interactive tables.';\n",
|
|
" element.innerHTML = '';\n",
|
|
" dataTable['output_type'] = 'display_data';\n",
|
|
" await google.colab.output.renderOutput(dataTable, element);\n",
|
|
" const docLink = document.createElement('div');\n",
|
|
" docLink.innerHTML = docLinkHtml;\n",
|
|
" element.appendChild(docLink);\n",
|
|
" }\n",
|
|
" </script>\n",
|
|
" </div>\n",
|
|
" </div>\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"execution_count": 10
|
|
}
|
|
],
|
|
"source": [
|
|
"new_ds.documents"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.6"
|
|
},
|
|
"orig_nbformat": 4,
|
|
"colab": {
|
|
"provenance": []
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0
|
|
} |