upsert is blocked

This commit is contained in:
Roy Miara
2023-06-11 17:04:23 +03:00
parent 6c8b6bf3e9
commit aa20b9eb99

View File

@@ -27,192 +27,97 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting pinecone-datasets==0.5.0rc2\n",
" Using cached pinecone_datasets-0.5.0rc2-py3-none-any.whl (12 kB)\n",
"Collecting datasets==2.12.0\n",
" Using cached datasets-2.12.0-py3-none-any.whl (474 kB)\n",
"Collecting sentence-transformers==2.2.2\n",
" Using cached sentence_transformers-2.2.2-py3-none-any.whl\n",
"Collecting fsspec<2024.0.0,>=2023.1.0 (from pinecone-datasets==0.5.0rc2)\n",
" Using cached fsspec-2023.6.0-py3-none-any.whl (163 kB)\n",
"Collecting gcsfs<2024.0.0,>=2023.1.0 (from pinecone-datasets==0.5.0rc2)\n",
" Using cached gcsfs-2023.5.0-py2.py3-none-any.whl (26 kB)\n",
"Collecting pandas<3.0.0,>=2.0.0 (from pinecone-datasets==0.5.0rc2)\n",
" Downloading pandas-2.0.2-cp39-cp39-macosx_11_0_arm64.whl (10.9 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.9/10.9 MB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n",
"\u001b[?25hCollecting pinecone-client==3.0.0rc2 (from pinecone-datasets==0.5.0rc2)\n",
" Using cached pinecone_client-3.0.0rc2-cp39-cp39-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl (6.6 MB)\n",
"Collecting polars<0.17.0,>=0.16.4 (from pinecone-datasets==0.5.0rc2)\n",
" Using cached polars-0.16.18-cp37-abi3-macosx_11_0_arm64.whl (13.9 MB)\n",
"Collecting protobuf<3.20.0,>=3.19.3 (from pinecone-datasets==0.5.0rc2)\n",
" Using cached protobuf-3.19.6-py2.py3-none-any.whl (162 kB)\n",
"Collecting pyarrow<13.0.0,>=12.0.0 (from pinecone-datasets==0.5.0rc2)\n",
" Downloading pyarrow-12.0.0-cp39-cp39-macosx_11_0_arm64.whl (22.7 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m22.7/22.7 MB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hCollecting pydantic<2.0.0,>=1.10.5 (from pinecone-datasets==0.5.0rc2)\n",
" Downloading pydantic-1.10.9-cp39-cp39-macosx_11_0_arm64.whl (2.6 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n",
"\u001b[?25hCollecting s3fs<2024.0.0,>=2023.1.0 (from pinecone-datasets==0.5.0rc2)\n",
" Using cached s3fs-2023.6.0-py3-none-any.whl (28 kB)\n",
"Collecting numpy>=1.17 (from datasets==2.12.0)\n",
" Downloading numpy-1.24.3-cp39-cp39-macosx_11_0_arm64.whl (13.9 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.9/13.9 MB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hCollecting dill<0.3.7,>=0.3.0 (from datasets==2.12.0)\n",
" Using cached dill-0.3.6-py3-none-any.whl (110 kB)\n",
"Collecting requests>=2.19.0 (from datasets==2.12.0)\n",
" Using cached requests-2.31.0-py3-none-any.whl (62 kB)\n",
"Collecting tqdm>=4.62.1 (from datasets==2.12.0)\n",
" Using cached tqdm-4.65.0-py3-none-any.whl (77 kB)\n",
"Collecting xxhash (from datasets==2.12.0)\n",
" Using cached xxhash-3.2.0-cp39-cp39-macosx_11_0_arm64.whl (31 kB)\n",
"Collecting multiprocess (from datasets==2.12.0)\n",
" Using cached multiprocess-0.70.14-py39-none-any.whl (132 kB)\n",
"Collecting aiohttp (from datasets==2.12.0)\n",
" Downloading aiohttp-3.8.4-cp39-cp39-macosx_11_0_arm64.whl (338 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m338.3/338.3 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting huggingface-hub<1.0.0,>=0.11.0 (from datasets==2.12.0)\n",
" Using cached huggingface_hub-0.15.1-py3-none-any.whl (236 kB)\n",
"Collecting pinecone-datasets==0.5.0rc5\n",
" Downloading pinecone_datasets-0.5.0rc5-py3-none-any.whl (12 kB)\n",
"Requirement already satisfied: datasets==2.12.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (2.12.0)\n",
"Requirement already satisfied: sentence-transformers==2.2.2 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (2.2.2)\n",
"Requirement already satisfied: fsspec<2024.0.0,>=2023.1.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pinecone-datasets==0.5.0rc5) (2023.5.0)\n",
"Requirement already satisfied: gcsfs<2024.0.0,>=2023.1.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pinecone-datasets==0.5.0rc5) (2023.5.0)\n",
"Requirement already satisfied: pandas<3.0.0,>=2.0.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pinecone-datasets==0.5.0rc5) (2.0.2)\n",
"Requirement already satisfied: pinecone-client==3.0.0rc2 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pinecone-datasets==0.5.0rc5) (3.0.0rc2)\n",
"Requirement already satisfied: polars<0.17.0,>=0.16.4 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pinecone-datasets==0.5.0rc5) (0.16.18)\n",
"Requirement already satisfied: protobuf<3.20.0,>=3.19.3 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pinecone-datasets==0.5.0rc5) (3.19.6)\n",
"Requirement already satisfied: pyarrow<13.0.0,>=12.0.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pinecone-datasets==0.5.0rc5) (12.0.0)\n",
"Requirement already satisfied: pydantic<2.0.0,>=1.10.5 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pinecone-datasets==0.5.0rc5) (1.10.9)\n",
"Requirement already satisfied: s3fs<2024.0.0,>=2023.1.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pinecone-datasets==0.5.0rc5) (2023.5.0)\n",
"Requirement already satisfied: numpy>=1.17 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from datasets==2.12.0) (1.24.3)\n",
"Requirement already satisfied: dill<0.3.7,>=0.3.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from datasets==2.12.0) (0.3.6)\n",
"Requirement already satisfied: requests>=2.19.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from datasets==2.12.0) (2.31.0)\n",
"Requirement already satisfied: tqdm>=4.62.1 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from datasets==2.12.0) (4.65.0)\n",
"Requirement already satisfied: xxhash in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from datasets==2.12.0) (3.2.0)\n",
"Requirement already satisfied: multiprocess in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from datasets==2.12.0) (0.70.14)\n",
"Requirement already satisfied: aiohttp in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from datasets==2.12.0) (3.8.4)\n",
"Requirement already satisfied: huggingface-hub<1.0.0,>=0.11.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from datasets==2.12.0) (0.15.1)\n",
"Requirement already satisfied: packaging in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from datasets==2.12.0) (23.1)\n",
"Collecting responses<0.19 (from datasets==2.12.0)\n",
" Using cached responses-0.18.0-py3-none-any.whl (38 kB)\n",
"Collecting pyyaml>=5.1 (from datasets==2.12.0)\n",
" Using cached PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl (173 kB)\n",
"Collecting transformers<5.0.0,>=4.6.0 (from sentence-transformers==2.2.2)\n",
" Using cached transformers-4.30.1-py3-none-any.whl (7.2 MB)\n",
"Collecting torch>=1.6.0 (from sentence-transformers==2.2.2)\n",
" Downloading torch-2.0.1-cp39-none-macosx_11_0_arm64.whl (55.8 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.8/55.8 MB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hCollecting torchvision (from sentence-transformers==2.2.2)\n",
" Downloading torchvision-0.15.2-cp39-cp39-macosx_11_0_arm64.whl (1.4 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25hCollecting scikit-learn (from sentence-transformers==2.2.2)\n",
" Downloading scikit_learn-1.2.2-cp39-cp39-macosx_12_0_arm64.whl (8.5 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.5/8.5 MB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25hCollecting scipy (from sentence-transformers==2.2.2)\n",
" Downloading scipy-1.10.1-cp39-cp39-macosx_12_0_arm64.whl (28.9 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m28.9/28.9 MB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0mm00:01\u001b[0m\n",
"\u001b[?25hCollecting nltk (from sentence-transformers==2.2.2)\n",
" Using cached nltk-3.8.1-py3-none-any.whl (1.5 MB)\n",
"Collecting sentencepiece (from sentence-transformers==2.2.2)\n",
" Downloading sentencepiece-0.1.99-cp39-cp39-macosx_11_0_arm64.whl (1.2 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0mm\n",
"\u001b[?25hCollecting attrs>=17.3.0 (from aiohttp->datasets==2.12.0)\n",
" Using cached attrs-23.1.0-py3-none-any.whl (61 kB)\n",
"Collecting charset-normalizer<4.0,>=2.0 (from aiohttp->datasets==2.12.0)\n",
" Using cached charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl (122 kB)\n",
"Collecting multidict<7.0,>=4.5 (from aiohttp->datasets==2.12.0)\n",
" Downloading multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl (29 kB)\n",
"Collecting async-timeout<5.0,>=4.0.0a3 (from aiohttp->datasets==2.12.0)\n",
" Using cached async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n",
"Collecting yarl<2.0,>=1.0 (from aiohttp->datasets==2.12.0)\n",
" Downloading yarl-1.9.2-cp39-cp39-macosx_11_0_arm64.whl (62 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting frozenlist>=1.1.1 (from aiohttp->datasets==2.12.0)\n",
" Downloading frozenlist-1.3.3-cp39-cp39-macosx_11_0_arm64.whl (35 kB)\n",
"Collecting aiosignal>=1.1.2 (from aiohttp->datasets==2.12.0)\n",
" Using cached aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n",
"Requirement already satisfied: decorator>4.1.2 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2) (5.1.1)\n",
"INFO: pip is looking at multiple versions of gcsfs to determine which version is compatible with other requirements. This could take a while.\n",
"Collecting gcsfs<2024.0.0,>=2023.1.0 (from pinecone-datasets==0.5.0rc2)\n",
" Using cached gcsfs-2023.4.0-py2.py3-none-any.whl (26 kB)\n",
" Using cached gcsfs-2023.3.0-py2.py3-none-any.whl (26 kB)\n",
" Using cached gcsfs-2023.1.0-py2.py3-none-any.whl (26 kB)\n",
"Collecting fsspec[http]>=2021.11.1 (from datasets==2.12.0)\n",
" Using cached fsspec-2023.5.0-py3-none-any.whl (160 kB)\n",
"Collecting google-auth>=1.2 (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached google_auth-2.19.1-py2.py3-none-any.whl (181 kB)\n",
"Collecting google-auth-oauthlib (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached google_auth_oauthlib-1.0.0-py2.py3-none-any.whl (18 kB)\n",
"Collecting google-cloud-storage (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached google_cloud_storage-2.9.0-py2.py3-none-any.whl (113 kB)\n",
"Collecting filelock (from huggingface-hub<1.0.0,>=0.11.0->datasets==2.12.0)\n",
" Using cached filelock-3.12.1-py3-none-any.whl (10 kB)\n",
"Requirement already satisfied: responses<0.19 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from datasets==2.12.0) (0.18.0)\n",
"Requirement already satisfied: pyyaml>=5.1 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from datasets==2.12.0) (6.0)\n",
"Requirement already satisfied: transformers<5.0.0,>=4.6.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from sentence-transformers==2.2.2) (4.30.1)\n",
"Requirement already satisfied: torch>=1.6.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from sentence-transformers==2.2.2) (2.0.1)\n",
"Requirement already satisfied: torchvision in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from sentence-transformers==2.2.2) (0.15.2)\n",
"Requirement already satisfied: scikit-learn in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from sentence-transformers==2.2.2) (1.2.2)\n",
"Requirement already satisfied: scipy in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from sentence-transformers==2.2.2) (1.10.1)\n",
"Requirement already satisfied: nltk in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from sentence-transformers==2.2.2) (3.8.1)\n",
"Requirement already satisfied: sentencepiece in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from sentence-transformers==2.2.2) (0.1.99)\n",
"Requirement already satisfied: attrs>=17.3.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from aiohttp->datasets==2.12.0) (23.1.0)\n",
"Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from aiohttp->datasets==2.12.0) (3.1.0)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from aiohttp->datasets==2.12.0) (6.0.4)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from aiohttp->datasets==2.12.0) (4.0.2)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from aiohttp->datasets==2.12.0) (1.9.2)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from aiohttp->datasets==2.12.0) (1.3.3)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from aiohttp->datasets==2.12.0) (1.3.1)\n",
"Requirement already satisfied: decorator>4.1.2 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (5.1.1)\n",
"Requirement already satisfied: google-auth>=1.2 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (2.19.1)\n",
"Requirement already satisfied: google-auth-oauthlib in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (1.0.0)\n",
"Requirement already satisfied: google-cloud-storage in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (2.9.0)\n",
"Requirement already satisfied: filelock in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets==2.12.0) (3.12.1)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets==2.12.0) (4.6.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets==0.5.0rc2) (2.8.2)\n",
"Collecting pytz>=2020.1 (from pandas<3.0.0,>=2.0.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached pytz-2023.3-py2.py3-none-any.whl (502 kB)\n",
"Collecting tzdata>=2022.1 (from pandas<3.0.0,>=2.0.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached tzdata-2023.3-py2.py3-none-any.whl (341 kB)\n",
"Collecting idna<4,>=2.5 (from requests>=2.19.0->datasets==2.12.0)\n",
" Using cached idna-3.4-py3-none-any.whl (61 kB)\n",
"Collecting urllib3<3,>=1.21.1 (from requests>=2.19.0->datasets==2.12.0)\n",
" Using cached urllib3-2.0.3-py3-none-any.whl (123 kB)\n",
"Collecting certifi>=2017.4.17 (from requests>=2.19.0->datasets==2.12.0)\n",
" Using cached certifi-2023.5.7-py3-none-any.whl (156 kB)\n",
"Collecting aiobotocore~=2.5.0 (from s3fs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached aiobotocore-2.5.0-py3-none-any.whl (72 kB)\n",
"INFO: pip is looking at multiple versions of s3fs to determine which version is compatible with other requirements. This could take a while.\n",
"Collecting s3fs<2024.0.0,>=2023.1.0 (from pinecone-datasets==0.5.0rc2)\n",
" Using cached s3fs-2023.5.0-py3-none-any.whl (28 kB)\n",
"Collecting sympy (from torch>=1.6.0->sentence-transformers==2.2.2)\n",
" Using cached sympy-1.12-py3-none-any.whl (5.7 MB)\n",
"Collecting networkx (from torch>=1.6.0->sentence-transformers==2.2.2)\n",
" Using cached networkx-3.1-py3-none-any.whl (2.1 MB)\n",
"Collecting jinja2 (from torch>=1.6.0->sentence-transformers==2.2.2)\n",
" Using cached Jinja2-3.1.2-py3-none-any.whl (133 kB)\n",
"Collecting regex!=2019.12.17 (from transformers<5.0.0,>=4.6.0->sentence-transformers==2.2.2)\n",
" Downloading regex-2023.6.3-cp39-cp39-macosx_11_0_arm64.whl (288 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m289.0/289.0 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers<5.0.0,>=4.6.0->sentence-transformers==2.2.2)\n",
" Downloading tokenizers-0.13.3-cp39-cp39-macosx_12_0_arm64.whl (3.9 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.9/3.9 MB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0mm\n",
"\u001b[?25hCollecting safetensors>=0.3.1 (from transformers<5.0.0,>=4.6.0->sentence-transformers==2.2.2)\n",
" Downloading safetensors-0.3.1-cp39-cp39-macosx_12_0_arm64.whl (401 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m401.8/401.8 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hCollecting click (from nltk->sentence-transformers==2.2.2)\n",
" Using cached click-8.1.3-py3-none-any.whl (96 kB)\n",
"Collecting joblib (from nltk->sentence-transformers==2.2.2)\n",
" Using cached joblib-1.2.0-py3-none-any.whl (297 kB)\n",
"Collecting threadpoolctl>=2.0.0 (from scikit-learn->sentence-transformers==2.2.2)\n",
" Using cached threadpoolctl-3.1.0-py3-none-any.whl (14 kB)\n",
"Collecting pillow!=8.3.*,>=5.3.0 (from torchvision->sentence-transformers==2.2.2)\n",
" Downloading Pillow-9.5.0-cp39-cp39-macosx_11_0_arm64.whl (3.1 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n",
"\u001b[?25hCollecting botocore<1.29.77,>=1.29.76 (from aiobotocore~=2.5.0->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached botocore-1.29.76-py3-none-any.whl (10.4 MB)\n",
"Collecting wrapt>=1.10.10 (from aiobotocore~=2.5.0->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached wrapt-1.15.0-cp39-cp39-macosx_11_0_arm64.whl (36 kB)\n",
"Collecting aioitertools>=0.5.1 (from aiobotocore~=2.5.0->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached aioitertools-0.11.0-py3-none-any.whl (23 kB)\n",
"Collecting cachetools<6.0,>=2.0.0 (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached cachetools-5.3.1-py3-none-any.whl (9.3 kB)\n",
"Collecting pyasn1-modules>=0.2.1 (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached pyasn1_modules-0.3.0-py2.py3-none-any.whl (181 kB)\n",
"Collecting rsa<5,>=3.1.4 (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached rsa-4.9-py3-none-any.whl (34 kB)\n",
"Requirement already satisfied: six>=1.9.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2) (1.16.0)\n",
"Collecting urllib3<3,>=1.21.1 (from requests>=2.19.0->datasets==2.12.0)\n",
" Using cached urllib3-1.26.16-py2.py3-none-any.whl (143 kB)\n",
"Collecting requests-oauthlib>=0.7.0 (from google-auth-oauthlib->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached requests_oauthlib-1.3.1-py2.py3-none-any.whl (23 kB)\n",
"Collecting google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5 (from google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached google_api_core-2.11.0-py3-none-any.whl (120 kB)\n",
"Collecting google-cloud-core<3.0dev,>=2.3.0 (from google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached google_cloud_core-2.3.2-py2.py3-none-any.whl (29 kB)\n",
"Collecting google-resumable-media>=2.3.2 (from google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached google_resumable_media-2.5.0-py2.py3-none-any.whl (77 kB)\n",
"Collecting MarkupSafe>=2.0 (from jinja2->torch>=1.6.0->sentence-transformers==2.2.2)\n",
" Downloading MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl (17 kB)\n",
"Collecting mpmath>=0.19 (from sympy->torch>=1.6.0->sentence-transformers==2.2.2)\n",
" Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n",
"Collecting jmespath<2.0.0,>=0.7.1 (from botocore<1.29.77,>=1.29.76->aiobotocore~=2.5.0->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached jmespath-1.0.1-py3-none-any.whl (20 kB)\n",
"Collecting googleapis-common-protos<2.0dev,>=1.56.2 (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached googleapis_common_protos-1.59.0-py2.py3-none-any.whl (223 kB)\n",
"Collecting google-crc32c<2.0dev,>=1.0 (from google-resumable-media>=2.3.2->google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached google_crc32c-1.5.0-cp39-cp39-macosx_10_9_universal2.whl (32 kB)\n",
"Collecting pyasn1<0.6.0,>=0.4.6 (from pyasn1-modules>=0.2.1->google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached pyasn1-0.5.0-py2.py3-none-any.whl (83 kB)\n",
"Collecting oauthlib>=3.0.0 (from requests-oauthlib>=0.7.0->google-auth-oauthlib->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc2)\n",
" Using cached oauthlib-3.2.2-py3-none-any.whl (151 kB)\n",
"Installing collected packages: tokenizers, sentencepiece, safetensors, pytz, mpmath, xxhash, wrapt, urllib3, tzdata, tqdm, threadpoolctl, sympy, regex, pyyaml, pydantic, pyasn1, protobuf, polars, pinecone-client, pillow, oauthlib, numpy, networkx, multidict, MarkupSafe, joblib, jmespath, idna, google-crc32c, fsspec, frozenlist, filelock, dill, click, charset-normalizer, certifi, cachetools, attrs, async-timeout, aioitertools, yarl, scipy, rsa, requests, pyasn1-modules, pyarrow, pandas, nltk, multiprocess, jinja2, googleapis-common-protos, google-resumable-media, botocore, aiosignal, torch, scikit-learn, responses, requests-oauthlib, huggingface-hub, google-auth, aiohttp, transformers, torchvision, google-auth-oauthlib, google-api-core, aiobotocore, sentence-transformers, s3fs, google-cloud-core, datasets, google-cloud-storage, gcsfs, pinecone-datasets\n",
"Successfully installed MarkupSafe-2.1.3 aiobotocore-2.5.0 aiohttp-3.8.4 aioitertools-0.11.0 aiosignal-1.3.1 async-timeout-4.0.2 attrs-23.1.0 botocore-1.29.76 cachetools-5.3.1 certifi-2023.5.7 charset-normalizer-3.1.0 click-8.1.3 datasets-2.12.0 dill-0.3.6 filelock-3.12.1 frozenlist-1.3.3 fsspec-2023.5.0 gcsfs-2023.5.0 google-api-core-2.11.0 google-auth-2.19.1 google-auth-oauthlib-1.0.0 google-cloud-core-2.3.2 google-cloud-storage-2.9.0 google-crc32c-1.5.0 google-resumable-media-2.5.0 googleapis-common-protos-1.59.0 huggingface-hub-0.15.1 idna-3.4 jinja2-3.1.2 jmespath-1.0.1 joblib-1.2.0 mpmath-1.3.0 multidict-6.0.4 multiprocess-0.70.14 networkx-3.1 nltk-3.8.1 numpy-1.24.3 oauthlib-3.2.2 pandas-2.0.2 pillow-9.5.0 pinecone-client-3.0.0rc2 pinecone-datasets-0.5.0rc2 polars-0.16.18 protobuf-3.19.6 pyarrow-12.0.0 pyasn1-0.5.0 pyasn1-modules-0.3.0 pydantic-1.10.9 pytz-2023.3 pyyaml-6.0 regex-2023.6.3 requests-2.31.0 requests-oauthlib-1.3.1 responses-0.18.0 rsa-4.9 s3fs-2023.5.0 safetensors-0.3.1 scikit-learn-1.2.2 scipy-1.10.1 sentence-transformers-2.2.2 sentencepiece-0.1.99 sympy-1.12 threadpoolctl-3.1.0 tokenizers-0.13.3 torch-2.0.1 torchvision-0.15.2 tqdm-4.65.0 transformers-4.30.1 tzdata-2023.3 urllib3-1.26.16 wrapt-1.15.0 xxhash-3.2.0 yarl-1.9.2\n"
"Requirement already satisfied: python-dateutil>=2.8.2 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets==0.5.0rc5) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets==0.5.0rc5) (2023.3)\n",
"Requirement already satisfied: tzdata>=2022.1 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets==0.5.0rc5) (2023.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from requests>=2.19.0->datasets==2.12.0) (3.4)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from requests>=2.19.0->datasets==2.12.0) (1.26.16)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from requests>=2.19.0->datasets==2.12.0) (2023.5.7)\n",
"Requirement already satisfied: aiobotocore~=2.5.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from s3fs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (2.5.0)\n",
"Requirement already satisfied: sympy in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from torch>=1.6.0->sentence-transformers==2.2.2) (1.12)\n",
"Requirement already satisfied: networkx in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from torch>=1.6.0->sentence-transformers==2.2.2) (3.1)\n",
"Requirement already satisfied: jinja2 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from torch>=1.6.0->sentence-transformers==2.2.2) (3.1.2)\n",
"Requirement already satisfied: regex!=2019.12.17 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from transformers<5.0.0,>=4.6.0->sentence-transformers==2.2.2) (2023.6.3)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from transformers<5.0.0,>=4.6.0->sentence-transformers==2.2.2) (0.13.3)\n",
"Requirement already satisfied: safetensors>=0.3.1 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from transformers<5.0.0,>=4.6.0->sentence-transformers==2.2.2) (0.3.1)\n",
"Requirement already satisfied: click in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from nltk->sentence-transformers==2.2.2) (8.1.3)\n",
"Requirement already satisfied: joblib in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from nltk->sentence-transformers==2.2.2) (1.2.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from scikit-learn->sentence-transformers==2.2.2) (3.1.0)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from torchvision->sentence-transformers==2.2.2) (9.5.0)\n",
"Requirement already satisfied: botocore<1.29.77,>=1.29.76 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from aiobotocore~=2.5.0->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (1.29.76)\n",
"Requirement already satisfied: wrapt>=1.10.10 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from aiobotocore~=2.5.0->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (1.15.0)\n",
"Requirement already satisfied: aioitertools>=0.5.1 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from aiobotocore~=2.5.0->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (0.11.0)\n",
"Requirement already satisfied: cachetools<6.0,>=2.0.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (5.3.1)\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (0.3.0)\n",
"Requirement already satisfied: rsa<5,>=3.1.4 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (4.9)\n",
"Requirement already satisfied: six>=1.9.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (1.16.0)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from google-auth-oauthlib->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (1.3.1)\n",
"Requirement already satisfied: google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (2.11.0)\n",
"Requirement already satisfied: google-cloud-core<3.0dev,>=2.3.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (2.3.2)\n",
"Requirement already satisfied: google-resumable-media>=2.3.2 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (2.5.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from jinja2->torch>=1.6.0->sentence-transformers==2.2.2) (2.1.3)\n",
"Requirement already satisfied: mpmath>=0.19 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from sympy->torch>=1.6.0->sentence-transformers==2.2.2) (1.3.0)\n",
"Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from botocore<1.29.77,>=1.29.76->aiobotocore~=2.5.0->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (1.0.1)\n",
"Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.56.2 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (1.59.0)\n",
"Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from google-resumable-media>=2.3.2->google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (1.5.0)\n",
"Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (0.5.0)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets==0.5.0rc5) (3.2.2)\n",
"Installing collected packages: pinecone-datasets\n",
" Attempting uninstall: pinecone-datasets\n",
" Found existing installation: pinecone-datasets 0.5.0rc4\n",
" Uninstalling pinecone-datasets-0.5.0rc4:\n",
" Successfully uninstalled pinecone-datasets-0.5.0rc4\n",
"Successfully installed pinecone-datasets-0.5.0rc5\n"
]
}
],
"source": [
"!pip install -U \\\n",
" pinecone-datasets==0.5.0rc2 \\\n",
" pinecone-datasets==0.5.0rc5 \\\n",
" datasets==2.12.0 \\\n",
" sentence-transformers==2.2.2"
]
@@ -279,40 +184,7 @@
"text": [
"/Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"Downloading builder script: 100%|██████████| 2.38k/2.38k [00:00<00:00, 9.31MB/s]\n",
"Downloading metadata: 100%|██████████| 1.13k/1.13k [00:00<00:00, 9.25MB/s]\n",
"Downloading readme: 100%|██████████| 5.69k/5.69k [00:00<00:00, 27.7MB/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading and preparing dataset quora/default to /Users/roymiara/.cache/huggingface/datasets/quora/default/0.0.0/36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading data: 100%|██████████| 58.2M/58.2M [00:05<00:00, 10.9MB/s]\n",
"Downloading data files: 100%|██████████| 1/1 [00:05<00:00, 5.88s/it]\n",
"Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 983.19it/s]\n",
" "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset quora downloaded and prepared to /Users/roymiara/.cache/huggingface/datasets/quora/default/0.0.0/36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04. Subsequent calls will reuse this data.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r"
"Found cached dataset quora (/Users/roymiara/.cache/huggingface/datasets/quora/default/0.0.0/36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04)\n"
]
},
{
@@ -348,7 +220,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -377,7 +249,7 @@
" 'is_duplicate': [False, True, False, True, False]}"
]
},
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -398,7 +270,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -411,11 +283,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
"If Allah is merciful then why would He burn sinned humans with fire for eternity in hell? Cant' He show mercy?\n",
"What percentage of transgender women are sexually attracted to women as opposed to men, or both men and women?\n",
"What is the advantage of polarized sunglasses?\n",
"What are good gift ideas for a dad in his late 40s?\n",
"Who are the people still choosing to vote for Donald Trump and why do they want to vote for him (other than because he's the Republican nominee)?\n",
"Which Finance job requires traveling around the world?\n",
"Does anyone use the Xyleme learning content management system?\n",
"Which folder keeps the extension files of Google Chrome Portable Version?\n",
"What are some good ways to lose weight?\n",
"Can Health services research be a STEM major?\n",
"136057\n"
]
}
@@ -448,7 +320,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -474,7 +346,7 @@
")"
]
},
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -513,7 +385,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -525,10 +397,10 @@
{
"data": {
"text/plain": [
"(2, 384)"
"(384,)"
]
},
"execution_count": 16,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -576,7 +448,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -590,7 +462,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -617,7 +489,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 8/8 [00:02<00:00, 3.08it/s]\n"
"100%|██████████| 8/8 [00:02<00:00, 3.77it/s]\n"
]
}
],
@@ -643,7 +515,7 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -676,32 +548,32 @@
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>[0.079809085, 0.13530786, -0.024871217, 0.0126...</td>\n",
" <td>{'text': 'If Allah is merciful then why would ...</td>\n",
" <td>[0.06995727, -0.041538, -0.07480858, 0.0554511...</td>\n",
" <td>{'text': 'Which Finance job requires traveling...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>[0.08731179, -0.043924116, -0.07815887, 0.0399...</td>\n",
" <td>{'text': 'What percentage of transgender women...</td>\n",
" <td>[0.020836761, -0.055749647, -0.06341955, 0.042...</td>\n",
" <td>{'text': 'Does anyone use the Xyleme learning ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>[-0.044097595, 0.012648403, 0.007437395, 0.013...</td>\n",
" <td>{'text': 'What is the advantage of polarized s...</td>\n",
" <td>[-0.048633844, -0.011845093, 0.005464233, 0.00...</td>\n",
" <td>{'text': 'Which folder keeps the extension fil...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>[0.019823564, 0.062449012, 0.015589851, -0.011...</td>\n",
" <td>{'text': 'What are good gift ideas for a dad i...</td>\n",
" <td>[-0.07174875, 0.07797023, 0.056134596, 0.12874...</td>\n",
" <td>{'text': 'What are some good ways to lose weig...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>[0.056418877, -0.089993075, 0.06808353, -0.019...</td>\n",
" <td>{'text': 'Who are the people still choosing to...</td>\n",
" <td>[-0.01627545, 0.070892945, -0.0010130059, -0.0...</td>\n",
" <td>{'text': 'Can Health services research be a ST...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
@@ -712,32 +584,32 @@
" <tr>\n",
" <th>1019</th>\n",
" <td>1019</td>\n",
" <td>[-0.033038545, 0.08281174, -0.055870146, 0.061...</td>\n",
" <td>{'text': 'How was hemoglobin discovered? Who d...</td>\n",
" <td>[0.063618615, 0.016510956, 0.04254935, 0.01624...</td>\n",
" <td>{'text': 'Should I prepare &amp; crack CAT by leav...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1020</th>\n",
" <td>1020</td>\n",
" <td>[0.018484745, 0.062107757, 0.034533918, 0.0269...</td>\n",
" <td>{'text': 'Why did Steve Jobs drop out of colle...</td>\n",
" <td>[0.029550772, 0.047756128, -0.028292943, -0.03...</td>\n",
" <td>{'text': 'Why are Europe and Asia separate con...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1021</th>\n",
" <td>1021</td>\n",
" <td>[0.005033609, -0.084230006, -0.013950559, 0.01...</td>\n",
" <td>{'text': 'My wife and I fight a lot and I need...</td>\n",
" <td>[0.0754963, -0.10504479, 0.0089978445, 0.05468...</td>\n",
" <td>{'text': 'Who is the most beautiful and glamor...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1022</th>\n",
" <td>1022</td>\n",
" <td>[0.054642506, -0.06619325, -0.05927356, -0.035...</td>\n",
" <td>{'text': 'I'm worried about my relationship. S...</td>\n",
" <td>[-0.009103978, -0.07920409, 0.065698035, -0.04...</td>\n",
" <td>{'text': 'What are the major factors that moti...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1023</th>\n",
" <td>1023</td>\n",
" <td>[-0.025504842, 0.008972524, -0.070964225, -0.0...</td>\n",
" <td>{'text': 'Why naidus and Reddys dominate in po...</td>\n",
" <td>[0.024876645, 0.0046750004, -0.010520815, 0.00...</td>\n",
" <td>{'text': 'How can I improve my math skills for...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@@ -746,35 +618,35 @@
],
"text/plain": [
" id values \\\n",
"0 0 [0.079809085, 0.13530786, -0.024871217, 0.0126... \n",
"1 1 [0.08731179, -0.043924116, -0.07815887, 0.0399... \n",
"2 2 [-0.044097595, 0.012648403, 0.007437395, 0.013... \n",
"3 3 [0.019823564, 0.062449012, 0.015589851, -0.011... \n",
"4 4 [0.056418877, -0.089993075, 0.06808353, -0.019... \n",
"0 0 [0.06995727, -0.041538, -0.07480858, 0.0554511... \n",
"1 1 [0.020836761, -0.055749647, -0.06341955, 0.042... \n",
"2 2 [-0.048633844, -0.011845093, 0.005464233, 0.00... \n",
"3 3 [-0.07174875, 0.07797023, 0.056134596, 0.12874... \n",
"4 4 [-0.01627545, 0.070892945, -0.0010130059, -0.0... \n",
"... ... ... \n",
"1019 1019 [-0.033038545, 0.08281174, -0.055870146, 0.061... \n",
"1020 1020 [0.018484745, 0.062107757, 0.034533918, 0.0269... \n",
"1021 1021 [0.005033609, -0.084230006, -0.013950559, 0.01... \n",
"1022 1022 [0.054642506, -0.06619325, -0.05927356, -0.035... \n",
"1023 1023 [-0.025504842, 0.008972524, -0.070964225, -0.0... \n",
"1019 1019 [0.063618615, 0.016510956, 0.04254935, 0.01624... \n",
"1020 1020 [0.029550772, 0.047756128, -0.028292943, -0.03... \n",
"1021 1021 [0.0754963, -0.10504479, 0.0089978445, 0.05468... \n",
"1022 1022 [-0.009103978, -0.07920409, 0.065698035, -0.04... \n",
"1023 1023 [0.024876645, 0.0046750004, -0.010520815, 0.00... \n",
"\n",
" metadata \n",
"0 {'text': 'If Allah is merciful then why would ... \n",
"1 {'text': 'What percentage of transgender women... \n",
"2 {'text': 'What is the advantage of polarized s... \n",
"3 {'text': 'What are good gift ideas for a dad i... \n",
"4 {'text': 'Who are the people still choosing to... \n",
"0 {'text': 'Which Finance job requires traveling... \n",
"1 {'text': 'Does anyone use the Xyleme learning ... \n",
"2 {'text': 'Which folder keeps the extension fil... \n",
"3 {'text': 'What are some good ways to lose weig... \n",
"4 {'text': 'Can Health services research be a ST... \n",
"... ... \n",
"1019 {'text': 'How was hemoglobin discovered? Who d... \n",
"1020 {'text': 'Why did Steve Jobs drop out of colle... \n",
"1021 {'text': 'My wife and I fight a lot and I need... \n",
"1022 {'text': 'I'm worried about my relationship. S... \n",
"1023 {'text': 'Why naidus and Reddys dominate in po... \n",
"1019 {'text': 'Should I prepare & crack CAT by leav... \n",
"1020 {'text': 'Why are Europe and Asia separate con... \n",
"1021 {'text': 'Who is the most beautiful and glamor... \n",
"1022 {'text': 'What are the major factors that moti... \n",
"1023 {'text': 'How can I improve my math skills for... \n",
"\n",
"[1024 rows x 3 columns]"
]
},
"execution_count": 52,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -789,7 +661,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
@@ -799,7 +671,7 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
@@ -827,7 +699,7 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
@@ -836,7 +708,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 45,
"metadata": {},
"outputs": [
{
@@ -871,41 +743,41 @@
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>[0.079809085, 0.13530786, -0.024871217, 0.0126...</td>\n",
" <td>[0.06995727, -0.041538, -0.07480858, 0.0554511...</td>\n",
" <td>None</td>\n",
" <td>{'text': 'If Allah is merciful then why would ...</td>\n",
" <td>{'text': 'Which Finance job requires traveling...</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>[0.08731179, -0.043924116, -0.07815887, 0.0399...</td>\n",
" <td>[0.020836761, -0.055749647, -0.06341955, 0.042...</td>\n",
" <td>None</td>\n",
" <td>{'text': 'What percentage of transgender women...</td>\n",
" <td>{'text': 'Does anyone use the Xyleme learning ...</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>[-0.044097595, 0.012648403, 0.007437395, 0.013...</td>\n",
" <td>[-0.048633844, -0.011845093, 0.005464233, 0.00...</td>\n",
" <td>None</td>\n",
" <td>{'text': 'What is the advantage of polarized s...</td>\n",
" <td>{'text': 'Which folder keeps the extension fil...</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>[0.019823564, 0.062449012, 0.015589851, -0.011...</td>\n",
" <td>[-0.07174875, 0.07797023, 0.056134596, 0.12874...</td>\n",
" <td>None</td>\n",
" <td>{'text': 'What are good gift ideas for a dad i...</td>\n",
" <td>{'text': 'What are some good ways to lose weig...</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>[0.056418877, -0.089993075, 0.06808353, -0.019...</td>\n",
" <td>[-0.01627545, 0.070892945, -0.0010130059, -0.0...</td>\n",
" <td>None</td>\n",
" <td>{'text': 'Who are the people still choosing to...</td>\n",
" <td>{'text': 'Can Health services research be a ST...</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
@@ -914,21 +786,21 @@
],
"text/plain": [
" id values sparse_values \\\n",
"0 0 [0.079809085, 0.13530786, -0.024871217, 0.0126... None \n",
"1 1 [0.08731179, -0.043924116, -0.07815887, 0.0399... None \n",
"2 2 [-0.044097595, 0.012648403, 0.007437395, 0.013... None \n",
"3 3 [0.019823564, 0.062449012, 0.015589851, -0.011... None \n",
"4 4 [0.056418877, -0.089993075, 0.06808353, -0.019... None \n",
"0 0 [0.06995727, -0.041538, -0.07480858, 0.0554511... None \n",
"1 1 [0.020836761, -0.055749647, -0.06341955, 0.042... None \n",
"2 2 [-0.048633844, -0.011845093, 0.005464233, 0.00... None \n",
"3 3 [-0.07174875, 0.07797023, 0.056134596, 0.12874... None \n",
"4 4 [-0.01627545, 0.070892945, -0.0010130059, -0.0... None \n",
"\n",
" metadata blob \n",
"0 {'text': 'If Allah is merciful then why would ... None \n",
"1 {'text': 'What percentage of transgender women... None \n",
"2 {'text': 'What is the advantage of polarized s... None \n",
"3 {'text': 'What are good gift ideas for a dad i... None \n",
"4 {'text': 'Who are the people still choosing to... None "
"0 {'text': 'Which Finance job requires traveling... None \n",
"1 {'text': 'Does anyone use the Xyleme learning ... None \n",
"2 {'text': 'Which folder keeps the extension fil... None \n",
"3 {'text': 'What are some good ways to lose weig... None \n",
"4 {'text': 'Can Health services research be a ST... None "
]
},
"execution_count": 56,
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
@@ -939,7 +811,7 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 33,
"metadata": {},
"outputs": [
{
@@ -958,29 +830,26 @@
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 46,
"metadata": {},
"outputs": [
{
"ename": "ConnectionError",
"evalue": "Failed to connect to Pinecone's controller on region YOUR_ENVIRONMENT. Please verify client configuration: API key, region and project_id. See more info: https://docs.pinecone.io/docs/quickstart#2-get-and-verify-your-pinecone-api-key\nUnderlying Error: error sending request for url (https://controller.your_environment.pinecone.io/actions/whoami): error trying to connect: dns error: failed to lookup address information: nodename nor servname provided, or not known",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mConnectionError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[51], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m os\u001b[39m.\u001b[39menviron[\u001b[39m\"\u001b[39m\u001b[39mPINECONE_API_KEY\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mYOUR_API_KEY\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 4\u001b[0m os\u001b[39m.\u001b[39menviron[\u001b[39m\"\u001b[39m\u001b[39mPINECONE_ENVIRONMENT\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mYOUR_ENVIRONMENT\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m----> 6\u001b[0m pds\u001b[39m.\u001b[39;49mto_index(\u001b[39m\"\u001b[39;49m\u001b[39mseamntic-search\u001b[39;49m\u001b[39m\"\u001b[39;49m, batch_size\u001b[39m=\u001b[39;49m\u001b[39m300\u001b[39;49m, concurrency\u001b[39m=\u001b[39;49m\u001b[39m16\u001b[39;49m, create_index\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, metadata_config\u001b[39m=\u001b[39;49m{\u001b[39m\"\u001b[39;49m\u001b[39mindexed\u001b[39;49m\u001b[39m\"\u001b[39;49m: []})\n",
"File \u001b[0;32m~/Pinecone/examples/.venv/lib/python3.9/site-packages/pinecone_datasets/dataset.py:410\u001b[0m, in \u001b[0;36mDataset.to_index\u001b[0;34m(self, index_name, bath_size, concurrency, create_index, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mPINECONE_API_KEY environment variable must be set\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 409\u001b[0m \u001b[39m# create client\u001b[39;00m\n\u001b[0;32m--> 410\u001b[0m pinecone \u001b[39m=\u001b[39m Client(api_key\u001b[39m=\u001b[39;49mos\u001b[39m.\u001b[39;49menviron[\u001b[39m\"\u001b[39;49m\u001b[39mPINECONE_API_KEY\u001b[39;49m\u001b[39m\"\u001b[39;49m], region\u001b[39m=\u001b[39;49mregion)\n\u001b[1;32m 412\u001b[0m pinecone_index_list \u001b[39m=\u001b[39m pinecone\u001b[39m.\u001b[39mlist_indexes()\n\u001b[1;32m 414\u001b[0m \u001b[39mif\u001b[39;00m create_index:\n\u001b[1;32m 415\u001b[0m \u001b[39m# make sure the index does not exist\u001b[39;00m\n",
"\u001b[0;31mConnectionError\u001b[0m: Failed to connect to Pinecone's controller on region YOUR_ENVIRONMENT. Please verify client configuration: API key, region and project_id. See more info: https://docs.pinecone.io/docs/quickstart#2-get-and-verify-your-pinecone-api-key\nUnderlying Error: error sending request for url (https://controller.your_environment.pinecone.io/actions/whoami): error trying to connect: dns error: failed to lookup address information: nodename nor servname provided, or not known"
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/roymiara/Pinecone/examples/.venv/lib/python3.9/site-packages/pinecone_datasets/dataset.py:444: RuntimeWarning: coroutine 'Dataset._async_upsert' was never awaited\n",
" self._async_upsert(\n",
"RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n"
]
}
],
"source": [
"# upserting datsaet to Pinecone\n",
"import os\n",
"os.environ[\"PINECONE_API_KEY\"] = \"YOUR_API_KEY\"\n",
"os.environ[\"PINECONE_ENVIRONMENT\"] = \"YOUR_ENVIRONMENT\"\n",
"os.environ[\"PINECONE_API_KEY\"] = \"a91ac336-1d39-4085-a28d-2c675a1aefce\"\n",
"os.environ[\"PINECONE_ENVIRONMENT\"] = \"us-west1-gcp\"\n",
"\n",
"pds.to_index(\"seamntic-search\", batch_size=300, concurrency=16, create_index=True, metadata_config={\"indexed\": []})"
"pds.to_index(\"semantic-search\", batch_size=300, concurrency=16)"
]
},
{