initial example work
This commit is contained in:
448
examples/classify-recipes/train.ipynb
Normal file
448
examples/classify-recipes/train.ipynb
Normal file
@@ -0,0 +1,448 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's get to the fun part -- training a model. We'll start by installing our dependencies."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: peft==0.5.0 in /usr/local/lib/python3.10/dist-packages (0.5.0)\n",
|
||||
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from peft==0.5.0) (1.24.4)\n",
|
||||
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.5.0) (23.1)\n",
|
||||
"Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft==0.5.0) (5.9.5)\n",
|
||||
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from peft==0.5.0) (6.0)\n",
|
||||
"Requirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.5.0) (2.0.1+cu118)\n",
|
||||
"Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from peft==0.5.0) (4.33.0.dev0)\n",
|
||||
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from peft==0.5.0) (4.66.1)\n",
|
||||
"Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (from peft==0.5.0) (0.22.0.dev0)\n",
|
||||
"Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from peft==0.5.0) (0.3.2)\n",
|
||||
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.5.0) (3.9.0)\n",
|
||||
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.5.0) (4.7.1)\n",
|
||||
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.5.0) (1.11.1)\n",
|
||||
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.5.0) (3.0)\n",
|
||||
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.5.0) (3.1.2)\n",
|
||||
"Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.5.0) (2.0.0)\n",
|
||||
"Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.0->peft==0.5.0) (3.25.0)\n",
|
||||
"Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.0->peft==0.5.0) (15.0.7)\n",
|
||||
"Requirement already satisfied: huggingface-hub<1.0,>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.5.0) (0.16.4)\n",
|
||||
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.5.0) (2023.8.8)\n",
|
||||
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.5.0) (2.28.1)\n",
|
||||
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.5.0) (0.13.3)\n",
|
||||
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.15.1->transformers->peft==0.5.0) (2023.6.0)\n",
|
||||
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft==0.5.0) (2.1.2)\n",
|
||||
"Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft==0.5.0) (2.1.1)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft==0.5.0) (3.4)\n",
|
||||
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft==0.5.0) (1.26.13)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft==0.5.0) (2022.12.7)\n",
|
||||
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.0->peft==0.5.0) (1.2.1)\n",
|
||||
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
|
||||
"\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.10 -m pip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||
"fatal: destination path 'axolotl' already exists and is not an empty directory.\n",
|
||||
"Obtaining file:///workspace/gpt4-fine-tuning/axolotl\n",
|
||||
" Preparing metadata (setup.py) ... \u001b[?25ldone\n",
|
||||
"\u001b[?25hCollecting transformers@ git+https://github.com/huggingface/transformers.git (from axolotl==0.1)\n",
|
||||
" Cloning https://github.com/huggingface/transformers.git to /tmp/pip-install-8yfermge/transformers_22e4388baf16446d8445557008e38efe\n",
|
||||
" Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-install-8yfermge/transformers_22e4388baf16446d8445557008e38efe\n",
|
||||
" Resolved https://github.com/huggingface/transformers.git to commit 4d40109c3a93c9b8bbca204cb046ed510f1c72e8\n",
|
||||
" Installing build dependencies ... \u001b[?25ldone\n",
|
||||
"\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n",
|
||||
"\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
|
||||
"\u001b[?25hCollecting accelerate@ git+https://github.com/huggingface/accelerate@2a289f6108e77a77a4efffb3f6316bc98538413b (from axolotl==0.1)\n",
|
||||
" Using cached accelerate-0.22.0.dev0-py3-none-any.whl\n",
|
||||
"Requirement already satisfied: bitsandbytes>=0.41.1 in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (0.41.1)\n",
|
||||
"Requirement already satisfied: addict in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (2.4.0)\n",
|
||||
"Requirement already satisfied: fire in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (0.5.0)\n",
|
||||
"Requirement already satisfied: PyYAML==6.0 in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (6.0)\n",
|
||||
"Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (2.14.4)\n",
|
||||
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (0.1.99)\n",
|
||||
"Requirement already satisfied: wandb in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (0.15.8)\n",
|
||||
"Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (0.6.1)\n",
|
||||
"Requirement already satisfied: xformers in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (0.0.21)\n",
|
||||
"Requirement already satisfied: optimum in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (1.11.2)\n",
|
||||
"Requirement already satisfied: hf_transfer in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (0.1.3)\n",
|
||||
"Requirement already satisfied: colorama in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (0.4.6)\n",
|
||||
"Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (0.57.1)\n",
|
||||
"Requirement already satisfied: numpy==1.24.4 in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (1.24.4)\n",
|
||||
"Requirement already satisfied: bert-score==0.3.13 in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (0.3.13)\n",
|
||||
"Requirement already satisfied: evaluate==0.4.0 in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (0.4.0)\n",
|
||||
"Requirement already satisfied: rouge-score==0.1.2 in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (0.1.2)\n",
|
||||
"Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (1.11.2)\n",
|
||||
"Requirement already satisfied: scikit-learn==1.2.2 in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (1.2.2)\n",
|
||||
"Requirement already satisfied: pynvml in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (11.5.0)\n",
|
||||
"Requirement already satisfied: flash-attn==2.0.8 in /usr/local/lib/python3.10/dist-packages (from axolotl==0.1) (2.0.8)\n",
|
||||
"Requirement already satisfied: torch>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from bert-score==0.3.13->axolotl==0.1) (2.0.1+cu118)\n",
|
||||
"Requirement already satisfied: pandas>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from bert-score==0.3.13->axolotl==0.1) (2.0.3)\n",
|
||||
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from bert-score==0.3.13->axolotl==0.1) (2.28.1)\n",
|
||||
"Requirement already satisfied: tqdm>=4.31.1 in /usr/local/lib/python3.10/dist-packages (from bert-score==0.3.13->axolotl==0.1) (4.66.1)\n",
|
||||
"Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from bert-score==0.3.13->axolotl==0.1) (3.7.2)\n",
|
||||
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from bert-score==0.3.13->axolotl==0.1) (23.1)\n",
|
||||
"Requirement already satisfied: dill in /usr/local/lib/python3.10/dist-packages (from evaluate==0.4.0->axolotl==0.1) (0.3.7)\n",
|
||||
"Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from evaluate==0.4.0->axolotl==0.1) (3.3.0)\n",
|
||||
"Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from evaluate==0.4.0->axolotl==0.1) (0.70.15)\n",
|
||||
"Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.10/dist-packages (from evaluate==0.4.0->axolotl==0.1) (2023.6.0)\n",
|
||||
"Requirement already satisfied: huggingface-hub>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from evaluate==0.4.0->axolotl==0.1) (0.16.4)\n",
|
||||
"Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.10/dist-packages (from evaluate==0.4.0->axolotl==0.1) (0.18.0)\n",
|
||||
"Requirement already satisfied: ninja in /usr/local/lib/python3.10/dist-packages (from flash-attn==2.0.8->axolotl==0.1) (1.11.1)\n",
|
||||
"Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge-score==0.1.2->axolotl==0.1) (1.4.0)\n",
|
||||
"Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from rouge-score==0.1.2->axolotl==0.1) (3.8.1)\n",
|
||||
"Requirement already satisfied: six>=1.14.0 in /usr/lib/python3/dist-packages (from rouge-score==0.1.2->axolotl==0.1) (1.16.0)\n",
|
||||
"Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.2.2->axolotl==0.1) (1.3.2)\n",
|
||||
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.2.2->axolotl==0.1) (3.2.0)\n",
|
||||
"Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->axolotl==0.1) (12.0.1)\n",
|
||||
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->axolotl==0.1) (3.8.5)\n",
|
||||
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers@ git+https://github.com/huggingface/transformers.git->axolotl==0.1) (3.9.0)\n",
|
||||
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers@ git+https://github.com/huggingface/transformers.git->axolotl==0.1) (2023.8.8)\n",
|
||||
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers@ git+https://github.com/huggingface/transformers.git->axolotl==0.1) (0.13.3)\n",
|
||||
"Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers@ git+https://github.com/huggingface/transformers.git->axolotl==0.1) (0.3.2)\n",
|
||||
"Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate@ git+https://github.com/huggingface/accelerate@2a289f6108e77a77a4efffb3f6316bc98538413b->axolotl==0.1) (5.9.5)\n",
|
||||
"Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from fire->axolotl==0.1) (2.3.0)\n",
|
||||
"Requirement already satisfied: llvmlite<0.41,>=0.40.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->axolotl==0.1) (0.40.1)\n",
|
||||
"Requirement already satisfied: coloredlogs in /usr/local/lib/python3.10/dist-packages (from optimum->axolotl==0.1) (15.0.1)\n",
|
||||
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from optimum->axolotl==0.1) (1.11.1)\n",
|
||||
"Requirement already satisfied: Click!=8.0.0,>=7.1 in /usr/local/lib/python3.10/dist-packages (from wandb->axolotl==0.1) (8.1.7)\n",
|
||||
"Requirement already satisfied: GitPython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb->axolotl==0.1) (3.1.32)\n",
|
||||
"Requirement already satisfied: sentry-sdk>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb->axolotl==0.1) (1.29.2)\n",
|
||||
"Requirement already satisfied: docker-pycreds>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from wandb->axolotl==0.1) (0.4.0)\n",
|
||||
"Requirement already satisfied: pathtools in /usr/local/lib/python3.10/dist-packages (from wandb->axolotl==0.1) (0.1.2)\n",
|
||||
"Requirement already satisfied: setproctitle in /usr/local/lib/python3.10/dist-packages (from wandb->axolotl==0.1) (1.3.2)\n",
|
||||
"Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from wandb->axolotl==0.1) (68.0.0)\n",
|
||||
"Requirement already satisfied: appdirs>=1.4.3 in /usr/local/lib/python3.10/dist-packages (from wandb->axolotl==0.1) (1.4.4)\n",
|
||||
"Requirement already satisfied: protobuf!=4.21.0,<5,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from wandb->axolotl==0.1) (4.24.1)\n",
|
||||
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.0.0->bert-score==0.3.13->axolotl==0.1) (4.7.1)\n",
|
||||
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.0.0->bert-score==0.3.13->axolotl==0.1) (3.0)\n",
|
||||
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.0.0->bert-score==0.3.13->axolotl==0.1) (3.1.2)\n",
|
||||
"Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.0.0->bert-score==0.3.13->axolotl==0.1) (2.0.0)\n",
|
||||
"Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.0.0->bert-score==0.3.13->axolotl==0.1) (3.25.0)\n",
|
||||
"Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.0.0->bert-score==0.3.13->axolotl==0.1) (15.0.7)\n",
|
||||
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->axolotl==0.1) (23.1.0)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->axolotl==0.1) (2.1.1)\n",
|
||||
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->axolotl==0.1) (6.0.4)\n",
|
||||
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->axolotl==0.1) (4.0.3)\n",
|
||||
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->axolotl==0.1) (1.9.2)\n",
|
||||
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->axolotl==0.1) (1.4.0)\n",
|
||||
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->axolotl==0.1) (1.3.1)\n",
|
||||
"Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from GitPython!=3.1.29,>=1.0.0->wandb->axolotl==0.1) (4.0.10)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.0.1->bert-score==0.3.13->axolotl==0.1) (2.8.2)\n",
|
||||
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.0.1->bert-score==0.3.13->axolotl==0.1) (2023.3)\n",
|
||||
"Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.0.1->bert-score==0.3.13->axolotl==0.1) (2023.3)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->bert-score==0.3.13->axolotl==0.1) (3.4)\n",
|
||||
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->bert-score==0.3.13->axolotl==0.1) (1.26.13)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->bert-score==0.3.13->axolotl==0.1) (2022.12.7)\n",
|
||||
"Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.10/dist-packages (from coloredlogs->optimum->axolotl==0.1) (10.0)\n",
|
||||
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->bert-score==0.3.13->axolotl==0.1) (1.1.0)\n",
|
||||
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->bert-score==0.3.13->axolotl==0.1) (0.11.0)\n",
|
||||
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->bert-score==0.3.13->axolotl==0.1) (4.42.1)\n",
|
||||
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->bert-score==0.3.13->axolotl==0.1) (1.4.4)\n",
|
||||
"Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->bert-score==0.3.13->axolotl==0.1) (9.3.0)\n",
|
||||
"Requirement already satisfied: pyparsing<3.1,>=2.3.1 in /usr/lib/python3/dist-packages (from matplotlib->bert-score==0.3.13->axolotl==0.1) (2.4.7)\n",
|
||||
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->optimum->axolotl==0.1) (1.2.1)\n",
|
||||
"Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.10/dist-packages (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb->axolotl==0.1) (5.0.0)\n",
|
||||
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.0.0->bert-score==0.3.13->axolotl==0.1) (2.1.2)\n",
|
||||
"Installing collected packages: axolotl\n",
|
||||
" Attempting uninstall: axolotl\n",
|
||||
" Found existing installation: axolotl 0.1\n",
|
||||
" Uninstalling axolotl-0.1:\n",
|
||||
" Successfully uninstalled axolotl-0.1\n",
|
||||
" Running setup.py develop for axolotl\n",
|
||||
"Successfully installed axolotl\n",
|
||||
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
|
||||
"\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.10 -m pip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install peft==0.5.0\n",
|
||||
"\n",
|
||||
"!git clone https://github.com/OpenAccess-AI-Collective/axolotl\n",
|
||||
"%pip install -e \"./axolotl[flash-attn]\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We'll use the [axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) library to manage our training run. It includes a lot of neat tricks that speed up training without sacrificing quality.\n",
|
||||
"\n",
|
||||
"In this case we'll use 8-bit training to use less GPU RAM, and sample packing to maximize GPU utilization. You can read more about the available options at https://github.com/OpenAccess-AI-Collective/axolotl.\n",
|
||||
"\n",
|
||||
"The training run options we're using here are defined in [training-args.yaml](./training-args.yaml)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The following values were not passed to `accelerate launch` and had defaults used instead:\n",
|
||||
"\t`--num_processes` was set to a value of `1`\n",
|
||||
"\t`--num_machines` was set to a value of `1`\n",
|
||||
"\t`--mixed_precision` was set to a value of `'no'`\n",
|
||||
"\t`--dynamo_backend` was set to a value of `'no'`\n",
|
||||
"To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\n",
|
||||
"\n",
|
||||
" dP dP dP\n",
|
||||
" 88 88 88\n",
|
||||
".d8888b. dP. .dP .d8888b. 88 .d8888b. d8888P 88\n",
|
||||
"88' `88 `8bd8' 88' `88 88 88' `88 88 88\n",
|
||||
"88. .88 .d88b. 88. .88 88 88. .88 88 88\n",
|
||||
"`88888P8 dP' `dP `88888P' dP `88888P' dP dP\n",
|
||||
"\n",
|
||||
"[2023-08-24 04:29:56,887] [INFO] [axolotl.normalize_config:72] [PID:89149] GPU memory usage baseline: 0.000GB (+0.674GB misc)\u001b[39m\n",
|
||||
"[2023-08-24 04:29:56,887] [INFO] [axolotl.scripts.train:189] [PID:89149] loading tokenizer... meta-llama/Llama-2-7b-hf\u001b[39m\n",
|
||||
"[2023-08-24 04:29:57,058] [DEBUG] [axolotl.load_tokenizer:64] [PID:89149] EOS: 2 / </s>\u001b[39m\n",
|
||||
"[2023-08-24 04:29:57,058] [DEBUG] [axolotl.load_tokenizer:65] [PID:89149] BOS: 1 / <s>\u001b[39m\n",
|
||||
"[2023-08-24 04:29:57,058] [DEBUG] [axolotl.load_tokenizer:66] [PID:89149] PAD: 0 / [PAD]\u001b[39m\n",
|
||||
"[2023-08-24 04:29:57,058] [DEBUG] [axolotl.load_tokenizer:67] [PID:89149] UNK: 0 / <unk>\u001b[39m\n",
|
||||
"[2023-08-24 04:29:57,058] [INFO] [axolotl.load_tokenized_prepared_datasets:126] [PID:89149] Unable to find prepared dataset in last_run_prepared/82cd9d58e34e0db98296199248c92d0d\u001b[39m\n",
|
||||
"[2023-08-24 04:29:57,058] [INFO] [axolotl.load_tokenized_prepared_datasets:127] [PID:89149] Loading raw datasets...\u001b[39m\n",
|
||||
"[2023-08-24 04:29:57,058] [INFO] [axolotl.load_tokenized_prepared_datasets:132] [PID:89149] No seed provided, using default seed of 42\u001b[39m\n",
|
||||
"/usr/local/lib/python3.10/dist-packages/datasets/load.py:2072: FutureWarning: 'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n",
|
||||
"You can remove this warning by passing 'token=None' instead.\n",
|
||||
" warnings.warn(\n",
|
||||
"Downloading data files: 100%|███████████████████| 1/1 [00:00<00:00, 1431.99it/s]\n",
|
||||
"Extracting data files: 100%|█████████████████████| 1/1 [00:00<00:00, 141.85it/s]\n",
|
||||
"Generating train split: 1812 examples [00:00, 54440.66 examples/s]\n",
|
||||
"Map (num_proc=64): 100%|███████████| 1812/1812 [00:00<00:00, 2293.91 examples/s]\n",
|
||||
"[2023-08-24 04:29:59,312] [INFO] [axolotl.load_tokenized_prepared_datasets:330] [PID:89149] merging datasets\u001b[39m\n",
|
||||
"[2023-08-24 04:29:59,320] [INFO] [axolotl.load_tokenized_prepared_datasets:337] [PID:89149] Saving merged prepared dataset to disk... last_run_prepared/82cd9d58e34e0db98296199248c92d0d\u001b[39m\n",
|
||||
"Saving the dataset (1/1 shards): 100%|█| 1812/1812 [00:00<00:00, 41091.50 exampl\n",
|
||||
"Filter (num_proc=255): 100%|███████| 1721/1721 [00:01<00:00, 1410.20 examples/s]\n",
|
||||
"Filter (num_proc=91): 100%|█████████████| 91/91 [00:00<00:00, 178.90 examples/s]\n",
|
||||
"Map (num_proc=255): 100%|███████████| 1721/1721 [00:02<00:00, 615.40 examples/s]\n",
|
||||
"Map (num_proc=91): 100%|████████████████| 91/91 [00:00<00:00, 155.55 examples/s]\n",
|
||||
"[2023-08-24 04:30:19,728] [INFO] [axolotl.calculate_total_num_steps:346] [PID:89149] calculating total_num_tokens\u001b[39m\n",
|
||||
"[2023-08-24 04:30:19,735] [INFO] [axolotl.calculate_total_num_steps:353] [PID:89149] 📝 UPDATE CONFIG WITH: `total_num_tokens: 603850`\u001b[39m\n",
|
||||
"[2023-08-24 04:30:19,742] [INFO] [axolotl.utils.dataloader.generate_batches:181] [PID:89149] generating packed batches\u001b[39m\n",
|
||||
"[2023-08-24 04:30:19,782] [INFO] [axolotl.utils.dataloader.generate_batches:187] [PID:89149] dee682e8d4cd8a48b5f2c4c497f5830ec7f0dab6640f6b215f5dc1d162e00b2d\u001b[39m\n",
|
||||
"[2023-08-24 04:30:26,417] [INFO] [axolotl.utils.dataloader.len_w_stats:293] [PID:89149] packing_efficiency_estimate: 1.0 actual packing efficiency: 0.969896818462171\u001b[39m\n",
|
||||
"[2023-08-24 04:30:26,417] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 1.0 total_num_tokens per device: 603850\u001b[39m\n",
|
||||
"[2023-08-24 04:30:26,417] [INFO] [axolotl.calculate_total_num_steps:393] [PID:89149] data_loader_len: 71\u001b[39m\n",
|
||||
"[2023-08-24 04:30:26,417] [INFO] [axolotl.calculate_total_num_steps:402] [PID:89149] 📝 UPDATE CONFIG WITH: `sample_packing_eff_est: 0.97`\u001b[39m\n",
|
||||
"[2023-08-24 04:30:26,417] [INFO] [axolotl.calculate_total_num_steps:410] [PID:89149] total_num_steps: 53\u001b[39m\n",
|
||||
"[2023-08-24 04:30:26,417] [INFO] [axolotl.scripts.train:211] [PID:89149] loading model and (optionally) peft_config...\u001b[39m\n",
|
||||
"[2023-08-24 04:30:26,445] [INFO] [axolotl.load_model:106] [PID:89149] patching with flash attention\u001b[39m\n",
|
||||
"[2023-08-24 04:30:26,452] [INFO] [axolotl.load_model:147] [PID:89149] patching _expand_mask\u001b[39m\n",
|
||||
"Loading checkpoint shards: 100%|██████████████████| 2/2 [00:18<00:00, 9.36s/it]\n",
|
||||
"\u001b[33m[2023-08-24 04:30:45,869] [WARNING] [axolotl.load_model:337] [PID:89149] increasing model.config.max_position_embeddings to 4096\u001b[39m\n",
|
||||
"[2023-08-24 04:30:45,870] [INFO] [axolotl.load_model:343] [PID:89149] GPU memory usage after model load: 6.681GB (+0.364GB cache, +1.159GB misc)\u001b[39m\n",
|
||||
"[2023-08-24 04:30:45,870] [INFO] [axolotl.load_model:349] [PID:89149] converting PEFT model w/ prepare_model_for_kbit_training\u001b[39m\n",
|
||||
"[2023-08-24 04:30:45,879] [INFO] [axolotl.load_lora:473] [PID:89149] found linear modules: ['down_proj', 'gate_proj', 'k_proj', 'o_proj', 'v_proj', 'up_proj', 'q_proj']\u001b[39m\n",
|
||||
"trainable params: 79,953,920 || all params: 6,818,369,536 || trainable%: 1.172625208678628\n",
|
||||
"[2023-08-24 04:31:41,698] [INFO] [axolotl.load_model:394] [PID:89149] GPU memory usage after adapters: 6.830GB (+1.365GB cache, +1.159GB misc)\u001b[39m\n",
|
||||
"[2023-08-24 04:31:41,732] [INFO] [axolotl.scripts.train:267] [PID:89149] Compiling torch model\u001b[39m\n",
|
||||
"[2023-08-24 04:31:41,887] [INFO] [axolotl.scripts.train:272] [PID:89149] Pre-saving adapter config to ./models/recipe-model\u001b[39m\n",
|
||||
"[2023-08-24 04:31:41,893] [INFO] [axolotl.scripts.train:288] [PID:89149] Starting trainer...\u001b[39m\n",
|
||||
"[2023-08-24 04:31:42,090] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 603850\u001b[39m\n",
|
||||
"[2023-08-24 04:31:42,090] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 603850\u001b[39m\n",
|
||||
" 0%| | 0/54 [00:00<?, ?it/s][2023-08-24 04:31:42,113] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 603850\u001b[39m\n",
|
||||
"[2023-08-24 04:31:42,113] [INFO] [axolotl.utils.dataloader.generate_batches:181] [PID:89149] generating packed batches\u001b[39m\n",
|
||||
"[2023-08-24 04:31:42,114] [INFO] [axolotl.utils.dataloader.generate_batches:187] [PID:89149] 848c81b46839193f2474e41caca3515e1e0772a7b854a116e55b5be6428e67c7\u001b[39m\n",
|
||||
"[2023-08-24 04:31:42,115] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 603850\u001b[39m\n",
|
||||
"/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py:322: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
|
||||
" warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
|
||||
"{'loss': 1.7586, 'learning_rate': 2e-05, 'epoch': 0.05} \n",
|
||||
" 2%|▊ | 1/54 [00:19<16:49, 19.05s/it][2023-08-24 04:32:19,963] [INFO] [axolotl.callbacks.on_step_end:96] [PID:89149] GPU memory usage while training: 7.110GB (+10.363GB cache, +1.190GB misc)\u001b[39m\n",
|
||||
"{'loss': 1.7663, 'learning_rate': 4e-05, 'epoch': 0.11} \n",
|
||||
"{'loss': 1.7667, 'learning_rate': 6e-05, 'epoch': 0.16} \n",
|
||||
"{'loss': 1.7467, 'learning_rate': 8e-05, 'epoch': 0.22} \n",
|
||||
"{'loss': 1.7098, 'learning_rate': 0.0001, 'epoch': 0.27} \n",
|
||||
"{'loss': 1.657, 'learning_rate': 0.00012, 'epoch': 0.32} \n",
|
||||
"{'loss': 1.5029, 'learning_rate': 0.00014, 'epoch': 0.38} \n",
|
||||
"{'loss': 1.2696, 'learning_rate': 0.00016, 'epoch': 0.43} \n",
|
||||
"{'loss': 0.9878, 'learning_rate': 0.00018, 'epoch': 0.49} \n",
|
||||
"{'loss': 0.717, 'learning_rate': 0.0002, 'epoch': 0.54} \n",
|
||||
"{'loss': 0.4798, 'learning_rate': 0.00019973322836635518, 'epoch': 0.59} \n",
|
||||
"{'loss': 0.3126, 'learning_rate': 0.00019893433680751103, 'epoch': 0.65} \n",
|
||||
"{'loss': 0.2195, 'learning_rate': 0.00019760758775559274, 'epoch': 0.7} \n",
|
||||
"{'loss': 0.0979, 'learning_rate': 0.0001957600599908406, 'epoch': 0.76} \n",
|
||||
"{'loss': 0.0885, 'learning_rate': 0.0001934016108732548, 'epoch': 0.81} \n",
|
||||
"{'loss': 0.0664, 'learning_rate': 0.00019054482374931467, 'epoch': 0.86} \n",
|
||||
"{'loss': 0.039, 'learning_rate': 0.00018720494081438078, 'epoch': 0.92} \n",
|
||||
"{'loss': 0.0373, 'learning_rate': 0.0001833997817889878, 'epoch': 0.97} \n",
|
||||
" 33%|██████████████▎ | 18/54 [05:38<11:14, 18.74s/it][2023-08-24 04:37:29,814] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 603850\u001b[39m\n",
|
||||
"[2023-08-24 04:37:29,814] [INFO] [axolotl.utils.dataloader.generate_batches:181] [PID:89149] generating packed batches\u001b[39m\n",
|
||||
"[2023-08-24 04:37:29,815] [INFO] [axolotl.utils.dataloader.generate_batches:187] [PID:89149] c57105ae350fe835ff66a80aedb1409510a82fd764ebddb085f2d181f4ad8e28\u001b[39m\n",
|
||||
"[2023-08-24 04:37:29,816] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 603850\u001b[39m\n",
|
||||
"{'loss': 0.0374, 'learning_rate': 0.00017914964884292544, 'epoch': 1.03} \n",
|
||||
"{'loss': 0.0312, 'learning_rate': 0.0001744772182743782, 'epoch': 1.08} \n",
|
||||
" 37%|███████████████▉ | 20/54 [06:15<10:37, 18.74s/it][2023-08-24 04:37:57,798] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 31311\u001b[39m\n",
|
||||
"[2023-08-24 04:37:57,806] [INFO] [axolotl.utils.dataloader.generate_batches:181] [PID:89149] generating packed batches\u001b[39m\n",
|
||||
"[2023-08-24 04:37:57,806] [INFO] [axolotl.utils.dataloader.generate_batches:187] [PID:89149] 3ada03fc400394f41d492cd6cd41f4c9f2b3fbd736749807d50d4dfadb841911\u001b[39m\n",
|
||||
"[2023-08-24 04:37:57,806] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 31311\u001b[39m\n",
|
||||
"/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py:322: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
|
||||
" warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
|
||||
"[2023-08-24 04:37:59,192] [INFO] [accelerate.accelerator.log:60] [PID:89149] The used dataset had no length, returning gathered tensors. You should drop the remainder yourself.\n",
|
||||
"[2023-08-24 04:37:59,192] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 31311\u001b[39m\n",
|
||||
"[2023-08-24 04:37:59,192] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 31311\u001b[39m\n",
|
||||
"\n",
|
||||
" 0%| | 0/2 [00:00<?, ?it/s]\u001b[A[2023-08-24 04:38:00,615] [INFO] [accelerate.accelerator.log:60] [PID:89149] The used dataset had no length, returning gathered tensors. You should drop the remainder yourself.\n",
|
||||
"[2023-08-24 04:38:00,615] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 31311\u001b[39m\n",
|
||||
"\n",
|
||||
" \u001b[A\n",
|
||||
"\u001b[A{'eval_loss': 0.021838348358869553, 'eval_runtime': 2.8425, 'eval_samples_per_second': 32.014, 'eval_steps_per_second': 16.183, 'epoch': 1.08}\n",
|
||||
" 37%|███████████████▉ | 20/54 [06:18<10:37, 18.74s/it]\n",
|
||||
"100%|█████████████████████████████████████████████| 2/2 [00:01<00:00, 1.41it/s]\u001b[A\n",
|
||||
"{'loss': 0.0323, 'learning_rate': 0.0001694074195220634, 'epoch': 1.14} \u001b[A\n",
|
||||
"{'loss': 0.0298, 'learning_rate': 0.00016396730215588915, 'epoch': 1.19} \n",
|
||||
"{'loss': 0.0296, 'learning_rate': 0.0001581858915557953, 'epoch': 1.24} \n",
|
||||
"{'loss': 0.0228, 'learning_rate': 0.00015209403404879303, 'epoch': 1.3} \n",
|
||||
"{'loss': 0.0265, 'learning_rate': 0.00014572423233046386, 'epoch': 1.35} \n",
|
||||
"{'loss': 0.0267, 'learning_rate': 0.0001391104720490156, 'epoch': 1.41} \n",
|
||||
"{'loss': 0.0247, 'learning_rate': 0.00013228804047714463, 'epoch': 1.46} \n",
|
||||
"{'loss': 0.0222, 'learning_rate': 0.00012529333823916807, 'epoch': 1.51} \n",
|
||||
"{'loss': 0.0209, 'learning_rate': 0.00011816368509794364, 'epoch': 1.57} \n",
|
||||
"{'loss': 0.0235, 'learning_rate': 0.00011093712083778746, 'epoch': 1.62} \n",
|
||||
"{'loss': 0.0207, 'learning_rate': 0.0001036522023057659, 'epoch': 1.68} \n",
|
||||
"{'loss': 0.0203, 'learning_rate': 9.63477976942341e-05, 'epoch': 1.73} \n",
|
||||
"{'loss': 0.0231, 'learning_rate': 8.906287916221259e-05, 'epoch': 1.78} \n",
|
||||
"{'loss': 0.0191, 'learning_rate': 8.183631490205637e-05, 'epoch': 1.84} \n",
|
||||
"{'loss': 0.017, 'learning_rate': 7.470666176083192e-05, 'epoch': 1.89} \n",
|
||||
"{'loss': 0.0204, 'learning_rate': 6.77119595228554e-05, 'epoch': 1.95} \n",
|
||||
"{'loss': 0.0205, 'learning_rate': 6.0889527950984416e-05, 'epoch': 2.0} \n",
|
||||
" 69%|█████████████████████████████▍ | 37/54 [11:37<05:20, 18.83s/it][2023-08-24 04:43:19,875] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 603850\u001b[39m\n",
|
||||
"[2023-08-24 04:43:19,875] [INFO] [axolotl.utils.dataloader.generate_batches:181] [PID:89149] generating packed batches\u001b[39m\n",
|
||||
"[2023-08-24 04:43:19,876] [INFO] [axolotl.utils.dataloader.generate_batches:187] [PID:89149] 27a7b97e8e923002d7b4b63b2f6f62acdf710239a1b0b39869915794835c3843\u001b[39m\n",
|
||||
"[2023-08-24 04:43:19,877] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 603850\u001b[39m\n",
|
||||
"{'loss': 0.0187, 'learning_rate': 5.4275767669536146e-05, 'epoch': 2.05} \n",
|
||||
"{'loss': 0.0189, 'learning_rate': 4.790596595120699e-05, 'epoch': 2.11} \n",
|
||||
"{'loss': 0.0217, 'learning_rate': 4.181410844420474e-05, 'epoch': 2.16} \n",
|
||||
" 74%|███████████████████████████████▊ | 40/54 [12:34<04:22, 18.78s/it][2023-08-24 04:44:16,231] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 31311\u001b[39m\n",
|
||||
"[2023-08-24 04:44:16,239] [INFO] [axolotl.utils.dataloader.generate_batches:181] [PID:89149] generating packed batches\u001b[39m\n",
|
||||
"[2023-08-24 04:44:16,239] [INFO] [axolotl.utils.dataloader.generate_batches:187] [PID:89149] 3ada03fc400394f41d492cd6cd41f4c9f2b3fbd736749807d50d4dfadb841911\u001b[39m\n",
|
||||
"[2023-08-24 04:44:16,240] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 31311\u001b[39m\n",
|
||||
"/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py:322: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
|
||||
" warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
|
||||
"[2023-08-24 04:44:17,625] [INFO] [accelerate.accelerator.log:60] [PID:89149] The used dataset had no length, returning gathered tensors. You should drop the remainder yourself.\n",
|
||||
"[2023-08-24 04:44:17,625] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 31311\u001b[39m\n",
|
||||
"[2023-08-24 04:44:17,625] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 31311\u001b[39m\n",
|
||||
"\n",
|
||||
" 0%| | 0/2 [00:00<?, ?it/s]\u001b[A[2023-08-24 04:44:19,048] [INFO] [accelerate.accelerator.log:60] [PID:89149] The used dataset had no length, returning gathered tensors. You should drop the remainder yourself.\n",
|
||||
"[2023-08-24 04:44:19,048] [INFO] [axolotl.utils.dataloader._len_est:262] [PID:89149] packing_efficiency_estimate: 0.97 total_num_tokens per device: 31311\u001b[39m\n",
|
||||
"\n",
|
||||
" \u001b[A\n",
|
||||
"\u001b[A{'eval_loss': 0.017654186114668846, 'eval_runtime': 2.8421, 'eval_samples_per_second': 32.019, 'eval_steps_per_second': 16.185, 'epoch': 2.16}\n",
|
||||
" 74%|███████████████████████████████▊ | 40/54 [12:36<04:22, 18.78s/it]\n",
|
||||
"100%|█████████████████████████████████████████████| 2/2 [00:01<00:00, 1.41it/s]\u001b[A\n",
|
||||
"{'loss': 0.0192, 'learning_rate': 3.60326978441109e-05, 'epoch': 2.22} \u001b[A\n",
|
||||
"{'loss': 0.0188, 'learning_rate': 3.059258047793661e-05, 'epoch': 2.27} \n",
|
||||
"{'loss': 0.0207, 'learning_rate': 2.5522781725621813e-05, 'epoch': 2.32} \n",
|
||||
"{'loss': 0.0175, 'learning_rate': 2.0850351157074598e-05, 'epoch': 2.38} \n",
|
||||
"{'loss': 0.0211, 'learning_rate': 1.660021821101222e-05, 'epoch': 2.43} \n",
|
||||
"{'loss': 0.0185, 'learning_rate': 1.2795059185619229e-05, 'epoch': 2.49} \n",
|
||||
"{'loss': 0.0194, 'learning_rate': 9.455176250685338e-06, 'epoch': 2.54} \n",
|
||||
"{'loss': 0.0187, 'learning_rate': 6.598389126745208e-06, 'epoch': 2.59} \n",
|
||||
"{'loss': 0.0196, 'learning_rate': 4.2399400091594154e-06, 'epoch': 2.65} \n",
|
||||
"{'loss': 0.019, 'learning_rate': 2.392412244407294e-06, 'epoch': 2.7} \n",
|
||||
"{'loss': 0.0161, 'learning_rate': 1.0656631924889749e-06, 'epoch': 2.76} \n",
|
||||
"{'loss': 0.0175, 'learning_rate': 2.667716336448356e-07, 'epoch': 2.81} \n",
|
||||
"{'loss': 0.0173, 'learning_rate': 0.0, 'epoch': 2.86} \n",
|
||||
"{'loss': 0.0195, 'learning_rate': 2.667716336448356e-07, 'epoch': 2.92} \n",
|
||||
"{'train_runtime': 1020.2939, 'train_samples_per_second': 5.06, 'train_steps_per_second': 0.053, 'train_loss': 0.3150797028232504, 'epoch': 2.92}\n",
|
||||
"100%|███████████████████████████████████████████| 54/54 [17:00<00:00, 18.89s/it]\n",
|
||||
"[2023-08-24 04:48:42,405] [INFO] [axolotl.scripts.train:303] [PID:89149] Training Completed!!! Saving pre-trained model to ./models/recipe-model\u001b[39m\n",
|
||||
"\u001b[0m"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!accelerate launch ./axolotl/scripts/finetune.py training-args.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Nice work! If you look on your filesystem you should see a new directory `./models/recipe-model`. This contains your trained model, which you can use to classify more recipes.\n",
|
||||
"\n",
|
||||
"Before we use it though, we need to *merge* the model. We trained our model using [LoRA](https://huggingface.co/docs/peft/conceptual_guides/lora), which is a memory-efficient training method. But the inference library we'll use for testing doesn't support LoRA models yet, so we need to \"merge\" our LoRA model to transform it into a standard Llama2-style model. We've defined a helper to do that, which we'll use below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading base model\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "a7a9904315e3472186b774c7d121c9b3",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading PEFT model\n",
|
||||
"Running merge_and_unload\n",
|
||||
"Model saved to ./models/recipe-model/merged\n",
|
||||
"Final model saved to ./models/recipe-model/merged\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from utils import merge_lora_model\n",
|
||||
"\n",
|
||||
"print(\"Merging model (this could take a while)\")\n",
|
||||
"final_model_dir = merge_lora_model(\"training-args.yaml\")\n",
|
||||
"print(f\"Final model saved to '{final_model_dir}'\")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user