{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# only for WSL\n",
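"# expose the CUDA toolkit and WSL GPU driver libs so bitsandbytes\n",
"# can find libcudart (see the CUDA SETUP log in the next cell)\n",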
"import os \n",
|
|
"os.environ[\"PATH\"] = f\"{os.environ['PATH']}:/usr/local/cuda/bin\"\n",
|
|
"os.environ['LD_LIBRARY_PATH'] = \"/usr/lib/wsl/lib:/usr/local/cuda/lib64\""
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2023-07-15 02:35:25,700] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
"\n",
"===================================BUG REPORT===================================\n",
"Welcome to bitsandbytes. For bug reports, please run\n",
"\n",
"python -m bitsandbytes\n",
"\n",
" and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n",
"================================================================================\n",
"bin /home/oliverwang15/miniconda3/envs/fingpt/lib/python3.9/site-packages/bitsandbytes-0.40.0.post4-py3.9.egg/bitsandbytes/libbitsandbytes_cuda120.so\n",
"CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so\n",
"CUDA SETUP: Highest compute capability among GPUs detected: 8.6\n",
"CUDA SETUP: Detected CUDA version 120\n",
"CUDA SETUP: Loading binary /home/oliverwang15/miniconda3/envs/fingpt/lib/python3.9/site-packages/bitsandbytes-0.40.0.post4-py3.9.egg/bitsandbytes/libbitsandbytes_cuda120.so...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/oliverwang15/miniconda3/envs/fingpt/lib/python3.9/site-packages/bitsandbytes-0.40.0.post4-py3.9.egg/bitsandbytes/cuda_setup/main.py:149: UserWarning: /home/oliverwang15/miniconda3/envs/fingpt did not contain ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] as expected! Searching further paths...\n",
"  warn(msg)\n"
]
}
],
"source": [
"from typing import List, Dict, Optional\n",
"\n",
"import datasets\n",
"import torch\n",
"from loguru import logger\n",
"from datasets import load_dataset\n",
"from transformers import (\n",
"    AutoModel,\n",
"    AutoTokenizer,\n",
"    TrainingArguments,\n",
"    Trainer,\n",
"    BitsAndBytesConfig\n",
")\n",
"from peft import (\n",
"    TaskType,\n",
"    LoraConfig,\n",
"    get_peft_model,\n",
"    set_peft_model_state_dict,\n",
"    prepare_model_for_kbit_training,\n",
"    prepare_model_for_int8_training,\n",
")\n",
"from peft.utils import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"training_args = TrainingArguments(\n",
"    output_dir='./finetuned_model',    # saved model path\n",
"    logging_steps=500,\n",
"    # max_steps=10000,\n",
"    num_train_epochs=2,\n",
"    per_device_train_batch_size=4,\n",
"    gradient_accumulation_steps=8,\n",
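"    # effective batch size: 4 per device x 8 accumulation steps = 32;\n",
"    # eval_steps is unset, so with evaluation_strategy='steps' evaluation\n",
"    # should default to every logging_steps (500), matching save_steps\n",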
" learning_rate=1e-4,\n",
|
|
" weight_decay=0.01,\n",
|
|
" warmup_steps=1000,\n",
|
|
" save_steps=500,\n",
|
|
" fp16=True,\n",
|
|
" # bf16=True,\n",
|
|
" torch_compile = False,\n",
|
|
" load_best_model_at_end = True,\n",
|
|
" evaluation_strategy=\"steps\",\n",
|
|
" remove_unused_columns=False,\n",
|
|
"\n",
|
|
" )"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# # Quantization\n",
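"# kept for reference: an NF4 4-bit config with double quantization\n",
"# (the QLoRA recipe), an alternative to the load_in_8bit=True used below\n",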
"# q_config = BitsAndBytesConfig(load_in_4bit=True,\n",
|
|
"# bnb_4bit_quant_type='nf4',\n",
|
|
"# bnb_4bit_use_double_quant=True,\n",
|
|
"# bnb_4bit_compute_dtype=torch.float16\n",
|
|
"# )"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "278d0501a0f94fdabe335bc98e0af31b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/oliverwang15/miniconda3/envs/fingpt/lib/python3.9/site-packages/peft/utils/other.py:102: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.\n",
"  warnings.warn(\n"
]
}
],
"source": [
"# Load tokenizer & model\n",
"model_name = \"THUDM/chatglm2-6b\"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
"model = AutoModel.from_pretrained(\n",
"    model_name,\n",
"    # quantization_config=q_config,\n",
"    load_in_8bit=True,\n",
"    trust_remote_code=True,\n",
"    device='cuda'\n",
")\n",
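"# prepare for int8 training: freezes the base weights, casts norm/output\n",
"# layers to fp32 and enables gradient checkpointing (the warning above\n",
"# notes prepare_model_for_kbit_training is the non-deprecated equivalent)\n",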
"model = prepare_model_for_int8_training(model, use_gradient_checkpointing=True)"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def print_trainable_parameters(model):\n",
"    \"\"\"\n",
"    Prints the number of trainable parameters in the model.\n",
"    \"\"\"\n",
"    trainable_params = 0\n",
"    all_param = 0\n",
"    for _, param in model.named_parameters():\n",
"        all_param += param.numel()\n",
"        if param.requires_grad:\n",
"            trainable_params += param.numel()\n",
"    print(\n",
"        f\"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}\"\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"trainable params: 1949696 || all params: 6245533696 || trainable%: 0.031217444255383614\n"
]
}
],
"source": [
"# LoRA\n",
"target_modules = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING['chatglm']\n",
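"# for 'chatglm' this mapping resolves to ['query_key_value'], i.e. LoRA\n",
"# is injected into each attention block's fused QKV projection\n",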
"lora_config = LoraConfig(\n",
|
|
" task_type=TaskType.CAUSAL_LM,\n",
|
|
" inference_mode=False,\n",
|
|
" r=8,\n",
|
|
" lora_alpha=32,\n",
|
|
" lora_dropout=0.1,\n",
|
|
" target_modules=target_modules,\n",
|
|
" bias='none',\n",
|
|
")\n",
|
|
"model = get_peft_model(model, lora_config)\n",
|
|
"print_trainable_parameters(model)"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
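"# optional resume: prefer a full 'pytorch_model.bin'; otherwise fall back\n",
"# to a LoRA-only 'adapter_model.bin' and load it into the PEFT model\n",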
"resume_from_checkpoint = None\n",
|
|
"if resume_from_checkpoint is not None:\n",
|
|
" checkpoint_name = os.path.join(resume_from_checkpoint, 'pytorch_model.bin')\n",
|
|
" if not os.path.exists(checkpoint_name):\n",
|
|
" checkpoint_name = os.path.join(\n",
|
|
" resume_from_checkpoint, 'adapter_model.bin'\n",
|
|
" )\n",
|
|
" resume_from_checkpoint = False\n",
|
|
" if os.path.exists(checkpoint_name):\n",
|
|
" logger.info(f'Restarting from {checkpoint_name}')\n",
|
|
" adapters_weights = torch.load(checkpoint_name)\n",
|
|
" set_peft_model_state_dict(model, adapters_weights)\n",
|
|
" else:\n",
|
|
" logger.info(f'Checkpoint {checkpoint_name} not found')"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"trainable params: 1,949,696 || all params: 6,245,533,696 || trainable%: 0.031217444255383614\n"
]
}
],
"source": [
"model.print_trainable_parameters()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# load data\n",
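"# path is relative to this notebook; train_test_split(0.2) holds out\n",
"# 20% of the examples as the evaluation split\n",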
"dataset = datasets.load_from_disk(\"../data/dataset_new\")\n",
|
|
"dataset = dataset.train_test_split(0.2, shuffle=True, seed = 42)"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"class ModifiedTrainer(Trainer):\n",
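"    # ChatGLM's forward computes the LM loss itself when labels are\n",
"    # passed, so the overrides below just feed input_ids and labels\n",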
" def compute_loss(self, model, inputs, return_outputs=False):\n",
|
|
" return model(\n",
|
|
" input_ids=inputs[\"input_ids\"],\n",
|
|
" labels=inputs[\"labels\"],\n",
|
|
" ).loss\n",
|
|
"\n",
|
|
" def prediction_step(self, model: torch.nn.Module, inputs, prediction_loss_only: bool, ignore_keys = None):\n",
|
|
" with torch.no_grad():\n",
|
|
" res = model(\n",
|
|
" input_ids=inputs[\"input_ids\"].to(model.device),\n",
|
|
" labels=inputs[\"labels\"].to(model.device),\n",
|
|
" ).loss\n",
|
|
" return (res, None, None)\n",
|
|
"\n",
|
|
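"    # save only the trainable (LoRA) parameters: the adapter checkpoint\n",
"    # stays small instead of duplicating the full 6B base model\n",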
" def save_model(self, output_dir=None, _internal_call=False):\n",
|
|
" from transformers.trainer import TRAINING_ARGS_NAME\n",
|
|
"\n",
|
|
" os.makedirs(output_dir, exist_ok=True)\n",
|
|
" torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME))\n",
|
|
" saved_params = {\n",
|
|
" k: v.to(\"cpu\") for k, v in self.model.named_parameters() if v.requires_grad\n",
|
|
" }\n",
|
|
" torch.save(saved_params, os.path.join(output_dir, \"adapter_model.bin\"))\n",
|
|
"\n",
|
|
"def data_collator(features: list) -> dict:\n",
|
|
" len_ids = [len(feature[\"input_ids\"]) for feature in features]\n",
|
|
" longest = max(len_ids)\n",
|
|
" input_ids = []\n",
|
|
" labels_list = []\n",
|
|
" for ids_l, feature in sorted(zip(len_ids, features), key=lambda x: -x[0]):\n",
|
|
" ids = feature[\"input_ids\"]\n",
|
|
" seq_len = feature[\"seq_len\"]\n",
|
|
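"        # build labels that mask the prompt: the first seq_len-1 positions\n",
"        # are filled with pad_token_id (meant to be ignored by the loss),\n",
"        # then ids and labels are right-padded to the batch's longest row\n",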
" labels = (\n",
|
|
" [tokenizer.pad_token_id] * (seq_len - 1) + ids[(seq_len - 1) :] + [tokenizer.pad_token_id] * (longest - ids_l)\n",
|
|
" )\n",
|
|
" ids = ids + [tokenizer.pad_token_id] * (longest - ids_l)\n",
|
|
" _ids = torch.LongTensor(ids)\n",
|
|
" labels_list.append(torch.LongTensor(labels))\n",
|
|
" input_ids.append(_ids)\n",
|
|
" input_ids = torch.stack(input_ids)\n",
|
|
" labels = torch.stack(labels_list)\n",
|
|
" return {\n",
|
|
" \"input_ids\": input_ids,\n",
|
|
" \"labels\": labels,\n",
|
|
" }\n"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"from torch.utils.tensorboard import SummaryWriter\n",
"from transformers.integrations import TensorBoardCallback"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You are adding a <class 'transformers.integrations.TensorBoardCallback'> to the callbacks of this Trainer, but there is already one. The currentlist of callbacks is\n",
":DefaultFlowCallback\n",
"TensorBoardCallback\n",
"/home/oliverwang15/miniconda3/envs/fingpt/lib/python3.9/site-packages/transformers/optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"  warnings.warn(\n",
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...\n",
"/home/oliverwang15/miniconda3/envs/fingpt/lib/python3.9/site-packages/bitsandbytes-0.40.0.post4-py3.9.egg/bitsandbytes/autograd/_functions.py:322: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization\n",
"  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n"
]
},
{
"data": {
"text/html": [
"\n",
"<div>\n",
"  <progress value='3838' max='3838' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
"  [3838/3838 7:41:31, Epoch 1/2]\n",
"</div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: left;\">\n",
"      <th>Step</th>\n",
"      <th>Training Loss</th>\n",
"      <th>Validation Loss</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <td>500</td>\n",
"      <td>9.616700</td>\n",
"      <td>5.970341</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>1000</td>\n",
"      <td>5.773300</td>\n",
"      <td>5.637556</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>1500</td>\n",
"      <td>5.606400</td>\n",
"      <td>5.624945</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>2000</td>\n",
"      <td>5.598000</td>\n",
"      <td>5.620837</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>2500</td>\n",
"      <td>5.594000</td>\n",
"      <td>5.618898</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>3000</td>\n",
"      <td>5.593100</td>\n",
"      <td>5.617580</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>3500</td>\n",
"      <td>5.591000</td>\n",
"      <td>5.616888</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table><p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/oliverwang15/miniconda3/envs/fingpt/lib/python3.9/site-packages/bitsandbytes-0.40.0.post4-py3.9.egg/bitsandbytes/autograd/_functions.py:322: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization\n",
"  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n"
]
}
],
"source": [
"# Train\n",
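"# note: model is a PeftModel, so save_pretrained() at the end writes only\n",
"# the LoRA adapter weights and config, not the full base model\n",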
"writer = SummaryWriter()\n",
|
|
"trainer = ModifiedTrainer(\n",
|
|
" model=model, \n",
|
|
" args=training_args, # Trainer args\n",
|
|
" train_dataset=dataset[\"train\"], # Training set\n",
|
|
" eval_dataset=dataset[\"test\"], # Testing set\n",
|
|
" data_collator=data_collator, # Data Collator\n",
|
|
" callbacks=[TensorBoardCallback(writer)],\n",
|
|
")\n",
|
|
"trainer.train()\n",
|
|
"writer.close()\n",
|
|
"# save model\n",
|
|
"model.save_pretrained(training_args.output_dir)"
|
|
]
|
|
},
|
|
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## GPU Training MEM: 46.7%\n",
"## GPU Eval MEM: 83.5%"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "fingpt",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}