mirror of
https://github.com/hate-alert/DE-LIMIT.git
synced 2021-05-12 18:32:23 +03:00
808 lines
42 KiB
Plaintext
808 lines
42 KiB
Plaintext
{
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0,
|
|
"metadata": {
|
|
"colab": {
|
|
"name": "N_Class_model.ipynb",
|
|
"provenance": [],
|
|
"authorship_tag": "ABX9TyPedlFLQPDVH9N93WjDnYYe",
|
|
"include_colab_link": true
|
|
},
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"display_name": "Python 3"
|
|
},
|
|
"accelerator": "GPU"
|
|
},
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "view-in-github",
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"<a href=\"https://colab.research.google.com/github/SaiSakethAluru/DE-LIMIT/blob/master/Example/N_Class_model.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "aneUnNP1gtZg",
|
|
"colab_type": "code",
|
|
"colab": {}
|
|
},
|
|
"source": [
|
|
"import torch"
|
|
],
|
|
"execution_count": 0,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "qisVckV-jR4F",
|
|
"colab_type": "code",
|
|
"colab": {}
|
|
},
|
|
"source": [
|
|
"from transformers import BertTokenizer\n",
|
|
"from transformers import BertForSequenceClassification"
|
|
],
|
|
"execution_count": 0,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "z2wK_PiCjdSL",
|
|
"colab_type": "code",
|
|
"outputId": "e1f4d17b-1079-4ba8-e40f-69990cb9d35f",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 53
|
|
}
|
|
},
|
|
"source": [
|
|
"# Select the compute device: a CUDA GPU when one is available, otherwise the CPU.\n",
"if not torch.cuda.is_available():\n",
"    device = torch.device(\"cpu\")\n",
"    print('No GPU available, using the CPU instead.')\n",
"else:\n",
"    # Tell PyTorch to use the GPU and report what was found.\n",
"    device = torch.device(\"cuda\")\n",
"    print('There are %d GPU(s) available.' % torch.cuda.device_count())\n",
"    print('We will use the GPU:', torch.cuda.get_device_name(0))"
|
|
],
|
|
"execution_count": 23,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"text": [
|
|
"There are 1 GPU(s) available.\n",
|
|
"We will use the GPU: Tesla T4\n"
|
|
],
|
|
"name": "stdout"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "SqlNpWEikGhG",
|
|
"colab_type": "code",
|
|
"colab": {}
|
|
},
|
|
"source": [
|
|
"# Load the 'Hate-speech-CNERG/dehatebert-mono-arabic' checkpoint with a sequence-classification head.\n",
"#   num_labels: 2 output labels (binary classification), matching the 2-way classifier head\n",
"#               that appears when the model is printed; a later cell swaps that head for an N-class one.\n",
"#   output_attentions: whether the model returns attention weights.\n",
"#   output_hidden_states: whether the model returns all hidden states.\n",
"model = BertForSequenceClassification.from_pretrained(\n",
"    \"Hate-speech-CNERG/dehatebert-mono-arabic\",\n",
"    num_labels=2,\n",
"    output_attentions=False,\n",
"    output_hidden_states=False,\n",
")"
|
|
],
|
|
"execution_count": 0,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "GSVg7-psnPQE",
|
|
"colab_type": "code",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 1000
|
|
},
|
|
"outputId": "6300e4cd-b081-49b2-eaf3-56ac37cb98ac"
|
|
},
|
|
"source": [
|
|
"print(model)"
|
|
],
|
|
"execution_count": 25,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"text": [
|
|
"BertForSequenceClassification(\n",
|
|
" (bert): BertModel(\n",
|
|
" (embeddings): BertEmbeddings(\n",
|
|
" (word_embeddings): Embedding(105879, 768, padding_idx=0)\n",
|
|
" (position_embeddings): Embedding(512, 768)\n",
|
|
" (token_type_embeddings): Embedding(2, 768)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (encoder): BertEncoder(\n",
|
|
" (layer): ModuleList(\n",
|
|
" (0): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (1): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (2): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (3): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (4): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (5): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (6): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (7): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (8): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (9): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (10): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (11): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (pooler): BertPooler(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (activation): Tanh()\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" (classifier): Linear(in_features=768, out_features=2, bias=True)\n",
|
|
")\n"
|
|
],
|
|
"name": "stdout"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "WJVCEn9-nYDs",
|
|
"colab_type": "code",
|
|
"colab": {}
|
|
},
|
|
"source": [
|
|
"# Number of target classes for the new task.\n",
"NUM_CLASSES = 3\n",
"\n",
"# Swap the checkpoint's 2-way head for a freshly initialised NUM_CLASSES-way linear head\n",
"# that takes the same input features as the head it replaces.\n",
"model.classifier = torch.nn.Linear(in_features=model.classifier.in_features, out_features=NUM_CLASSES)\n",
"\n",
"# Keep the model's label count in sync with the new head. The built-in loss in forward()\n",
"# reshapes the logits using model.num_labels, so leaving it at 2 breaks training as soon as\n",
"# labels are passed; config.num_labels is what is written out when the model is saved.\n",
"model.num_labels = NUM_CLASSES\n",
"model.config.num_labels = NUM_CLASSES"
|
|
],
|
|
"execution_count": 0,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "sxRyyw7boFx5",
|
|
"colab_type": "code",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 1000
|
|
},
|
|
"outputId": "a276ab53-5a36-4562-e4a5-c0e899f20dae"
|
|
},
|
|
"source": [
|
|
"print(model)"
|
|
],
|
|
"execution_count": 27,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"text": [
|
|
"BertForSequenceClassification(\n",
|
|
" (bert): BertModel(\n",
|
|
" (embeddings): BertEmbeddings(\n",
|
|
" (word_embeddings): Embedding(105879, 768, padding_idx=0)\n",
|
|
" (position_embeddings): Embedding(512, 768)\n",
|
|
" (token_type_embeddings): Embedding(2, 768)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (encoder): BertEncoder(\n",
|
|
" (layer): ModuleList(\n",
|
|
" (0): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (1): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (2): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (3): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (4): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (5): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (6): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (7): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (8): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (9): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (10): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (11): BertLayer(\n",
|
|
" (attention): BertAttention(\n",
|
|
" (self): BertSelfAttention(\n",
|
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" (output): BertSelfOutput(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (intermediate): BertIntermediate(\n",
|
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
|
" )\n",
|
|
" (output): BertOutput(\n",
|
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" )\n",
|
|
" )\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (pooler): BertPooler(\n",
|
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
|
" (activation): Tanh()\n",
|
|
" )\n",
|
|
" )\n",
|
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
|
" (classifier): Linear(in_features=768, out_features=3, bias=True)\n",
|
|
")\n"
|
|
],
|
|
"name": "stdout"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "Jfj7PwEioQy2",
|
|
"colab_type": "code",
|
|
"colab": {}
|
|
},
|
|
"source": [
|
|
"# Since the new classifier layer's weights are randomly initialized, the model needs to be trained again before its predictions are accurate.\n",
|
|
"# If you wish to train the whole model on your dataset, use the 'model' variable constructed above directly for training.\n",
|
|
"# Alternatively, if you wish to freeze the rest of the model (the BERT layers) and train only the final linear layer, you can do so as shown in the next cell."
|
|
],
|
|
"execution_count": 0,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "g93xx0GmMb4G",
|
|
"colab_type": "code",
|
|
"colab": {}
|
|
},
|
|
"source": [
|
|
"# Freeze every parameter whose name does not contain 'classifier', so that only the\n",
"# classification head receives gradient updates during training.\n",
"for param_name, param in model.named_parameters():\n",
"    if 'classifier' in param_name:\n",
"        continue  # head parameters keep their current requires_grad setting\n",
"    param.requires_grad = False"
|
|
],
|
|
"execution_count": 0,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "ZKbK2hPrNZUe",
|
|
"colab_type": "code",
|
|
"colab": {}
|
|
},
|
|
"source": [
|
|
""
|
|
],
|
|
"execution_count": 0,
|
|
"outputs": []
|
|
}
|
|
]
|
|
} |