mirror of
https://github.com/inzva/Medical-Electra.git
synced 2021-06-01 09:24:59 +03:00
QA Finetuning
This commit is contained in:
145
fine_tuning/QA_Finetuning.ipynb
Normal file
145
fine_tuning/QA_Finetuning.ipynb
Normal file
@@ -0,0 +1,145 @@
|
||||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"name": "QA_finetuning",
|
||||
"provenance": [],
|
||||
"collapsed_sections": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"accelerator": "GPU"
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "FW0KtxFbsKjP"
|
||||
},
|
||||
"source": [
|
||||
"!pip install transformers"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "0LzPy9QOuXCp"
|
||||
},
|
||||
"source": [
|
||||
"!wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/question-answering/run_squad.py\n",
|
||||
"\n",
|
||||
"!git clone https://github.com/dmis-lab/biobert.git"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "-RQTc30woc10"
|
||||
},
|
||||
"source": [
|
||||
"# Default BioASQ test files doesn't match SQUAD format. So add dummy keys \n",
|
||||
"# to match it to make predictions.\n",
|
||||
"with open(\"BioASQ/BioASQ/BioASQ-test-factoid-4b-1.json\") as f:\n",
|
||||
" data = json.load(f)\n",
|
||||
"\n",
|
||||
"new_paragraph = []\n",
|
||||
"for questions in data[\"data\"][0][\"paragraphs\"]:\n",
|
||||
" new_questions = []\n",
|
||||
" for question in questions[\"qas\"]:\n",
|
||||
" new_question = {**question}\n",
|
||||
" # Adding dummy answer key.\n",
|
||||
" new_question[\"answers\"] = [{\"text\": \"\", \"answer_start\": 0}]\n",
|
||||
" new_questions.append(new_question)\n",
|
||||
" \n",
|
||||
" new_paragraph.append({**questions, \"qas\": new_questions})\n",
|
||||
" \n",
|
||||
"data[\"data\"][0][\"paragraphs\"] = new_paragraph\n",
|
||||
"\n",
|
||||
"with open(\"BioASQ/BioASQ/Answered_BioASQ-test-factoid-4b-1.json\", \"w\") as f:\n",
|
||||
" json.dump(data, f)"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "XSBhXzODoG9s"
|
||||
},
|
||||
"source": [
|
||||
"# train_files directory format\n",
|
||||
"# train_files\n",
|
||||
"# train-v1.1.json\n",
|
||||
"# dev-v1.1.json\n",
|
||||
"# For the predictions on the raw data\n",
|
||||
"# Answered_BioASQ-test-factoid-4b-1.json"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "8XIPgsL8oeGq"
|
||||
},
|
||||
"source": [
|
||||
"# google/electra-small-discriminator\n",
|
||||
"# enelpi/med-electra-small-discriminator"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "e5IVU_WV_kc1"
|
||||
},
|
||||
"source": [
|
||||
"!python run_squad.py \\\n",
|
||||
"--model_type electra \\\n",
|
||||
"--model_name_or_path enelpi/med-electra-small-discriminator \\\n",
|
||||
"--do_train \\\n",
|
||||
"--do_eval \\\n",
|
||||
"--data_dir train_files \\\n",
|
||||
"--predict_file Answered_BioASQ-test-factoid-4b-1.json \\\n",
|
||||
"--cache_dir med_electra_cached_data \\\n",
|
||||
"--per_gpu_train_batch_size 16 \\\n",
|
||||
"--num_train_epochs 5 \\\n",
|
||||
"--max_seq_length 128 \\\n",
|
||||
"--doc_stride 128 \\\n",
|
||||
"--output_dir ./med_electra_finetuned_model"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "nOKkXUpzGXdp"
|
||||
},
|
||||
"source": [
|
||||
"!python biobert/biocodes/transform_nbset2bioasqform.py --nbest_path=med_electra_finetuned_model/nbest_predictions_.json --output_path=prediction_outputs"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "KRHTnIHhnt1c"
|
||||
},
|
||||
"source": [
|
||||
"!java -Xmx10G -cp $CLASSPATH:./flat/BioASQEvaluation/dist/BioASQEvaluation.jar evaluation.EvaluatorTask1b -phaseB -e 5 BioASQ/BioASQ/4B1_golden.json prediction_outputs"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user