mirror of
https://github.com/ashishpatel26/Amazing-Feature-Engineering.git
synced 2022-05-07 18:26:02 +03:00
680 lines
39 KiB
Plaintext
680 lines
39 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"# import seaborn as sns\n",
|
||
"# import matplotlib.pyplot as plt\n",
|
||
"import os\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"from sklearn.linear_model import Lasso, LogisticRegression\n",
|
||
"from sklearn.preprocessing import RobustScaler\n",
|
||
"from sklearn.feature_selection import SelectFromModel\n",
|
||
"from feature_selection import embedded_method\n",
|
||
"# plt.style.use('seaborn-colorblind')\n",
|
||
"# %matplotlib inline\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Load Dataset"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.datasets import load_breast_cancer\n",
|
||
"data = load_breast_cancer()\n",
|
||
"data = pd.DataFrame(np.c_[data['data'], data['target']],\n",
|
||
" columns= np.append(data['feature_names'], ['target']))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>mean radius</th>\n",
|
||
" <th>mean texture</th>\n",
|
||
" <th>mean perimeter</th>\n",
|
||
" <th>mean area</th>\n",
|
||
" <th>mean smoothness</th>\n",
|
||
" <th>mean compactness</th>\n",
|
||
" <th>mean concavity</th>\n",
|
||
" <th>mean concave points</th>\n",
|
||
" <th>mean symmetry</th>\n",
|
||
" <th>mean fractal dimension</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>worst texture</th>\n",
|
||
" <th>worst perimeter</th>\n",
|
||
" <th>worst area</th>\n",
|
||
" <th>worst smoothness</th>\n",
|
||
" <th>worst compactness</th>\n",
|
||
" <th>worst concavity</th>\n",
|
||
" <th>worst concave points</th>\n",
|
||
" <th>worst symmetry</th>\n",
|
||
" <th>worst fractal dimension</th>\n",
|
||
" <th>target</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>17.99</td>\n",
|
||
" <td>10.38</td>\n",
|
||
" <td>122.80</td>\n",
|
||
" <td>1001.0</td>\n",
|
||
" <td>0.11840</td>\n",
|
||
" <td>0.27760</td>\n",
|
||
" <td>0.3001</td>\n",
|
||
" <td>0.14710</td>\n",
|
||
" <td>0.2419</td>\n",
|
||
" <td>0.07871</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>17.33</td>\n",
|
||
" <td>184.60</td>\n",
|
||
" <td>2019.0</td>\n",
|
||
" <td>0.1622</td>\n",
|
||
" <td>0.6656</td>\n",
|
||
" <td>0.7119</td>\n",
|
||
" <td>0.2654</td>\n",
|
||
" <td>0.4601</td>\n",
|
||
" <td>0.11890</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>20.57</td>\n",
|
||
" <td>17.77</td>\n",
|
||
" <td>132.90</td>\n",
|
||
" <td>1326.0</td>\n",
|
||
" <td>0.08474</td>\n",
|
||
" <td>0.07864</td>\n",
|
||
" <td>0.0869</td>\n",
|
||
" <td>0.07017</td>\n",
|
||
" <td>0.1812</td>\n",
|
||
" <td>0.05667</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>23.41</td>\n",
|
||
" <td>158.80</td>\n",
|
||
" <td>1956.0</td>\n",
|
||
" <td>0.1238</td>\n",
|
||
" <td>0.1866</td>\n",
|
||
" <td>0.2416</td>\n",
|
||
" <td>0.1860</td>\n",
|
||
" <td>0.2750</td>\n",
|
||
" <td>0.08902</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>19.69</td>\n",
|
||
" <td>21.25</td>\n",
|
||
" <td>130.00</td>\n",
|
||
" <td>1203.0</td>\n",
|
||
" <td>0.10960</td>\n",
|
||
" <td>0.15990</td>\n",
|
||
" <td>0.1974</td>\n",
|
||
" <td>0.12790</td>\n",
|
||
" <td>0.2069</td>\n",
|
||
" <td>0.05999</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>25.53</td>\n",
|
||
" <td>152.50</td>\n",
|
||
" <td>1709.0</td>\n",
|
||
" <td>0.1444</td>\n",
|
||
" <td>0.4245</td>\n",
|
||
" <td>0.4504</td>\n",
|
||
" <td>0.2430</td>\n",
|
||
" <td>0.3613</td>\n",
|
||
" <td>0.08758</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>11.42</td>\n",
|
||
" <td>20.38</td>\n",
|
||
" <td>77.58</td>\n",
|
||
" <td>386.1</td>\n",
|
||
" <td>0.14250</td>\n",
|
||
" <td>0.28390</td>\n",
|
||
" <td>0.2414</td>\n",
|
||
" <td>0.10520</td>\n",
|
||
" <td>0.2597</td>\n",
|
||
" <td>0.09744</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>26.50</td>\n",
|
||
" <td>98.87</td>\n",
|
||
" <td>567.7</td>\n",
|
||
" <td>0.2098</td>\n",
|
||
" <td>0.8663</td>\n",
|
||
" <td>0.6869</td>\n",
|
||
" <td>0.2575</td>\n",
|
||
" <td>0.6638</td>\n",
|
||
" <td>0.17300</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>20.29</td>\n",
|
||
" <td>14.34</td>\n",
|
||
" <td>135.10</td>\n",
|
||
" <td>1297.0</td>\n",
|
||
" <td>0.10030</td>\n",
|
||
" <td>0.13280</td>\n",
|
||
" <td>0.1980</td>\n",
|
||
" <td>0.10430</td>\n",
|
||
" <td>0.1809</td>\n",
|
||
" <td>0.05883</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>16.67</td>\n",
|
||
" <td>152.20</td>\n",
|
||
" <td>1575.0</td>\n",
|
||
" <td>0.1374</td>\n",
|
||
" <td>0.2050</td>\n",
|
||
" <td>0.4000</td>\n",
|
||
" <td>0.1625</td>\n",
|
||
" <td>0.2364</td>\n",
|
||
" <td>0.07678</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 31 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" mean radius mean texture mean perimeter mean area mean smoothness \\\n",
|
||
"0 17.99 10.38 122.80 1001.0 0.11840 \n",
|
||
"1 20.57 17.77 132.90 1326.0 0.08474 \n",
|
||
"2 19.69 21.25 130.00 1203.0 0.10960 \n",
|
||
"3 11.42 20.38 77.58 386.1 0.14250 \n",
|
||
"4 20.29 14.34 135.10 1297.0 0.10030 \n",
|
||
"\n",
|
||
" mean compactness mean concavity mean concave points mean symmetry \\\n",
|
||
"0 0.27760 0.3001 0.14710 0.2419 \n",
|
||
"1 0.07864 0.0869 0.07017 0.1812 \n",
|
||
"2 0.15990 0.1974 0.12790 0.2069 \n",
|
||
"3 0.28390 0.2414 0.10520 0.2597 \n",
|
||
"4 0.13280 0.1980 0.10430 0.1809 \n",
|
||
"\n",
|
||
" mean fractal dimension ... worst texture worst perimeter worst area \\\n",
|
||
"0 0.07871 ... 17.33 184.60 2019.0 \n",
|
||
"1 0.05667 ... 23.41 158.80 1956.0 \n",
|
||
"2 0.05999 ... 25.53 152.50 1709.0 \n",
|
||
"3 0.09744 ... 26.50 98.87 567.7 \n",
|
||
"4 0.05883 ... 16.67 152.20 1575.0 \n",
|
||
"\n",
|
||
" worst smoothness worst compactness worst concavity worst concave points \\\n",
|
||
"0 0.1622 0.6656 0.7119 0.2654 \n",
|
||
"1 0.1238 0.1866 0.2416 0.1860 \n",
|
||
"2 0.1444 0.4245 0.4504 0.2430 \n",
|
||
"3 0.2098 0.8663 0.6869 0.2575 \n",
|
||
"4 0.1374 0.2050 0.4000 0.1625 \n",
|
||
"\n",
|
||
" worst symmetry worst fractal dimension target \n",
|
||
"0 0.4601 0.11890 0.0 \n",
|
||
"1 0.2750 0.08902 0.0 \n",
|
||
"2 0.3613 0.08758 0.0 \n",
|
||
"3 0.6638 0.17300 0.0 \n",
|
||
"4 0.2364 0.07678 0.0 \n",
|
||
"\n",
|
||
"[5 rows x 31 columns]"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.head(5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"((455, 30), (114, 30))"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train, X_test, y_train, y_test = train_test_split(data.drop(labels=['target'], axis=1), \n",
|
||
" data.target, test_size=0.2,\n",
|
||
" random_state=0)\n",
|
||
"X_train.shape, X_test.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Lasso\n",
|
||
"Regularization with Lasso. The Lasso (L1) penalty is able to shrink some of the coefficients to exactly zero, so the corresponding features can be removed from the model."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,\n",
|
||
" with_scaling=True)"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# linear models benefit from feature scaling\n",
|
||
"\n",
|
||
"scaler = RobustScaler()\n",
|
||
"scaler.fit(X_train)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
|
||
" FutureWarning)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"SelectFromModel(estimator=LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,\n",
|
||
" intercept_scaling=1, max_iter=100, multi_class='warn',\n",
|
||
" n_jobs=None, penalty='l1', random_state=None, solver='warn',\n",
|
||
" tol=0.0001, verbose=0, warm_start=False),\n",
|
||
" max_features=None, norm_order=1, prefit=False, threshold=None)"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# fit the LR model\n",
|
||
"sel_ = SelectFromModel(LogisticRegression(C=1, penalty='l1'))\n",
|
||
"sel_.fit(scaler.transform(X_train), y_train)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"total features: 30\n",
|
||
"selected features: 14\n",
|
||
"features with coefficients shrank to zero: 16\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# make a list with the selected features\n",
|
||
"selected_feat = X_train.columns[(sel_.get_support())]\n",
|
||
"\n",
|
||
"print('total features: {}'.format((X_train.shape[1])))\n",
|
||
"print('selected features: {}'.format(len(selected_feat)))\n",
|
||
"print('features with coefficients shrank to zero: {}'.format(\n",
|
||
" np.sum(sel_.estimator_.coef_ == 0)))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['mean radius', 'mean perimeter', 'mean area', 'mean smoothness',\n",
|
||
" 'mean compactness', 'mean concavity', 'mean fractal dimension',\n",
|
||
" 'texture error', 'perimeter error', 'smoothness error',\n",
|
||
" 'concavity error', 'concave points error', 'symmetry error',\n",
|
||
" 'worst radius', 'worst perimeter', 'worst compactness'],\n",
|
||
" dtype='object')"
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# we can identify the removed features like this:\n",
|
||
"removed_feats = X_train.columns[(sel_.estimator_.coef_ == 0).ravel().tolist()]\n",
|
||
"removed_feats"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"((455, 14), (114, 14))"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# remove the features from the training and testing set\n",
|
||
"\n",
|
||
"X_train_selected = sel_.transform(X_train.fillna(0))\n",
|
||
"X_test_selected = sel_.transform(X_test.fillna(0))\n",
|
||
"\n",
|
||
"X_train_selected.shape, X_test_selected.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Random Forest Importance\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Feature ranking:\n",
|
||
"1. feature no:27 feature name:worst concave points (0.206316)\n",
|
||
"2. feature no:22 feature name:worst perimeter (0.147163)\n",
|
||
"3. feature no:7 feature name:mean concave points (0.100672)\n",
|
||
"4. feature no:20 feature name:worst radius (0.082449)\n",
|
||
"5. feature no:6 feature name:mean concavity (0.060420)\n",
|
||
"6. feature no:2 feature name:mean perimeter (0.048284)\n",
|
||
"7. feature no:23 feature name:worst area (0.046151)\n",
|
||
"8. feature no:3 feature name:mean area (0.038594)\n",
|
||
"9. feature no:13 feature name:area error (0.035627)\n",
|
||
"10. feature no:0 feature name:mean radius (0.030476)\n",
|
||
"11. feature no:10 feature name:radius error (0.028711)\n",
|
||
"12. feature no:26 feature name:worst concavity (0.028533)\n",
|
||
"13. feature no:12 feature name:perimeter error (0.019986)\n",
|
||
"14. feature no:21 feature name:worst texture (0.018623)\n",
|
||
"15. feature no:1 feature name:mean texture (0.013840)\n",
|
||
"16. feature no:25 feature name:worst compactness (0.013195)\n",
|
||
"17. feature no:29 feature name:worst fractal dimension (0.011840)\n",
|
||
"18. feature no:24 feature name:worst smoothness (0.008988)\n",
|
||
"19. feature no:28 feature name:worst symmetry (0.008973)\n",
|
||
"20. feature no:18 feature name:symmetry error (0.007378)\n",
|
||
"21. feature no:11 feature name:texture error (0.006736)\n",
|
||
"22. feature no:15 feature name:compactness error (0.005464)\n",
|
||
"23. feature no:19 feature name:fractal dimension error (0.005117)\n",
|
||
"24. feature no:16 feature name:concavity error (0.004957)\n",
|
||
"25. feature no:8 feature name:mean symmetry (0.004660)\n",
|
||
"26. feature no:4 feature name:mean smoothness (0.004614)\n",
|
||
"27. feature no:9 feature name:mean fractal dimension (0.003689)\n",
|
||
"28. feature no:17 feature name:concave points error (0.002993)\n",
|
||
"29. feature no:5 feature name:mean compactness (0.002844)\n",
|
||
"30. feature no:14 feature name:smoothness error (0.002706)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEICAYAAAC3Y/QeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFvpJREFUeJzt3X20JHV95/H3hwF8ABSVUXkYGDQc\ndlnj44hk14d7hERAHVgjEVYNuBiW7HKMm+zGibrEYMzxYWM2OUuioxJZNwiIuzqJY9AYSWKyEEYz\nGgGJ4wScG55GQMVHHPjuH1VD2mvfmeF29b0z/t6vc/pMV9Wv6/urmr6fqv51dXeqCklSW/Za6g5I\nkhaf4S9JDTL8JalBhr8kNcjwl6QGGf6S1CDDX3ucJO9K8t+Wuh/Snszwb0iSm5J8N8m3Rm6HTLjO\nmSSzQ/VxV1TVuVX15sWsOZ8k70/ym0vdj1017f4mWZvkxiT3JzlrzPL/nOS2JN9IclGSh0yrL9ox\nw789L66q/UdutyxlZ5LsvZT1J5Fk2VL3YTf0eeA/Ap+buyDJC4A1wPHASuAJwG8sZuc0oqq8NXID\nbgJOmGfZccDfAF+n+wOeGVn2KuAG4B5gM/Af+vn7Ad8F7ge+1d8OAd4P/ObI42eA2Tn9eB3wBeD7\nwN794z4MbAX+EXjNDrbjgfVvXzfwq8AdwK3AqcDJwD8AdwGvH3nsm4ArgMv67fkc8JSR5f8SuKrf\nD9cBq+fU/QNgPfBt4BzgB8C9/bb/cd9uDfCVfv3XA/92ZB1nAZ8B/jtwd7+tJ40sfzTwh8At/fKP\njCx7EbCx79vfAE8eWfY64J/6mjcCx4/Zb/P1d2fb/C7gk/26/wI4Yheea58Bzpoz7xLgt0amjwdu\nW+q/i1ZvS94Bb4v4nz1P+AOHAnf2gbkX8NP99PJ++QuBJwIBngd8B3h6v2yGkWDv5z0QzuPa9P3Y\nCKwAHtbX/CxwPrAv3RnhZuAF82zHA+vv172tf+w+wC/QHUAuAQ4A/hXwPeAJffs39QH40r79f+kD\neJ/+tgl4fd+P5/eBd/RI3W8A/6bv80Pnbmvf7jS6g9lewMvoDhQH98vO6uv/ArAM+EW6oE+//GN0\nB6ZH9f15Xj//6XQHt2f1jzuz348PAY4GtgCH9G1XAk/c2b7rp3dlm+8BntvX+l3gM7vwXBsX/p8H\nXjYyfRBQwGOW+m+jxZvDPu35SJKv97eP9PNeAayvqvVVdX9VfRLYQHcwoKo+VlVfqc5fAJ8AnjNh\nP36vqrZU1XeBZ9IdaC6oqnurajPwHuD0XVzXD4C3VNUPgEvpQuV3q+qeqrqO7mz2ySPtP1tVV/Tt\n30kX4sf1t/2Bt/b9+HPgT4AzRh770ar6634/fW9cZ6rqQ1V1S9/mMuDLwLEjTW6uqvdU1X3AxcDB\nwOOSHAycBJxbVXdX1Q/6/Q3dweLdVXVNVd1XVRfTvWo6DriPLpiPSbJPVd1UVV/ZxX23K9v8sar6\ny6r6PvAG4KeSrNjF9Y/an+7gud32+wcsYF2akOHfnlOr6sD+dmo/7wjgtJGDwteBZ9OFEklOSnJ1\nkrv6ZSfTBewktozcPwI4ZE791wOP28V13dkHKXTDUAC3jyz/Ll3w/EjtqrqfbtjokP62pZ+33c10\nr4zG9XusJD+fZOPItjyJH95ft43U/05/d3+6V0J3VdXdY1Z7BPArc/bRCrqz/U3Aa+le1dyR5NIH\n8Ub+g9rmqvoW3VDaQi4U+BbwiJHp7ffvWcC6NCHDX9D9cX9g5KBwYFXtV1Vv7a/G+DDdGPXjqupA\nujHv9I8d97Ww3wYePjL9+DFtRh+3BfjHOfUPqKqTJ96y8R44a02yF3AY3dDLLcCKft52h9ONpY/r\n949MJzmC7lXLeXTDGQcCX+Sf99eObAEeneTA
eZa9Zc4+enhVfRCgqi6pqmfTHSQKeNs8Neb2f1e2\neXR/7U/3vsRCLhS4DnjKyPRTgNur6s4FrEsTMvwF8L+BFyd5QZJlSR7aX8J5GN048EPoxtG3JTkJ\n+JmRx94OPCbJI0fmbQROTvLoJI+nOyvdkb8FvpnkdUke1vfhSUmeOdgW/rBnJHlJf6XRa+mGT64G\nrqE7cP1qkn2SzAAvphtKms/tdO9RbLcfXcBuBUjyKroz/52qqluBjwO/n+RRfR+e2y9+D3Bukmel\ns1+SFyY5IMnRSZ7fH6i/R/dK5755yszt765s88lJnp1kX+DNwDVVNfYVUJJ9kzyU7mC3T/9c2p4z\n/ws4O8kxSR4FvJHuPQUtAcNf9H/Ip9ANtWylO8v8r8BeVXUP8BrgcrqrT/4dsG7ksV8CPghs7ocj\nDgE+QPfm3k107w9ctpP699EFzlPp3nz9GvBe4JE7etwEPkr3RuzdwCuBl/Tj6/cCq+nG3b8G/D7w\n8/02zud9dGPtX0/ykaq6Hvht4P/RBe1PAn/9IPr2Srr3ML5E9wbvawGqagPduP//7Pu9ie7NY+gO\nzm/t+3wb8Fi6/8td6e+ubPMlwK/TDfc8A3j5Dvr/CbqDz78G1vb3n9tvw58Cbwc+TTe0dHO/Xi2B\n7VcYSE1I8ibgJ6rqFUvdlz1BkvfTXan1xqXui4blmb8kNcjwl6QGOewjSQ3yzF+SGrTbfqnWQQcd\nVCtXrlzqbkjSHuWzn/3s16pq+c7a7bbhv3LlSjZs2LDU3ZCkPUqSm3elncM+ktQgw1+SGmT4S1KD\nDH9JatAg4Z/kxP53OzclWTNm+VlJtvZfc7sxyauHqCtJWpiJr/bpf8f0Qrpff5oFrk2yrv+Cq1GX\nVdV5k9aTJE1uiDP/Y4FNVbW5/4bAS+m+IVKStJsaIvwP5Yd/3WiWH/4VoO1+NskXklwx30/AJTkn\nyYYkG7Zu3TpA1yRJ4wwR/uN+oWjuFwb9MbCyqp4M/Bnd75b+6IOq1lbVqqpatXz5Tj+gNhUzMzPM\nzMwsSW1JWixDhP8sIz/zxj//JN4DqurO/sefoftFomcMUFeStEBDhP+1wFFJjux/5u10Rn7pCSDJ\nwSOTq4EbBqgrSVqgia/2qaptSc4DrgSWARdV1XVJLgA2VNU64DVJVgPb6H4K7qxJ60qSFm6QL3ar\nqvXA+jnzzh+5/2vArw1RS5I0OT/hK0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+EtSgwx/SWqQ\n4S9JDTL8JalBhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+\nktQgw1+SGmT4S1KDBgn/JCcmuTHJpiRrdtDupUkqyaoh6kqSFmbi8E+yDLgQOAk4BjgjyTFj2h0A\nvAa4ZtKakqTJDHHmfyywqao2V9W9wKXAKWPavRl4O/C9AWpKkiYwRPgfCmwZmZ7t5z0gydOAFVX1\nJztaUZJzkmxIsmHr1q0DdG3PMTMzw8zMzFJ3Q1Ijhgj/jJlXDyxM9gJ+B/iVna2oqtZW1aqqWrV8\n+fIBuiZJGmeI8J8FVoxMHwbcMjJ9APAk4KokNwHHAet801eSls4Q4X8tcFSSI5PsC5wOrNu+sKq+\nUVUHVdXKqloJXA2srqoNA9SWJC3AxOFfVduA84ArgRuAy6vquiQXJFk96folScPbe4iVVNV6YP2c\neefP03ZmiJqSpIXzE76S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+S\nGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+IuZmRlmZmaWuhuSFpHhL0kN\nMvwlqUGGvyQ1yPCXpAYNEv5JTkxyY5JNSdaMWX5ukr9PsjHJZ5IcM0RdSdLCTBz+SZYBFwInAccA\nZ4wJ90uq
6ier6qnA24F3TlpXkrRwQ5z5HwtsqqrNVXUvcClwymiDqvrmyOR+QA1QV5K0QHsPsI5D\ngS0j07PAs+Y2SvKfgF8G9gWeP25FSc4BzgE4/PDDB+iaJGmcIc78M2bej5zZV9WFVfVE4HXAG8et\nqKrWVtWqqlq1fPnyAbomSRpniPCfBVaMTB8G3LKD9pcCpw5QV5K0QEOE/7XAUUmOTLIvcDqwbrRB\nkqNGJl8IfHmAupKkBZp4zL+qtiU5D7gSWAZcVFXXJbkA2FBV64DzkpwA/AC4Gzhz0rqSpIUb4g1f\nqmo9sH7OvPNH7v/SEHUkScPwE76S1CDDX0vGr5KWlo7hL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNf\nkhpk+EtSgwx/SWqQ4S9JDTL8JalBhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWp\nQYa/JDXI8JekBhn+ktSgQcI/yYlJbkyyKcmaMct/Ocn1Sb6Q5FNJjhiiriRpYfaedAVJlgEXAj8N\nzALXJllXVdePNPs7YFVVfSfJLwJvB142ae0H0cnpPqbqwa9fkpbQEGf+xwKbqmpzVd0LXAqcMtqg\nqj5dVd/pJ68GDhugrrRgMzMzzMzMLHU3pCUzRPgfCmwZmZ7t583nbODj4xYkOSfJhiQbtm7dOkDX\npN2LBx3tLoYI/3HjI2PHQZK8AlgFvGPc8qpaW1WrqmrV8uXLB+iaJGmcicf86c70V4xMHwbcMrdR\nkhOANwDPq6rvD1BXkrRAQ5z5XwscleTIJPsCpwPrRhskeRrwbmB1Vd0xQE1J0gQmDv+q2gacB1wJ\n3ABcXlXXJbkgyeq+2TuA/YEPJdmYZN08q5MkLYIhhn2oqvXA+jnzzh+5f8IQdSRJw/ATvlIjvNJI\nowx/SWqQ4S9JDTL8JalBhr+kqfK9ht2T4S9JDTL8JalBhr8kNcjwl6QGGf6S1KBBvt5B8/AXxCTt\npjzzl/RjaSkvMd0TLm81/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGG\nvyQ1aJDwT3JikhuTbEqyZszy5yb5XJJtSV46RE1J0sJNHP5JlgEXAicBxwBnJDlmTrOvAmcBl0xa\nT5I0uSG+1fNYYFNVbQZIcilwCnD99gZVdVO/7P4B6kmSJjRE+B8KbBmZngWetZAVJTkHOAfg8MMP\nn7xnLfPrpCXtwBBj/uMSY0HJUFVrq2pVVa1avnz5hN2SJM1niPCfBVaMTB8G3DLAeiVJUzJE+F8L\nHJXkyCT7AqcD6wZYryRpSiYO/6raBpwHXAncAFxeVdcluSDJaoAkz0wyC5wGvDvJdZPWlSQt3CC/\n4VtV64H1c+adP3L/WrrhIEnSbsAfcNewvMpI2iP49Q6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWp\nQV7qqR8PC7nE9ME+zstM9WPEM39JapDhL0kNMvwlqUGO+UuT8isttAcy/KU9lW9yawIO+0hSgzzz\nl/TgOdS1xzP8Je05pn3QgWYOPA77SFKDPPOXpF3xYzbU5Zm/JDXI8JekBhn+ktQgw1+SGmT4S1KD\nBgn/JCcmuTHJpiRrxix/SJLL+uXXJFk5RF1J0sJMHP5JlgEXAicBxwBnJDlmTrOzgbur6ieA3wHe\nNmldSdLCDXHmfyywqao2V9W9wKXAKXPanAJc3N+/Ajg+Wei3UkmSJjXEh7wOBbaMTM8Cz5qvTVVt\nS/IN4DHA10YbJTkHOAfg8MMPH6BrvQfz4YmZme7fq67ac+suZe09oe5S1nab98y6S117CoY48x93\nBj93L+1KG6pqbVWtqqpVy5cvH6BrkqRxhgj/WWDFyPRhwC3ztUmyN/BI4K4BakuSFmCI8L8WOCrJ\nkUn2BU4H1s1psw44s7//UuDPqxr56jxJ2g1NPObfj+GfB1wJLAMuqqrrkl
wAbKiqdcD7gA8k2UR3\nxn/6pHUlSQs3yLd6VtV6YP2ceeeP3P8ecNoQtSRJk/MTvpLUIMNfkhpk+EtSgwx/SWqQ4S9JDTL8\nJalBhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+S\nGjTIzzhK0u7mqquuWuou7NY885ekBnnmryZ5VqjWGf5zGAqSWmD4S43wxGbx7An7eqLwT/Jo4DJg\nJXAT8HNVdfeYdn8KHAd8pqpeNElNSXuWPSEIWzTpG75rgE9V1VHAp/rpcd4BvHLCWpKkgUwa/qcA\nF/f3LwZOHdeoqj4F3DNhLUnSQCYN/8dV1a0A/b+PnWRlSc5JsiHJhq1bt07YNUnSfHY65p/kz4DH\nj1n0hqE7U1VrgbUAq1atqqHXLy01x7+1u9hp+FfVCfMtS3J7koOr6tYkBwN3DNq7hhgKkhbTpMM+\n64Az+/tnAh+dcH2SpEUw6XX+bwUuT3I28FXgNIAkq4Bzq+rV/fRfAf8C2D/JLHB2VV05YW3t4Xy1\nIy2dicK/qu4Ejh8zfwPw6pHp50xSR5I0LL/YTZIaZPhLUoP8bh859i41yDN/SWqQ4S9JDTL8JalB\nhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqUKp2z99MSbIVuHmJyh8EfK2huktZ221uo3ZrdZey\n9hFVtXxnjXbb8F9KSTZU1apW6i5lbbe5jdqt1V3q2rvCYR9JapDhL0kNMvzHW9tY3aWs7Ta3Ubu1\nuktde6cc85ekBnnmL0kNMvwlqUFNh3+SFUk+neSGJNcl+aV+/mVJNva3m5JsXMTa70jypSRfSPJ/\nkxw4dO2RPhw9sp0bk3wzyWunWG++bX50kk8m+XL/76Om1Ye+3oFJruj38w1Jfmqa9Ubqjt3+xayV\n5M39c2tjkk8kOWRafejrPTTJ3yb5fN+P35hirYuS3JHkiyPzFnV7+5onJrkxyaYka6Zdb8Gqqtkb\ncDDw9P7+AcA/AMfMafPbwPmLVRv4GWDvfv7bgLct0r5YBtxG9wGRRd3fwNuBNf38NdPeZuBi4NX9\n/X2BA3eX59si7OtHjLR5DfCuKW9zgP37+/sA1wDHTanWc4GnA18cmbfY27sM+ArwhP659flp/R9P\nemv6zL+qbq2qz/X37wFuAA7dvjxJgJ8DPrhYtavqE1W1rW92NXDY0LXncTzwlaqa2qeqd7C/T6EL\nZPp/T51WH5I8gi4k3tf3496q+vq06o3a2fNtMWpV1TdHmu0HTPWKj+p8q5/cp79NpWZV/SVw15x5\ni7q9wLHApqraXFX3ApfSPb93O02H/6gkK4Gn0Z2ZbPcc4Paq+vIS1Ab498DHp1l7xOlM4SA3nznb\n/LiquhW60AIeO8XSTwC2An+Y5O+SvDfJflOsN9YO/s+nXivJW5JsAV4OnL8I9Zf1Q6d3AJ+sqqlv\n85z6i7m9hwJbRqZnmdIBflKGP5Bkf+DDwGvnnCmcwZQDcb7aSd4AbAP+aJr1+1r7AquBD027Vl9v\nvv29GPamGxr4g6p6GvBtuqGmRbOY2z+uVlW9oapW0D23zptm/b7efVX1VLpXsccmedK0a86pv5jb\nm3FdmHLNBWk+/JPsQ/fH8UdV9X9G5u8NvAS4bAlqnwm8CHh59QOJU3YS8Lmqun3ahebZ5tuTHNwv\nP5juDHFaZoHZkbPPK+gOBotivv/zJap1CfCz0+zDqH547SrgxMWqOcdibO8ssGJk+jDglinXXJCm\nw78f038fcENVvXPO4hOAL1XV7GLWTnIi8DpgdVV9Zxq1x5j6KxzY4f5eB5zZ3z8T+Oi0+lBVtwFb\nkhzdzzoeuH5a9Ubt5Pm2KLWSHDXSbDXwpSn3Y/n2K9aSPIz+72qaNefUX9TtBa4FjkpyZP+K+nS6\n5/dup+lP+CZ5NvBXwN8D9/ezX19V65
O8H7i6qt61mLWB3wMeAtzZz7u6qs6dRh/6fjycbozyCVX1\njWnV6WvNt83XAJcDhwNfBU6rqrvGrmSYfjwVeC/d1RibgVdV1d3TqjdSd97n22LVAs4Gju7n3Qyc\nW1X/NHT9kX48me5N/GV0J5uXV9UFU6r1QWCG7quUbwd+HTiZRdzevh8nA/+Dbpsvqqq3TLPeQjUd\n/pLUqqaHfSSpVYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JatD/B2TJhsaWure9AAAAAElFTkSu\nQmCC\n",
|
||
"text/plain": [
|
||
"<Figure size 432x288 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"model = embedded_method.rf_importance(X_train=X_train,y_train=y_train,\n",
|
||
" max_depth=10,top_n=10)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['mean concavity', 'mean concave points', 'worst radius',\n",
|
||
" 'worst perimeter', 'worst concave points'],\n",
|
||
" dtype='object')"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# select features whose importance > threshold\n",
|
||
"from sklearn.feature_selection import SelectFromModel\n",
|
||
"\n",
|
||
"# only 5 features have importance > 0.05\n",
|
||
"feature_selection = SelectFromModel(model, threshold=0.05,prefit=True) \n",
|
||
"selected_feat = X_train.columns[(feature_selection.get_support())]\n",
|
||
"selected_feat"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['mean radius', 'mean perimeter', 'mean area', 'mean concavity',\n",
|
||
" 'mean concave points', 'radius error', 'area error', 'worst radius',\n",
|
||
" 'worst perimeter', 'worst area', 'worst concavity',\n",
|
||
" 'worst concave points'],\n",
|
||
" dtype='object')"
|
||
]
|
||
},
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# only 12 features have importance > 2 times median\n",
|
||
"feature_selection2 = SelectFromModel(model, threshold='2*median',prefit=True) \n",
|
||
"selected_feat2 = X_train.columns[(feature_selection2.get_support())]\n",
|
||
"selected_feat2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Gradient Boosted Trees Importance"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Feature ranking:\n",
|
||
"1. feature no:27 feature name:worst concave points (0.694636)\n",
|
||
"2. feature no:23 feature name:worst area (0.131077)\n",
|
||
"3. feature no:4 feature name:mean smoothness (0.033800)\n",
|
||
"4. feature no:8 feature name:mean symmetry (0.018609)\n",
|
||
"5. feature no:22 feature name:worst perimeter (0.015998)\n",
|
||
"6. feature no:21 feature name:worst texture (0.013732)\n",
|
||
"7. feature no:2 feature name:mean perimeter (0.010792)\n",
|
||
"8. feature no:26 feature name:worst concavity (0.010138)\n",
|
||
"9. feature no:17 feature name:concave points error (0.008941)\n",
|
||
"10. feature no:13 feature name:area error (0.008934)\n",
|
||
"11. feature no:0 feature name:mean radius (0.007928)\n",
|
||
"12. feature no:12 feature name:perimeter error (0.006268)\n",
|
||
"13. feature no:18 feature name:symmetry error (0.005472)\n",
|
||
"14. feature no:3 feature name:mean area (0.005069)\n",
|
||
"15. feature no:1 feature name:mean texture (0.005034)\n",
|
||
"16. feature no:10 feature name:radius error (0.004299)\n",
|
||
"17. feature no:16 feature name:concavity error (0.003595)\n",
|
||
"18. feature no:6 feature name:mean concavity (0.003354)\n",
|
||
"19. feature no:19 feature name:fractal dimension error (0.003092)\n",
|
||
"20. feature no:14 feature name:smoothness error (0.002149)\n",
|
||
"21. feature no:29 feature name:worst fractal dimension (0.001952)\n",
|
||
"22. feature no:25 feature name:worst compactness (0.001149)\n",
|
||
"23. feature no:9 feature name:mean fractal dimension (0.000942)\n",
|
||
"24. feature no:11 feature name:texture error (0.000917)\n",
|
||
"25. feature no:15 feature name:compactness error (0.000671)\n",
|
||
"26. feature no:5 feature name:mean compactness (0.000636)\n",
|
||
"27. feature no:20 feature name:worst radius (0.000354)\n",
|
||
"28. feature no:24 feature name:worst smoothness (0.000305)\n",
|
||
"29. feature no:28 feature name:worst symmetry (0.000145)\n",
|
||
"30. feature no:7 feature name:mean concave points (0.000013)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAF71JREFUeJzt3XuUHHWZxvHvQ0K4r6CMCkkgUQPH\nrKLAEFhFHAE1AU1QQZMVBZc1i2tE1FXiZbMYdA/gbd1jXA3IomgIiC6OGDfoQvSgC2aCkSUJ0TEE\nM4bLcBXkGnj3j6pg0XRPV8/0JfnN8zmnT7qqfl3vWzXpp2uqe7oUEZiZWVp26HQDZmbWfA53M7ME\nOdzNzBLkcDczS5DD3cwsQQ53M7MEOdxtmyPpa5L+udN9mG3PHO4JkbRR0iOSHirc9h3hOnskDTSr\nxzIi4vSIOKedNWuRdLGkz3S6j7Ja3a+kxZLWS3pK0qlVln9I0h2SHpB0kaSdWtWLDc3hnp43R8Tu\nhdvmTjYjaWwn64+EpDGd7mEb9BvgH4EbKxdIeiMwHzgGmAS8CPh0O5uzgojwLZEbsBE4tsayI4Bf\nAveTPUF7CsveA6wDHgQ2AP+Qz98NeAR4Cngov+0LXAx8pvD4HmCgoo+zgJuAx4Cx+eO+BwwCtwJn\nDLEdT69/67qBjwF3AbcDJwDHAb8F7gU+UXjs2cAVwGX59twIvKKw/KXAinw/rAFmVtT9D2AZ8Gdg\nLvAE8Hi+7T/Mx80Hfp+vfy3wlsI6TgWuAz4P3Jdv64zC8ucC/wlszpdfWVj2JmB13tsvgYMKy84C\n/pjXXA8cU2W/1eq33jZ/DfhJvu6fAfuX+L92HXBqxbwlwL8Wpo8B7uj082K03jregG9N/GHWCHdg\nPHBPHog7AK/Pp7vy5ccDLwYEvBZ4GDgkX9ZDIbjzeU+Hb7UxeR+rgYnALnnNVcACYBzZEd0G4I01\ntuPp9efr3pI/dkfgvWQvEEuAPYC/Bh4FXpSPPzsPuBPz8f+UB+yO+a0f+ETex9F5oB1YqPsA8Oq8\n550rtzUfdxLZi9UOwDvIXgj2yZedmtd/LzAGeB9ZkCtf/iOyF5698n5em88/hOzF6/D8cafk+3En\n4EBgE7BvPnYS8OJ6+y6fLrPNDwJH5bW+DFxX4v9atXD/DfCOwvTeQADP6/RzYzTefFomPVdKuj+/\nXZnPOxlYFhHLIuKpiPgJ0EcW9kTEjyLi95H5GXA18JoR9vHvEbEpIh4BDiN7IVkYEY9HxAbgAmB2\nyXU9AXw2Ip4AlpKFxpcj4sGIWEN2NHpQYfyqiLgiH/9FspA+Ir/tDpyb93ENcBUwp/DYH0TEL/L9\n9Gi1ZiLiuxGxOR9zGfA7YFphyG0RcUFEPAl8E9gHeIGkfYAZwOkRcV9EPJHvb8heDL4eETdExJMR\n8U2y33qOAJ4kC96pknaMiI0R8fuS+67MNv8oIn4eEY8BnwT+RtLEkusv2p3sxXGrrff3GMa6bIQc\n7uk5ISL2zG8n5PP2B04qhP79wJFkoYOkGZKul3Rvvuw4sgAdiU2F+/sD+1bU/wTwgpLruicPSshO\nEwHcWVj+CFmwPKt2RDxFdlpn3/y2KZ+31W1kv9lU67sqSe+WtLqwLS/jmfvrjkL9h/O7u5P9JnNv\nRNxXZbX7Ax+p2EcTyY7W+4EzyX4ruUvS0gbeKG9omyPiIbJTXcN5I/4h4K8K01vvPziMddkIOdxH\nh03AJYXQ3zMidouIc/NPM3yP7BzxCyJiT7JzzsofW+1rQ/8M7FqYfmGVMcXHbQJurai/R0QcN+It\nq+7po05JOwATyE6NbAYm5vO22o/sXHa1vp81LWl/st865pGdbtgTuJm/7K+hbAKeK2nPGss+W7GP\ndo2ISwEiYklEHEn2IhDAeTVqVPZfZpuL+2t3
svcFhvNG/BrgFYXpVwB3RsQ9w1iXjZDDfXT4NvBm\nSW+UNEbSzvlHHCeQnYfdiew89hZJM4A3FB57J/A8Sc8pzFsNHCfpuZJeSHZUOZRfAX+SdJakXfIe\nXibpsKZt4TMdKumt+Sd1ziQ7vXE9cAPZC9PHJO0oqQd4M9mpnlruJHuPYKvdyAJ0EEDSe8iO3OuK\niNuBHwNflbRX3sNR+eILgNMlHa7MbpKOl7SHpAMlHZ2/ED9K9pvKkzXKVPZbZpuPk3SkpHHAOcAN\nEVH1NxhJ4yTtTPZitmP+f2lrjnwLOE3SVEl7AZ8iO6dvHeBwHwXyJ+osslMhg2RHiR8FdoiIB4Ez\ngMvJPr3xt0Bv4bG3AJcCG/LTBfsCl5C9ebaR7Pz8ZXXqP0kWKK8ke3PzbuBC4DlDPW4EfkD2Rud9\nwLuAt+bntx8HZpKd974b+Crw7nwba/kG2bnu+yVdGRFrgS8A/0sWpC8HftFAb+8iew/hFrI3UM8E\niIg+svPuX8n77id7cxayF99z857vAJ5P9rMs02+ZbV4C/AvZ6ZhDgXcO0f/VZC8urwIW5/ePyrfh\nv4HzgWvJTv3clq/XOmDrO/hmSZB0NvCSiDi5071sDyRdTPZJp091uhdrLh+5m5klyOFuZpYgn5Yx\nM0uQj9zNzBLUsS912nvvvWPSpEmdKm9mtl1atWrV3RHRVW9cx8J90qRJ9PX1daq8mdl2SdJtZcb5\ntIyZWYIc7mZmCXK4m5klyOFuZpagUuEuaXp+3cR+SfOrLP9S/hWoqyX9Nv+6UjMz65C6n5bJryO5\niOzqPQPASkm9+RcoARARHyqM/wBwcAt6NTOzksocuU8D+iNiQ/4Nc0vJvmGwljlk3yJoZmYdUibc\nx/PMq9MM8MyruDwtv5DBZOCaGsvnSuqT1Dc4ONhor2ZmVlKZcK92hZlaX0gzG7iicEm0Zz4oYnFE\ndEdEd1dX3T+waomenh56eno6UtvMrF3K/IXqAIXLcPGXS5ZVMxt4/0ibaojKXN1shI/zl6uZ2Xam\nTLivBKZImkx23cXZZFfreQZJBwJ7kV2hZpu1otMNmJm1Qd3TMhGxhexiwMuBdcDlEbFG0kJJMwtD\n5wBLw98hbGbWcaW+OCwilgHLKuYtqJg+u3ltmZnZSPgvVM3MEuRwNzNLkMPdzCxBDnczswQ53M3M\nEuRwNzNLkMPdzCxBDnczswQ53M3MEuRwNzNLkMPdzCxBDnczswQ53M3MEuRwNzNLkMPdzCxBDncz\nswQ53M3MEuRwNzNLkMPdzCxBDnczswSVCndJ0yWtl9QvaX6NMW+XtFbSGklLmtummZk1Ymy9AZLG\nAIuA1wMDwEpJvRGxtjBmCvBx4NURcZ+k57eqYTMzq6/Mkfs0oD8iNkTE48BSYFbFmPcCiyLiPoCI\nuKu5bZqZWSPKhPt4YFNheiCfV3QAcICkX0i6XtL0aiuSNFdSn6S+wcHB4XVsZmZ1lQl3VZkXFdNj\ngSlADzAHuFDSns96UMTiiOiOiO6urq5GezUzs5LKhPsAMLEwPQHYXGXMDyLiiYi4FVhPFvZmZtYB\nZcJ9JTBF0mRJ44DZQG/FmCuB1wFI2pvsNM2GZjZqZmbl1Q33iNgCzAOWA+uAyyNijaSFkmbmw5YD\n90haC1wLfDQi7mlV02ZmNjRFVJ4+b4/u7u7o6+sb+YpU7S2BJuvQPjIzqyRpVUR01xvnv1A1M0uQ\nw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS\n5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBJUKd0nTJa2X1C9pfpXlp0oalLQ6\nv/1981s1M7OyxtYbIGkMsAh4PTAArJTUGxFrK4ZeFhHzWtCjmZk1qMyR+zSgPyI2RMTjwFJgVmvb\nMjOzkSgT
7uOBTYXpgXxepbdJuknSFZImVluRpLmS+iT1DQ4ODqNdMzMro0y4q8q8qJj+ITApIg4C\nfgp8s9qKImJxRHRHRHdXV1djnZqZWWllwn0AKB6JTwA2FwdExD0R8Vg+eQFwaHPaMzOz4SgT7iuB\nKZImSxoHzAZ6iwMk7VOYnAmsa16LZmbWqLqflomILZLmAcuBMcBFEbFG0kKgLyJ6gTMkzQS2APcC\np7awZzMzq0MRlafP26O7uzv6+vpGviJVe0ugyTq0j8zMKklaFRHd9cb5L1TNzBLkcDczS5DD3cws\nQQ53M7MEOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLkcDcz\nS5DD3cwsQQ53M7MEOdzNzBLkcDczS5DD3cwsQaXCXdJ0Sesl9UuaP8S4EyWFpLrX9zMzs9apG+6S\nxgCLgBnAVGCOpKlVxu0BnAHc0OwmzcysMWWO3KcB/RGxISIeB5YCs6qMOwc4H3i0if2ZmdkwlAn3\n8cCmwvRAPu9pkg4GJkbEVU3szczMhqlMuKvKvHh6obQD8CXgI3VXJM2V1Cepb3BwsHyXZmbWkDLh\nPgBMLExPADYXpvcAXgaskLQROALorfamakQsjojuiOju6uoaftdmZjakMuG+EpgiabKkccBsoHfr\nwoh4ICL2johJETEJuB6YGRF9LenYzMzqqhvuEbEFmAcsB9YBl0fEGkkLJc1sdYNmZta4sWUGRcQy\nYFnFvAU1xvaMvC0zMxsJ/4WqmVmCHO5mZglyuJuZJcjhbmaWIIe7mVmCHO5mZglyuJuZJcjhbmaW\nIIe7mVmCHO5mZglyuJuZJcjhbmaWIIe7mVmCHO5mZglyuJuZJcjhbmaWIIe7mVmCHO5mZglyuJuZ\nJcjhbmaWoFLhLmm6pPWS+iXNr7L8dEn/J2m1pOskTW1+q2ZmVlbdcJc0BlgEzACmAnOqhPeSiHh5\nRLwSOB/4YtM7NTOz0socuU8D+iNiQ0Q8DiwFZhUHRMSfCpO7AdG8Fs3MrFFjS4wZD2wqTA8Ah1cO\nkvR+4MPAOODoaiuSNBeYC7Dffvs12quZmZVU5shdVeY968g8IhZFxIuBs4BPVVtRRCyOiO6I6O7q\n6mqsUzMzK61MuA8AEwvTE4DNQ4xfCpwwkqbMzGxkyoT7SmCKpMmSxgGzgd7iAElTCpPHA79rXotm\nZtaouufcI2KLpHnAcmAMcFFErJG0EOiLiF5gnqRjgSeA+4BTWtm0mZkNrcwbqkTEMmBZxbwFhfsf\nbHJfZmY2Av4LVTOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnc\nzcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0tQqXCXNF3S\nekn9kuZXWf5hSWsl3STpfyTt3/xWzcysrLrhLmkMsAiYAUwF5kiaWjHs10B3RBwEXAGc3+xGzcys\nvDJH7tOA/ojYEBGPA0uBWcUBEXFtRDycT14PTGhum2Zm1ogy4T4e2FSYHsjn1XIa8ONqCyTNldQn\nqW9wcLB8l2Zm1pAy4a4q86LqQOlkoBv4XLXlEbE4Irojorurq6t8l2Zm1pCxJcYMABML0xOAzZWD\nJB0LfBJ4bUQ81pz2zMxsOMocua8EpkiaLGkcMBvoLQ6QdDDwdWBmRNzV/DbNzKwRdcM9IrYA84Dl\nwDrg8ohYI2mhpJn5sM8BuwPflbRaUm+N1ZmZWRuUOS1DRCwDllXMW1C4f2yT+zIzsxHwX6iamSXI\n4W5mliCHu5lZghzuZmYJcribmSXI4W5mliCHu5lZghzuZmYJcribmSXI4W5mliCHu5lZghzuZmYJ\ncribmSXI4d4mPT099PT0dLoNMxslSn3lr9WgalcgbOJjourVDM3M6nK4t8
mKTjdgZqOKT8uYmSXI\n4W5mliCHu5lZgkqFu6TpktZL6pc0v8ryoyTdKGmLpBOb36aZmTWibrhLGgMsAmYAU4E5kqZWDPsD\ncCqwpNkNmplZ48p8WmYa0B8RGwAkLQVmAWu3DoiIjfmyp1rQo5mZNajMaZnxwKbC9EA+z8zMtlFl\nwr3aX90M669rJM2V1Cepb3BwcDirMDOzEsqE+wAwsTA9Adg8nGIRsTgiuiOiu6urazirMDOzEsqE\n+0pgiqTJksYBs4He1rZlZmYjUTfcI2ILMA9YDqwDLo+INZIWSpoJIOkwSQPAScDXJa1pZdNmZja0\nUt8tExHLgGUV8xYU7q8kO11jZmbbAP+FqplZghzuZmYJcribmSXI4W5mliCHu5lZghzuZmYJcrib\nmSXI4W5mliCHu5lZghzuZmYJcribmSXI4W5mliCHu5lZgkp9K6RtY1Tt4ljV9eT/rmi0RgzrYltm\nto1wuCduRacbMLOO8GkZM7MEOdzNzBLkcLeW6enpoaenZ9TU7aTRuM02NJ9zt8Y08GbusB5T643c\nTtVt0NaAXbFiRVPWZ7V1al9vLz9jh7u1zIpU6m4nLyydsr2E3WhTKtwlTQe+DIwBLoyIcyuW7wR8\nCzgUuAd4R0RsbG6rZtu+Fc1c2fb0W1Ina28PdYeq3SJ1z7lLGgMsAmYAU4E5kqZWDDsNuC8iXgJ8\nCTiv2Y2a2bZpBf7I7baozJH7NKA/IjYASFoKzALWFsbMAs7O718BfEWSIrbz3zfNbJu1YpTVbVSZ\ncB8PbCpMDwCH1xoTEVskPQA8D7i7OEjSXGAuwH777TfMlit08vWjU7W9zaOjdgN1V3SobtNtB/t6\ne1Hmo5DVTixV7okyY4iIxRHRHRHdXV1dZfozM7NhKBPuA8DEwvQEYHOtMZLGAs8B7m1Gg2Zm1rgy\n4b4SmCJpsqRxwGygt2JML3BKfv9E4Bqfbzcz65y659zzc+jzgOVkH4W8KCLWSFoI9EVEL/AN4BJJ\n/WRH7LNb2bSZmQ2t1OfcI2IZsKxi3oLC/UeBk5rbmpmZDZe/W8bMLEEOdzOzBDnczcwS5HA3M0uQ\nOvWJRUmDwG0dKQ57U/HXs66bXG1v8+ioPdrqAuwfEXX/CrRj4d5Jkvoiott1063tbR4dtUdb3Ub4\ntIyZWYIc7mZmCRqt4b7YdZOv7W0eHbVHW93SRuU5dzOz1I3WI3czs6Q53M3MEpR0uEuaKOlaSesk\nrZH0wXz+ZZJW57eNkla3qe45km7K614tad9m1h2inzGSfi3pqnbUy2t+KN/2myVdKmnnFtaqtb8/\nJ+mWfJ//l6Q921T3pHz6KUkt/7hcrT7aXUvSByStz+ef34LaF0m6S9LNhXktfS4PUbcjz+WGRESy\nN2Af4JD8/h7Ab4GpFWO+ACxoR13grwpjzgC+1qb98GFgCXBVm+qNB24FdsmnLwdObffPGXgDMDaf\nfx5wXpvqvhQ4kOzqd91t2N91/5+3YZtfB/wU2Clf9vwW1D4KOAS4ucbypj+Xa9Xt1HO5kVvSR+4R\ncXtE3JjffxBYRxY8AEgS8Hbg0nbUjYg/FYbtRpVLETabpAnA8cCFra5VYSywS35lrl159tW7mmaI\n/X11RGzJh11PdhWxdtRdFxHrm1lrOH20udb7gHMj4rF82V0tqP1zalzhrVXP5Vp1O/FcblTS4V4k\naRJwMHBDYfZrgDsj4nftqivps5I2Ae8EFtR+ZNP8G/Ax4Kk21AIgIv4IfB74A3A78EBEXN2O2jV+\nzgB/B/y4A3Xbqp19VNQ6AHiNpBsk/UzSYa2uX6Hlz+VKHXguN2RUhLuk3YHvAWdWvOLOoQWv9EPV\njYhPRsRE4DvAvFbVzuu/CbgrIla1sk6VunsBs4DJwL7AbpJObkPdqj9nSZ8EtpDt87bVbbd29lGl\n1lhgL+AI4KPA5fnRdLu09LlcTTufy8
ORfLhL2pHsP+F3IuL7hfljgbcCl7WzbsES4G2tqF3wamCm\npI3AUuBoSd9ucU2AY4FbI2IwIp4Avg+8qpUFh/g5nwK8CXhn5CdI21G33drZR41aA8D3I/Mrst8U\n925lH4V+WvpcLqEdz+WGJR3u+ZHDN4B1EfHFisXHArdExEC76kqaUhg2E7il2bWLIuLjETEhIiaR\nXdf2moho+RE02emYIyTtmu+LY8jOzbbEEPt7OnAWMDMiHm5X3XZrZx9D1LoSODofcwAwjvZ9a2LL\nnsu1tPu5PCydfke3lTfgSLI3Om4CVue34/JlFwOnt7Mu2dHOzfn8H5K9+daufdFDmz4tk9f7NNl/\n+JuBS8g/RdHm/d0PbCrMa+onGoao+xayI9nHgDuB5S3e1zX/n7dxX48Dvp3/vG8Ejm5B7UvJ3sN5\nIt+/p+XzW/ZcrlW3k8/lsjd//YCZWYKSPi1jZjZaOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLk\ncDczS9D/A7FX/jyOxZ4EAAAAAElFTkSuQmCC\n",
|
||
"text/plain": [
|
||
"<Figure size 432x288 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"model = embedded_method.gbt_importance(X_train=X_train,y_train=y_train,\n",
|
||
" max_depth=10,top_n=10)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['mean perimeter', 'mean smoothness', 'mean symmetry', 'worst texture',\n",
|
||
" 'worst perimeter', 'worst area', 'worst concavity',\n",
|
||
" 'worst concave points'],\n",
|
||
" dtype='object')"
|
||
]
|
||
},
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# select features whose importance >= threshold\n",
|
||
"\n",
|
||
"# only 8 features have importance >= 0.01\n",
|
||
"feature_selection = SelectFromModel(model, threshold=0.01,prefit=True) \n",
|
||
"selected_feat = X_train.columns[(feature_selection.get_support())]\n",
|
||
"selected_feat"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.6.1"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|