Files
Amazing-Feature-Engineering/4.3_Demo_Feature_Selection_Embedded.ipynb
DESKTOP-SAT83DL\yimeng.zhang 149c80cedf 2018.12.2 First commit.
2018-12-02 21:11:32 +08:00

680 lines
39 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"# import seaborn as sns\n",
"# import matplotlib.pyplot as plt\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import Lasso, LogisticRegression\n",
"from sklearn.preprocessing import RobustScaler\n",
"from sklearn.feature_selection import SelectFromModel\n",
"from feature_selection import embedded_method\n",
"# plt.style.use('seaborn-colorblind')\n",
"# %matplotlib inline\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.datasets import load_breast_cancer\n",
"\n",
"# Keep the raw sklearn Bunch under its own name, then stack the feature matrix\n",
"# and the label vector side by side into one DataFrame (label column: 'target').\n",
"cancer = load_breast_cancer()\n",
"column_names = np.append(cancer['feature_names'], ['target'])\n",
"data = pd.DataFrame(np.c_[cancer['data'], cancer['target']], columns=column_names)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>...</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.3001</td>\n",
" <td>0.14710</td>\n",
" <td>0.2419</td>\n",
" <td>0.07871</td>\n",
" <td>...</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.0869</td>\n",
" <td>0.07017</td>\n",
" <td>0.1812</td>\n",
" <td>0.05667</td>\n",
" <td>...</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.1974</td>\n",
" <td>0.12790</td>\n",
" <td>0.2069</td>\n",
" <td>0.05999</td>\n",
" <td>...</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.2414</td>\n",
" <td>0.10520</td>\n",
" <td>0.2597</td>\n",
" <td>0.09744</td>\n",
" <td>...</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.1980</td>\n",
" <td>0.10430</td>\n",
" <td>0.1809</td>\n",
" <td>0.05883</td>\n",
" <td>...</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" mean radius mean texture mean perimeter mean area mean smoothness \\\n",
"0 17.99 10.38 122.80 1001.0 0.11840 \n",
"1 20.57 17.77 132.90 1326.0 0.08474 \n",
"2 19.69 21.25 130.00 1203.0 0.10960 \n",
"3 11.42 20.38 77.58 386.1 0.14250 \n",
"4 20.29 14.34 135.10 1297.0 0.10030 \n",
"\n",
" mean compactness mean concavity mean concave points mean symmetry \\\n",
"0 0.27760 0.3001 0.14710 0.2419 \n",
"1 0.07864 0.0869 0.07017 0.1812 \n",
"2 0.15990 0.1974 0.12790 0.2069 \n",
"3 0.28390 0.2414 0.10520 0.2597 \n",
"4 0.13280 0.1980 0.10430 0.1809 \n",
"\n",
" mean fractal dimension ... worst texture worst perimeter worst area \\\n",
"0 0.07871 ... 17.33 184.60 2019.0 \n",
"1 0.05667 ... 23.41 158.80 1956.0 \n",
"2 0.05999 ... 25.53 152.50 1709.0 \n",
"3 0.09744 ... 26.50 98.87 567.7 \n",
"4 0.05883 ... 16.67 152.20 1575.0 \n",
"\n",
" worst smoothness worst compactness worst concavity worst concave points \\\n",
"0 0.1622 0.6656 0.7119 0.2654 \n",
"1 0.1238 0.1866 0.2416 0.1860 \n",
"2 0.1444 0.4245 0.4504 0.2430 \n",
"3 0.2098 0.8663 0.6869 0.2575 \n",
"4 0.1374 0.2050 0.4000 0.1625 \n",
"\n",
" worst symmetry worst fractal dimension target \n",
"0 0.4601 0.11890 0.0 \n",
"1 0.2750 0.08902 0.0 \n",
"2 0.3613 0.08758 0.0 \n",
"3 0.6638 0.17300 0.0 \n",
"4 0.2364 0.07678 0.0 \n",
"\n",
"[5 rows x 31 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# preview the first five rows (head() defaults to n=5)\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((455, 30), (114, 30))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# hold out 20% of the rows for testing; the fixed seed keeps the split reproducible\n",
"features = data.drop(columns=['target'])\n",
"labels = data['target']\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    features, labels, test_size=0.2, random_state=0)\n",
"X_train.shape, X_test.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Lasso\n",
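"Regularization with Lasso. The L1 penalty can shrink some of the coefficients exactly to zero, so the corresponding features can be removed from the model. Because the target here is a class label, this demo applies the L1 penalty through a logistic regression rather than a Lasso regressor."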
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,\n",
" with_scaling=True)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# linear models benefit from feature scaling;\n",
"# the scaler is fitted on the training split only\n",
"scaler = RobustScaler().fit(X_train)\n",
"scaler"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n"
]
},
{
"data": {
"text/plain": [
"SelectFromModel(estimator=LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,\n",
" intercept_scaling=1, max_iter=100, multi_class='warn',\n",
" n_jobs=None, penalty='l1', random_state=None, solver='warn',\n",
" tol=0.0001, verbose=0, warm_start=False),\n",
" max_features=None, norm_order=1, prefit=False, threshold=None)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit an L1-penalised logistic regression on the scaled training data and let\n",
"# SelectFromModel keep the features whose coefficients were not shrunk away.\n",
"# The solver is named explicitly: 'liblinear' supports the L1 penalty, whereas\n",
"# 'lbfgs' (the default from scikit-learn 0.22 on) rejects penalty='l1'.\n",
"sel_ = SelectFromModel(LogisticRegression(C=1, penalty='l1', solver='liblinear'))\n",
"sel_.fit(scaler.transform(X_train), y_train)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total features: 30\n",
"selected features: 14\n",
"features with coefficients shrank to zero: 16\n"
]
}
],
"source": [
"# names of the features kept by the L1 selector\n",
"selected_feat = X_train.columns[sel_.get_support()]\n",
"\n",
"# tally everything first, then report\n",
"n_total = X_train.shape[1]\n",
"n_selected = len(selected_feat)\n",
"n_zeroed = np.sum(sel_.estimator_.coef_ == 0)\n",
"\n",
"print('total features: {}'.format(n_total))\n",
"print('selected features: {}'.format(n_selected))\n",
"print('features with coefficients shrank to zero: {}'.format(n_zeroed))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['mean radius', 'mean perimeter', 'mean area', 'mean smoothness',\n",
" 'mean compactness', 'mean concavity', 'mean fractal dimension',\n",
" 'texture error', 'perimeter error', 'smoothness error',\n",
" 'concavity error', 'concave points error', 'symmetry error',\n",
" 'worst radius', 'worst perimeter', 'worst compactness'],\n",
" dtype='object')"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# the removed features are the ones whose fitted coefficient is exactly zero\n",
"zero_coef_mask = (sel_.estimator_.coef_ == 0).ravel()\n",
"removed_feats = X_train.columns[zero_coef_mask]\n",
"removed_feats"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((455, 14), (114, 14))"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# drop the unselected columns from both splits with the fitted selector\n",
"# (fillna(0) looks purely defensive here; presumably the frames hold no NaNs - TODO confirm)\n",
"X_train_selected, X_test_selected = (\n",
"    sel_.transform(split.fillna(0)) for split in (X_train, X_test))\n",
"\n",
"X_train_selected.shape, X_test_selected.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Random Forest Importance\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature ranking:\n",
"1. feature no:27 feature name:worst concave points (0.206316)\n",
"2. feature no:22 feature name:worst perimeter (0.147163)\n",
"3. feature no:7 feature name:mean concave points (0.100672)\n",
"4. feature no:20 feature name:worst radius (0.082449)\n",
"5. feature no:6 feature name:mean concavity (0.060420)\n",
"6. feature no:2 feature name:mean perimeter (0.048284)\n",
"7. feature no:23 feature name:worst area (0.046151)\n",
"8. feature no:3 feature name:mean area (0.038594)\n",
"9. feature no:13 feature name:area error (0.035627)\n",
"10. feature no:0 feature name:mean radius (0.030476)\n",
"11. feature no:10 feature name:radius error (0.028711)\n",
"12. feature no:26 feature name:worst concavity (0.028533)\n",
"13. feature no:12 feature name:perimeter error (0.019986)\n",
"14. feature no:21 feature name:worst texture (0.018623)\n",
"15. feature no:1 feature name:mean texture (0.013840)\n",
"16. feature no:25 feature name:worst compactness (0.013195)\n",
"17. feature no:29 feature name:worst fractal dimension (0.011840)\n",
"18. feature no:24 feature name:worst smoothness (0.008988)\n",
"19. feature no:28 feature name:worst symmetry (0.008973)\n",
"20. feature no:18 feature name:symmetry error (0.007378)\n",
"21. feature no:11 feature name:texture error (0.006736)\n",
"22. feature no:15 feature name:compactness error (0.005464)\n",
"23. feature no:19 feature name:fractal dimension error (0.005117)\n",
"24. feature no:16 feature name:concavity error (0.004957)\n",
"25. feature no:8 feature name:mean symmetry (0.004660)\n",
"26. feature no:4 feature name:mean smoothness (0.004614)\n",
"27. feature no:9 feature name:mean fractal dimension (0.003689)\n",
"28. feature no:17 feature name:concave points error (0.002993)\n",
"29. feature no:5 feature name:mean compactness (0.002844)\n",
"30. feature no:14 feature name:smoothness error (0.002706)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEICAYAAAC3Y/QeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFvpJREFUeJzt3X20JHV95/H3hwF8ABSVUXkYGDQc\ndlnj44hk14d7hERAHVgjEVYNuBiW7HKMm+zGibrEYMzxYWM2OUuioxJZNwiIuzqJY9AYSWKyEEYz\nGgGJ4wScG55GQMVHHPjuH1VD2mvfmeF29b0z/t6vc/pMV9Wv6/urmr6fqv51dXeqCklSW/Za6g5I\nkhaf4S9JDTL8JalBhr8kNcjwl6QGGf6S1CDDX3ucJO9K8t+Wuh/Snszwb0iSm5J8N8m3Rm6HTLjO\nmSSzQ/VxV1TVuVX15sWsOZ8k70/ym0vdj1017f4mWZvkxiT3JzlrzPL/nOS2JN9IclGSh0yrL9ox\nw789L66q/UdutyxlZ5LsvZT1J5Fk2VL3YTf0eeA/Ap+buyDJC4A1wPHASuAJwG8sZuc0oqq8NXID\nbgJOmGfZccDfAF+n+wOeGVn2KuAG4B5gM/Af+vn7Ad8F7ge+1d8OAd4P/ObI42eA2Tn9eB3wBeD7\nwN794z4MbAX+EXjNDrbjgfVvXzfwq8AdwK3AqcDJwD8AdwGvH3nsm4ArgMv67fkc8JSR5f8SuKrf\nD9cBq+fU/QNgPfBt4BzgB8C9/bb/cd9uDfCVfv3XA/92ZB1nAZ8B/jtwd7+tJ40sfzTwh8At/fKP\njCx7EbCx79vfAE8eWfY64J/6mjcCx4/Zb/P1d2fb/C7gk/26/wI4Yheea58Bzpoz7xLgt0amjwdu\nW+q/i1ZvS94Bb4v4nz1P+AOHAnf2gbkX8NP99PJ++QuBJwIBngd8B3h6v2yGkWDv5z0QzuPa9P3Y\nCKwAHtbX/CxwPrAv3RnhZuAF82zHA+vv172tf+w+wC/QHUAuAQ4A/hXwPeAJffs39QH40r79f+kD\neJ/+tgl4fd+P5/eBd/RI3W8A/6bv80Pnbmvf7jS6g9lewMvoDhQH98vO6uv/ArAM+EW6oE+//GN0\nB6ZH9f15Xj//6XQHt2f1jzuz348PAY4GtgCH9G1XAk/c2b7rp3dlm+8BntvX+l3gM7vwXBsX/p8H\nXjYyfRBQwGOW+m+jxZvDPu35SJKv97eP9PNeAayvqvVVdX9VfRLYQHcwoKo+VlVfqc5fAJ8AnjNh\nP36vqrZU1XeBZ9IdaC6oqnurajPwHuD0XVzXD4C3VNUPgEvpQuV3q+qeqrqO7mz2ySPtP1tVV/Tt\n30kX4sf1t/2Bt/b9+HPgT4AzRh770ar6634/fW9cZ6rqQ1V1S9/mMuDLwLEjTW6uqvdU1X3AxcDB\nwOOSHAycBJxbVXdX1Q/6/Q3dweLdVXVNVd1XVRfTvWo6DriPLpiPSbJPVd1UVV/ZxX23K9v8sar6\ny6r6PvAG4KeSrNjF9Y/an+7gud32+wcsYF2akOHfnlOr6sD+dmo/7wjgtJGDwteBZ9OFEklOSnJ1\nkrv6ZSfTBewktozcPwI4ZE791wOP28V13dkHKXTDUAC3jyz/Ll3w/EjtqrqfbtjokP62pZ+33c10\nr4zG9XusJD+fZOPItjyJH95ft43U/05/d3+6V0J3VdXdY1Z7BPArc/bRCrqz/U3Aa+le1dyR5NIH\n8Ub+g9rmqvoW3VDaQi4U+BbwiJHp7ffvWcC6NCHDX9D9cX9g5KBwYFXtV1Vv7a/G+DDdGPXjqupA\nujHv9I8d97Ww3wYePjL9+DFtRh+3BfjHOfUPqKqTJ96y8R44a02yF3AY3dDLLcCKft52h9ONpY/r\n949MJzmC7lXLeXTDGQcCX+Sf99eObAEeneTA
eZa9Zc4+enhVfRCgqi6pqmfTHSQKeNs8Neb2f1e2\neXR/7U/3vsRCLhS4DnjKyPRTgNur6s4FrEsTMvwF8L+BFyd5QZJlSR7aX8J5GN048EPoxtG3JTkJ\n+JmRx94OPCbJI0fmbQROTvLoJI+nOyvdkb8FvpnkdUke1vfhSUmeOdgW/rBnJHlJf6XRa+mGT64G\nrqE7cP1qkn2SzAAvphtKms/tdO9RbLcfXcBuBUjyKroz/52qqluBjwO/n+RRfR+e2y9+D3Bukmel\ns1+SFyY5IMnRSZ7fH6i/R/dK5755yszt765s88lJnp1kX+DNwDVVNfYVUJJ9kzyU7mC3T/9c2p4z\n/ws4O8kxSR4FvJHuPQUtAcNf9H/Ip9ANtWylO8v8r8BeVXUP8BrgcrqrT/4dsG7ksV8CPghs7ocj\nDgE+QPfm3k107w9ctpP699EFzlPp3nz9GvBe4JE7etwEPkr3RuzdwCuBl/Tj6/cCq+nG3b8G/D7w\n8/02zud9dGPtX0/ykaq6Hvht4P/RBe1PAn/9IPr2Srr3ML5E9wbvawGqagPduP//7Pu9ie7NY+gO\nzm/t+3wb8Fi6/8td6e+ubPMlwK/TDfc8A3j5Dvr/CbqDz78G1vb3n9tvw58Cbwc+TTe0dHO/Xi2B\n7VcYSE1I8ibgJ6rqFUvdlz1BkvfTXan1xqXui4blmb8kNcjwl6QGOewjSQ3yzF+SGrTbfqnWQQcd\nVCtXrlzqbkjSHuWzn/3s16pq+c7a7bbhv3LlSjZs2LDU3ZCkPUqSm3elncM+ktQgw1+SGmT4S1KD\nDH9JatAg4Z/kxP53OzclWTNm+VlJtvZfc7sxyauHqCtJWpiJr/bpf8f0Qrpff5oFrk2yrv+Cq1GX\nVdV5k9aTJE1uiDP/Y4FNVbW5/4bAS+m+IVKStJsaIvwP5Yd/3WiWH/4VoO1+NskXklwx30/AJTkn\nyYYkG7Zu3TpA1yRJ4wwR/uN+oWjuFwb9MbCyqp4M/Bnd75b+6IOq1lbVqqpatXz5Tj+gNhUzMzPM\nzMwsSW1JWixDhP8sIz/zxj//JN4DqurO/sefoftFomcMUFeStEBDhP+1wFFJjux/5u10Rn7pCSDJ\nwSOTq4EbBqgrSVqgia/2qaptSc4DrgSWARdV1XVJLgA2VNU64DVJVgPb6H4K7qxJ60qSFm6QL3ar\nqvXA+jnzzh+5/2vArw1RS5I0OT/hK0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+EtSgwx/SWqQ\n4S9JDTL8JalBhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+\nktQgw1+SGmT4S1KDBgn/JCcmuTHJpiRrdtDupUkqyaoh6kqSFmbi8E+yDLgQOAk4BjgjyTFj2h0A\nvAa4ZtKakqTJDHHmfyywqao2V9W9wKXAKWPavRl4O/C9AWpKkiYwRPgfCmwZmZ7t5z0gydOAFVX1\nJztaUZJzkmxIsmHr1q0DdG3PMTMzw8zMzFJ3Q1Ijhgj/jJlXDyxM9gJ+B/iVna2oqtZW1aqqWrV8\n+fIBuiZJGmeI8J8FVoxMHwbcMjJ9APAk4KokNwHHAet801eSls4Q4X8tcFSSI5PsC5wOrNu+sKq+\nUVUHVdXKqloJXA2srqoNA9SWJC3AxOFfVduA84ArgRuAy6vquiQXJFk96folScPbe4iVVNV6YP2c\neefP03ZmiJqSpIXzE76S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+S\nGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+IuZmRlmZmaWuhuSFpHhL0kN\nMvwlqUGGvyQ1yPCXpAYNEv5JTkxyY5JNSdaMWX5ukr9PsjHJZ5IcM0RdSdLCTBz+SZYBFwInAccA\nZ4wJ90uq
6ier6qnA24F3TlpXkrRwQ5z5HwtsqqrNVXUvcClwymiDqvrmyOR+QA1QV5K0QHsPsI5D\ngS0j07PAs+Y2SvKfgF8G9gWeP25FSc4BzgE4/PDDB+iaJGmcIc78M2bej5zZV9WFVfVE4HXAG8et\nqKrWVtWqqlq1fPnyAbomSRpniPCfBVaMTB8G3LKD9pcCpw5QV5K0QEOE/7XAUUmOTLIvcDqwbrRB\nkqNGJl8IfHmAupKkBZp4zL+qtiU5D7gSWAZcVFXXJbkA2FBV64DzkpwA/AC4Gzhz0rqSpIUb4g1f\nqmo9sH7OvPNH7v/SEHUkScPwE76S1CDDX0vGr5KWlo7hL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNf\nkhpk+EtSgwx/SWqQ4S9JDTL8JalBhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWp\nQYa/JDXI8JekBhn+ktSgQcI/yYlJbkyyKcmaMct/Ocn1Sb6Q5FNJjhiiriRpYfaedAVJlgEXAj8N\nzALXJllXVdePNPs7YFVVfSfJLwJvB142ae0H0cnpPqbqwa9fkpbQEGf+xwKbqmpzVd0LXAqcMtqg\nqj5dVd/pJ68GDhugrrRgMzMzzMzMLHU3pCUzRPgfCmwZmZ7t583nbODj4xYkOSfJhiQbtm7dOkDX\npN2LBx3tLoYI/3HjI2PHQZK8AlgFvGPc8qpaW1WrqmrV8uXLB+iaJGmcicf86c70V4xMHwbcMrdR\nkhOANwDPq6rvD1BXkrRAQ5z5XwscleTIJPsCpwPrRhskeRrwbmB1Vd0xQE1J0gQmDv+q2gacB1wJ\n3ABcXlXXJbkgyeq+2TuA/YEPJdmYZN08q5MkLYIhhn2oqvXA+jnzzh+5f8IQdSRJw/ATvlIjvNJI\nowx/SWqQ4S9JDTL8JalBhr+kqfK9ht2T4S9JDTL8JalBhr8kNcjwl6QGGf6S1KBBvt5B8/AXxCTt\npjzzl/RjaSkvMd0TLm81/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGG\nvyQ1aJDwT3JikhuTbEqyZszy5yb5XJJtSV46RE1J0sJNHP5JlgEXAicBxwBnJDlmTrOvAmcBl0xa\nT5I0uSG+1fNYYFNVbQZIcilwCnD99gZVdVO/7P4B6kmSJjRE+B8KbBmZngWetZAVJTkHOAfg8MMP\nn7xnLfPrpCXtwBBj/uMSY0HJUFVrq2pVVa1avnz5hN2SJM1niPCfBVaMTB8G3DLAeiVJUzJE+F8L\nHJXkyCT7AqcD6wZYryRpSiYO/6raBpwHXAncAFxeVdcluSDJaoAkz0wyC5wGvDvJdZPWlSQt3CC/\n4VtV64H1c+adP3L/WrrhIEnSbsAfcNewvMpI2iP49Q6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWp\nQV7qqR8PC7nE9ME+zstM9WPEM39JapDhL0kNMvwlqUGO+UuT8isttAcy/KU9lW9yawIO+0hSgzzz\nl/TgOdS1xzP8Je05pn3QgWYOPA77SFKDPPOXpF3xYzbU5Zm/JDXI8JekBhn+ktQgw1+SGmT4S1KD\nBgn/JCcmuTHJpiRrxix/SJLL+uXXJFk5RF1J0sJMHP5JlgEXAicBxwBnJDlmTrOzgbur6ieA3wHe\nNmldSdLCDXHmfyywqao2V9W9wKXAKXPanAJc3N+/Ajg+Wei3UkmSJjXEh7wOBbaMTM8Cz5qvTVVt\nS/IN4DHA10YbJTkHOAfg8MMPH6BrvQfz4YmZme7fq67ac+suZe09oe5S1nab98y6S117CoY48x93\nBj93L+1KG6pqbVWtqqpVy5cvH6BrkqRxhgj/WWDFyPRhwC3ztUmyN/BI4K4BakuSFmCI8L8WOCrJ\nkUn2BU4H1s1psw44s7//UuDPqxr56jxJ2g1NPObfj+GfB1wJLAMuqqrrkl
wAbKiqdcD7gA8k2UR3\nxn/6pHUlSQs3yLd6VtV6YP2ceeeP3P8ecNoQtSRJk/MTvpLUIMNfkhpk+EtSgwx/SWqQ4S9JDTL8\nJalBhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+S\nGjTIzzhK0u7mqquuWuou7NY885ekBnnmryZ5VqjWGf5zGAqSWmD4S43wxGbx7An7eqLwT/Jo4DJg\nJXAT8HNVdfeYdn8KHAd8pqpeNElNSXuWPSEIWzTpG75rgE9V1VHAp/rpcd4BvHLCWpKkgUwa/qcA\nF/f3LwZOHdeoqj4F3DNhLUnSQCYN/8dV1a0A/b+PnWRlSc5JsiHJhq1bt07YNUnSfHY65p/kz4DH\nj1n0hqE7U1VrgbUAq1atqqHXLy01x7+1u9hp+FfVCfMtS3J7koOr6tYkBwN3DNq7hhgKkhbTpMM+\n64Az+/tnAh+dcH2SpEUw6XX+bwUuT3I28FXgNIAkq4Bzq+rV/fRfAf8C2D/JLHB2VV05YW3t4Xy1\nIy2dicK/qu4Ejh8zfwPw6pHp50xSR5I0LL/YTZIaZPhLUoP8bh859i41yDN/SWqQ4S9JDTL8JalB\nhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqUKp2z99MSbIVuHmJyh8EfK2huktZ221uo3ZrdZey\n9hFVtXxnjXbb8F9KSTZU1apW6i5lbbe5jdqt1V3q2rvCYR9JapDhL0kNMvzHW9tY3aWs7Ta3Ubu1\nuktde6cc85ekBnnmL0kNMvwlqUFNh3+SFUk+neSGJNcl+aV+/mVJNva3m5JsXMTa70jypSRfSPJ/\nkxw4dO2RPhw9sp0bk3wzyWunWG++bX50kk8m+XL/76Om1Ye+3oFJruj38w1Jfmqa9Ubqjt3+xayV\n5M39c2tjkk8kOWRafejrPTTJ3yb5fN+P35hirYuS3JHkiyPzFnV7+5onJrkxyaYka6Zdb8Gqqtkb\ncDDw9P7+AcA/AMfMafPbwPmLVRv4GWDvfv7bgLct0r5YBtxG9wGRRd3fwNuBNf38NdPeZuBi4NX9\n/X2BA3eX59si7OtHjLR5DfCuKW9zgP37+/sA1wDHTanWc4GnA18cmbfY27sM+ArwhP659flp/R9P\nemv6zL+qbq2qz/X37wFuAA7dvjxJgJ8DPrhYtavqE1W1rW92NXDY0LXncTzwlaqa2qeqd7C/T6EL\nZPp/T51WH5I8gi4k3tf3496q+vq06o3a2fNtMWpV1TdHmu0HTPWKj+p8q5/cp79NpWZV/SVw15x5\ni7q9wLHApqraXFX3ApfSPb93O02H/6gkK4Gn0Z2ZbPcc4Paq+vIS1Ab498DHp1l7xOlM4SA3nznb\n/LiquhW60AIeO8XSTwC2An+Y5O+SvDfJflOsN9YO/s+nXivJW5JsAV4OnL8I9Zf1Q6d3AJ+sqqlv\n85z6i7m9hwJbRqZnmdIBflKGP5Bkf+DDwGvnnCmcwZQDcb7aSd4AbAP+aJr1+1r7AquBD027Vl9v\nvv29GPamGxr4g6p6GvBtuqGmRbOY2z+uVlW9oapW0D23zptm/b7efVX1VLpXsccmedK0a86pv5jb\nm3FdmHLNBWk+/JPsQ/fH8UdV9X9G5u8NvAS4bAlqnwm8CHh59QOJU3YS8Lmqun3ahebZ5tuTHNwv\nP5juDHFaZoHZkbPPK+gOBotivv/zJap1CfCz0+zDqH547SrgxMWqOcdibO8ssGJk+jDglinXXJCm\nw78f038fcENVvXPO4hOAL1XV7GLWTnIi8DpgdVV9Zxq1x5j6KxzY4f5eB5zZ3z8T+Oi0+lBVtwFb\nkhzdzzoeuH5a9Ubt5Pm2KLWSHDXSbDXwpSn3Y/n2K9aSPIz+72qaNefUX9TtBa4FjkpyZP+K+nS6\n5/dup+lP+CZ5NvBXwN8D9/ezX19V65
O8H7i6qt61mLWB3wMeAtzZz7u6qs6dRh/6fjycbozyCVX1\njWnV6WvNt83XAJcDhwNfBU6rqrvGrmSYfjwVeC/d1RibgVdV1d3TqjdSd97n22LVAs4Gju7n3Qyc\nW1X/NHT9kX48me5N/GV0J5uXV9UFU6r1QWCG7quUbwd+HTiZRdzevh8nA/+Dbpsvqqq3TLPeQjUd\n/pLUqqaHfSSpVYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JatD/B2TJhsaWure9AAAAAElFTkSu\nQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# rf_importance prints the importance ranking and draws the bar chart shown below;\n",
"# the object it returns is reused by the next cells as a prefit estimator\n",
"rf_kwargs = dict(max_depth=10, top_n=10)\n",
"model = embedded_method.rf_importance(X_train=X_train, y_train=y_train, **rf_kwargs)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['mean concavity', 'mean concave points', 'worst radius',\n",
" 'worst perimeter', 'worst concave points'],\n",
" dtype='object')"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# select features whose importance > threshold;\n",
"# prefit=True reuses the already-trained model instead of fitting it again\n",
"# (SelectFromModel is imported in the first cell)\n",
"\n",
"# only 5 features have importance > 0.05\n",
"feature_selection = SelectFromModel(model, prefit=True, threshold=0.05)\n",
"support_mask = feature_selection.get_support()\n",
"selected_feat = X_train.columns[support_mask]\n",
"selected_feat"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['mean radius', 'mean perimeter', 'mean area', 'mean concavity',\n",
" 'mean concave points', 'radius error', 'area error', 'worst radius',\n",
" 'worst perimeter', 'worst area', 'worst concavity',\n",
" 'worst concave points'],\n",
" dtype='object')"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# the threshold can also be an expression relative to the importances:\n",
"# only 12 features have importance > 2 times median\n",
"feature_selection2 = SelectFromModel(model, prefit=True, threshold='2*median')\n",
"support_mask2 = feature_selection2.get_support()\n",
"selected_feat2 = X_train.columns[support_mask2]\n",
"selected_feat2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Gradient Boosted Trees Importance"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature ranking:\n",
"1. feature no:27 feature name:worst concave points (0.694636)\n",
"2. feature no:23 feature name:worst area (0.131077)\n",
"3. feature no:4 feature name:mean smoothness (0.033800)\n",
"4. feature no:8 feature name:mean symmetry (0.018609)\n",
"5. feature no:22 feature name:worst perimeter (0.015998)\n",
"6. feature no:21 feature name:worst texture (0.013732)\n",
"7. feature no:2 feature name:mean perimeter (0.010792)\n",
"8. feature no:26 feature name:worst concavity (0.010138)\n",
"9. feature no:17 feature name:concave points error (0.008941)\n",
"10. feature no:13 feature name:area error (0.008934)\n",
"11. feature no:0 feature name:mean radius (0.007928)\n",
"12. feature no:12 feature name:perimeter error (0.006268)\n",
"13. feature no:18 feature name:symmetry error (0.005472)\n",
"14. feature no:3 feature name:mean area (0.005069)\n",
"15. feature no:1 feature name:mean texture (0.005034)\n",
"16. feature no:10 feature name:radius error (0.004299)\n",
"17. feature no:16 feature name:concavity error (0.003595)\n",
"18. feature no:6 feature name:mean concavity (0.003354)\n",
"19. feature no:19 feature name:fractal dimension error (0.003092)\n",
"20. feature no:14 feature name:smoothness error (0.002149)\n",
"21. feature no:29 feature name:worst fractal dimension (0.001952)\n",
"22. feature no:25 feature name:worst compactness (0.001149)\n",
"23. feature no:9 feature name:mean fractal dimension (0.000942)\n",
"24. feature no:11 feature name:texture error (0.000917)\n",
"25. feature no:15 feature name:compactness error (0.000671)\n",
"26. feature no:5 feature name:mean compactness (0.000636)\n",
"27. feature no:20 feature name:worst radius (0.000354)\n",
"28. feature no:24 feature name:worst smoothness (0.000305)\n",
"29. feature no:28 feature name:worst symmetry (0.000145)\n",
"30. feature no:7 feature name:mean concave points (0.000013)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAF71JREFUeJzt3XuUHHWZxvHvQ0K4r6CMCkkgUQPH\nrKLAEFhFHAE1AU1QQZMVBZc1i2tE1FXiZbMYdA/gbd1jXA3IomgIiC6OGDfoQvSgC2aCkSUJ0TEE\nM4bLcBXkGnj3j6pg0XRPV8/0JfnN8zmnT7qqfl3vWzXpp2uqe7oUEZiZWVp26HQDZmbWfA53M7ME\nOdzNzBLkcDczS5DD3cwsQQ53M7MEOdxtmyPpa5L+udN9mG3PHO4JkbRR0iOSHirc9h3hOnskDTSr\nxzIi4vSIOKedNWuRdLGkz3S6j7Ja3a+kxZLWS3pK0qlVln9I0h2SHpB0kaSdWtWLDc3hnp43R8Tu\nhdvmTjYjaWwn64+EpDGd7mEb9BvgH4EbKxdIeiMwHzgGmAS8CPh0O5uzgojwLZEbsBE4tsayI4Bf\nAveTPUF7CsveA6wDHgQ2AP+Qz98NeAR4Cngov+0LXAx8pvD4HmCgoo+zgJuAx4Cx+eO+BwwCtwJn\nDLEdT69/67qBjwF3AbcDJwDHAb8F7gU+UXjs2cAVwGX59twIvKKw/KXAinw/rAFmVtT9D2AZ8Gdg\nLvAE8Hi+7T/Mx80Hfp+vfy3wlsI6TgWuAz4P3Jdv64zC8ucC/wlszpdfWVj2JmB13tsvgYMKy84C\n/pjXXA8cU2W/1eq33jZ/DfhJvu6fAfuX+L92HXBqxbwlwL8Wpo8B7uj082K03jregG9N/GHWCHdg\nPHBPHog7AK/Pp7vy5ccDLwYEvBZ4GDgkX9ZDIbjzeU+Hb7UxeR+rgYnALnnNVcACYBzZEd0G4I01\ntuPp9efr3pI/dkfgvWQvEEuAPYC/Bh4FXpSPPzsPuBPz8f+UB+yO+a0f+ETex9F5oB1YqPsA8Oq8\n550rtzUfdxLZi9UOwDvIXgj2yZedmtd/LzAGeB9ZkCtf/iOyF5698n5em88/hOzF6/D8cafk+3En\n4EBgE7BvPnYS8OJ6+y6fLrPNDwJH5bW+DFxX4v9atXD/DfCOwvTeQADP6/RzYzTefFomPVdKuj+/\nXZnPOxlYFhHLIuKpiPgJ0EcW9kTEjyLi95H5GXA18JoR9vHvEbEpIh4BDiN7IVkYEY9HxAbgAmB2\nyXU9AXw2Ip4AlpKFxpcj4sGIWEN2NHpQYfyqiLgiH/9FspA+Ir/tDpyb93ENcBUwp/DYH0TEL/L9\n9Gi1ZiLiuxGxOR9zGfA7YFphyG0RcUFEPAl8E9gHeIGkfYAZwOkRcV9EPJHvb8heDL4eETdExJMR\n8U2y33qOAJ4kC96pknaMiI0R8fuS+67MNv8oIn4eEY8BnwT+RtLEkusv2p3sxXGrrff3GMa6bIQc\n7uk5ISL2zG8n5PP2B04qhP79wJFkoYOkGZKul3Rvvuw4sgAdiU2F+/sD+1bU/wTwgpLruicPSshO\nEwHcWVj+CFmwPKt2RDxFdlpn3/y2KZ+31W1kv9lU67sqSe+WtLqwLS/jmfvrjkL9h/O7u5P9JnNv\nRNxXZbX7Ax+p2EcTyY7W+4EzyX4ruUvS0gbeKG9omyPiIbJTXcN5I/4h4K8K01vvPziMddkIOdxH\nh03AJYXQ3zMidouIc/NPM3yP7BzxCyJiT7JzzsofW+1rQ/8M7FqYfmGVMcXHbQJurai/R0QcN+It\nq+7po05JOwATyE6NbAYm5vO22o/sXHa1vp81LWl/st865pGdbtgTuJm/7K+hbAKeK2nPGss+W7GP\ndo2ISwEiYklEHEn2IhDAeTVqVPZfZpuL+2t3
svcFhvNG/BrgFYXpVwB3RsQ9w1iXjZDDfXT4NvBm\nSW+UNEbSzvlHHCeQnYfdiew89hZJM4A3FB57J/A8Sc8pzFsNHCfpuZJeSHZUOZRfAX+SdJakXfIe\nXibpsKZt4TMdKumt+Sd1ziQ7vXE9cAPZC9PHJO0oqQd4M9mpnlruJHuPYKvdyAJ0EEDSe8iO3OuK\niNuBHwNflbRX3sNR+eILgNMlHa7MbpKOl7SHpAMlHZ2/ED9K9pvKkzXKVPZbZpuPk3SkpHHAOcAN\nEVH1NxhJ4yTtTPZitmP+f2lrjnwLOE3SVEl7AZ8iO6dvHeBwHwXyJ+osslMhg2RHiR8FdoiIB4Ez\ngMvJPr3xt0Bv4bG3AJcCG/LTBfsCl5C9ebaR7Pz8ZXXqP0kWKK8ke3PzbuBC4DlDPW4EfkD2Rud9\nwLuAt+bntx8HZpKd974b+Crw7nwba/kG2bnu+yVdGRFrgS8A/0sWpC8HftFAb+8iew/hFrI3UM8E\niIg+svPuX8n77id7cxayF99z857vAJ5P9rMs02+ZbV4C/AvZ6ZhDgXcO0f/VZC8urwIW5/ePyrfh\nv4HzgWvJTv3clq/XOmDrO/hmSZB0NvCSiDi5071sDyRdTPZJp091uhdrLh+5m5klyOFuZpYgn5Yx\nM0uQj9zNzBLUsS912nvvvWPSpEmdKm9mtl1atWrV3RHRVW9cx8J90qRJ9PX1daq8mdl2SdJtZcb5\ntIyZWYIc7mZmCXK4m5klyOFuZpagUuEuaXp+3cR+SfOrLP9S/hWoqyX9Nv+6UjMz65C6n5bJryO5\niOzqPQPASkm9+RcoARARHyqM/wBwcAt6NTOzksocuU8D+iNiQ/4Nc0vJvmGwljlk3yJoZmYdUibc\nx/PMq9MM8MyruDwtv5DBZOCaGsvnSuqT1Dc4ONhor2ZmVlKZcK92hZlaX0gzG7iicEm0Zz4oYnFE\ndEdEd1dX3T+waomenh56eno6UtvMrF3K/IXqAIXLcPGXS5ZVMxt4/0ibaojKXN1shI/zl6uZ2Xam\nTLivBKZImkx23cXZZFfreQZJBwJ7kV2hZpu1otMNmJm1Qd3TMhGxhexiwMuBdcDlEbFG0kJJMwtD\n5wBLw98hbGbWcaW+OCwilgHLKuYtqJg+u3ltmZnZSPgvVM3MEuRwNzNLkMPdzCxBDnczswQ53M3M\nEuRwNzNLkMPdzCxBDnczswQ53M3MEuRwNzNLkMPdzCxBDnczswQ53M3MEuRwNzNLkMPdzCxBDncz\nswQ53M3MEuRwNzNLkMPdzCxBDnczswSVCndJ0yWtl9QvaX6NMW+XtFbSGklLmtummZk1Ymy9AZLG\nAIuA1wMDwEpJvRGxtjBmCvBx4NURcZ+k57eqYTMzq6/Mkfs0oD8iNkTE48BSYFbFmPcCiyLiPoCI\nuKu5bZqZWSPKhPt4YFNheiCfV3QAcICkX0i6XtL0aiuSNFdSn6S+wcHB4XVsZmZ1lQl3VZkXFdNj\ngSlADzAHuFDSns96UMTiiOiOiO6urq5GezUzs5LKhPsAMLEwPQHYXGXMDyLiiYi4FVhPFvZmZtYB\nZcJ9JTBF0mRJ44DZQG/FmCuB1wFI2pvsNM2GZjZqZmbl1Q33iNgCzAOWA+uAyyNijaSFkmbmw5YD\n90haC1wLfDQi7mlV02ZmNjRFVJ4+b4/u7u7o6+sb+YpU7S2BJuvQPjIzqyRpVUR01xvnv1A1M0uQ\nw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS\n5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBJUKd0nTJa2X1C9pfpXlp0oalLQ6\nv/1981s1M7OyxtYbIGkMsAh4PTAArJTUGxFrK4ZeFhHzWtCjmZk1qMyR+zSgPyI2RMTjwFJgVmvb\nMjOzkSgT
7uOBTYXpgXxepbdJuknSFZImVluRpLmS+iT1DQ4ODqNdMzMro0y4q8q8qJj+ITApIg4C\nfgp8s9qKImJxRHRHRHdXV1djnZqZWWllwn0AKB6JTwA2FwdExD0R8Vg+eQFwaHPaMzOz4SgT7iuB\nKZImSxoHzAZ6iwMk7VOYnAmsa16LZmbWqLqflomILZLmAcuBMcBFEbFG0kKgLyJ6gTMkzQS2APcC\np7awZzMzq0MRlafP26O7uzv6+vpGviJVe0ugyTq0j8zMKklaFRHd9cb5L1TNzBLkcDczS5DD3cws\nQQ53M7MEOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLkcDcz\nS5DD3cwsQQ53M7MEOdzNzBLkcDczS5DD3cwsQaXCXdJ0Sesl9UuaP8S4EyWFpLrX9zMzs9apG+6S\nxgCLgBnAVGCOpKlVxu0BnAHc0OwmzcysMWWO3KcB/RGxISIeB5YCs6qMOwc4H3i0if2ZmdkwlAn3\n8cCmwvRAPu9pkg4GJkbEVU3szczMhqlMuKvKvHh6obQD8CXgI3VXJM2V1Cepb3BwsHyXZmbWkDLh\nPgBMLExPADYXpvcAXgaskLQROALorfamakQsjojuiOju6uoaftdmZjakMuG+EpgiabKkccBsoHfr\nwoh4ICL2johJETEJuB6YGRF9LenYzMzqqhvuEbEFmAcsB9YBl0fEGkkLJc1sdYNmZta4sWUGRcQy\nYFnFvAU1xvaMvC0zMxsJ/4WqmVmCHO5mZglyuJuZJcjhbmaWIIe7mVmCHO5mZglyuJuZJcjhbmaW\nIIe7mVmCHO5mZglyuJuZJcjhbmaWIIe7mVmCHO5mZglyuJuZJcjhbmaWIIe7mVmCHO5mZglyuJuZ\nJcjhbmaWoFLhLmm6pPWS+iXNr7L8dEn/J2m1pOskTW1+q2ZmVlbdcJc0BlgEzACmAnOqhPeSiHh5\nRLwSOB/4YtM7NTOz0socuU8D+iNiQ0Q8DiwFZhUHRMSfCpO7AdG8Fs3MrFFjS4wZD2wqTA8Ah1cO\nkvR+4MPAOODoaiuSNBeYC7Dffvs12quZmZVU5shdVeY968g8IhZFxIuBs4BPVVtRRCyOiO6I6O7q\n6mqsUzMzK61MuA8AEwvTE4DNQ4xfCpwwkqbMzGxkyoT7SmCKpMmSxgGzgd7iAElTCpPHA79rXotm\nZtaouufcI2KLpHnAcmAMcFFErJG0EOiLiF5gnqRjgSeA+4BTWtm0mZkNrcwbqkTEMmBZxbwFhfsf\nbHJfZmY2Av4LVTOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnc\nzcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0tQqXCXNF3S\nekn9kuZXWf5hSWsl3STpfyTt3/xWzcysrLrhLmkMsAiYAUwF5kiaWjHs10B3RBwEXAGc3+xGzcys\nvDJH7tOA/ojYEBGPA0uBWcUBEXFtRDycT14PTGhum2Zm1ogy4T4e2FSYHsjn1XIa8ONqCyTNldQn\nqW9wcLB8l2Zm1pAy4a4q86LqQOlkoBv4XLXlEbE4Irojorurq6t8l2Zm1pCxJcYMABML0xOAzZWD\nJB0LfBJ4bUQ81pz2zMxsOMocua8EpkiaLGkcMBvoLQ6QdDDwdWBmRNzV/DbNzKwRdcM9IrYA84Dl\nwDrg8ohYI2mhpJn5sM8BuwPflbRaUm+N1ZmZWRuUOS1DRCwDllXMW1C4f2yT+zIzsxHwX6iamSXI\n4W5mliCHu5lZghzuZmYJcribmSXI4W5mliCHu5lZghzuZmYJcribmSXI4W5mliCHu5lZghzuZmYJ\ncribmSXI4d4mPT099PT0dLoNMxslSn3lr9WgalcgbOJjourVDM3M6nK4t8
mKTjdgZqOKT8uYmSXI\n4W5mliCHu5lZgkqFu6TpktZL6pc0v8ryoyTdKGmLpBOb36aZmTWibrhLGgMsAmYAU4E5kqZWDPsD\ncCqwpNkNmplZ48p8WmYa0B8RGwAkLQVmAWu3DoiIjfmyp1rQo5mZNajMaZnxwKbC9EA+z8zMtlFl\nwr3aX90M669rJM2V1Cepb3BwcDirMDOzEsqE+wAwsTA9Adg8nGIRsTgiuiOiu6urazirMDOzEsqE\n+0pgiqTJksYBs4He1rZlZmYjUTfcI2ILMA9YDqwDLo+INZIWSpoJIOkwSQPAScDXJa1pZdNmZja0\nUt8tExHLgGUV8xYU7q8kO11jZmbbAP+FqplZghzuZmYJcribmSXI4W5mliCHu5lZghzuZmYJcrib\nmSXI4W5mliCHu5lZghzuZmYJcribmSXI4W5mliCHu5lZgkp9K6RtY1Tt4ljV9eT/rmi0RgzrYltm\nto1wuCduRacbMLOO8GkZM7MEOdzNzBLkcLeW6enpoaenZ9TU7aTRuM02NJ9zt8Y08GbusB5T643c\nTtVt0NaAXbFiRVPWZ7V1al9vLz9jh7u1zIpU6m4nLyydsr2E3WhTKtwlTQe+DIwBLoyIcyuW7wR8\nCzgUuAd4R0RsbG6rZtu+Fc1c2fb0W1Ina28PdYeq3SJ1z7lLGgMsAmYAU4E5kqZWDDsNuC8iXgJ8\nCTiv2Y2a2bZpBf7I7baozJH7NKA/IjYASFoKzALWFsbMAs7O718BfEWSIrbz3zfNbJu1YpTVbVSZ\ncB8PbCpMDwCH1xoTEVskPQA8D7i7OEjSXGAuwH777TfMlit08vWjU7W9zaOjdgN1V3SobtNtB/t6\ne1Hmo5DVTixV7okyY4iIxRHRHRHdXV1dZfozM7NhKBPuA8DEwvQEYHOtMZLGAs8B7m1Gg2Zm1rgy\n4b4SmCJpsqRxwGygt2JML3BKfv9E4Bqfbzcz65y659zzc+jzgOVkH4W8KCLWSFoI9EVEL/AN4BJJ\n/WRH7LNb2bSZmQ2t1OfcI2IZsKxi3oLC/UeBk5rbmpmZDZe/W8bMLEEOdzOzBDnczcwS5HA3M0uQ\nOvWJRUmDwG0dKQ57U/HXs66bXG1v8+ioPdrqAuwfEXX/CrRj4d5Jkvoiott1063tbR4dtUdb3Ub4\ntIyZWYIc7mZmCRqt4b7YdZOv7W0eHbVHW93SRuU5dzOz1I3WI3czs6Q53M3MEpR0uEuaKOlaSesk\nrZH0wXz+ZZJW57eNkla3qe45km7K614tad9m1h2inzGSfi3pqnbUy2t+KN/2myVdKmnnFtaqtb8/\nJ+mWfJ//l6Q921T3pHz6KUkt/7hcrT7aXUvSByStz+ef34LaF0m6S9LNhXktfS4PUbcjz+WGRESy\nN2Af4JD8/h7Ab4GpFWO+ACxoR13grwpjzgC+1qb98GFgCXBVm+qNB24FdsmnLwdObffPGXgDMDaf\nfx5wXpvqvhQ4kOzqd91t2N91/5+3YZtfB/wU2Clf9vwW1D4KOAS4ucbypj+Xa9Xt1HO5kVvSR+4R\ncXtE3JjffxBYRxY8AEgS8Hbg0nbUjYg/FYbtRpVLETabpAnA8cCFra5VYSywS35lrl159tW7mmaI\n/X11RGzJh11PdhWxdtRdFxHrm1lrOH20udb7gHMj4rF82V0tqP1zalzhrVXP5Vp1O/FcblTS4V4k\naRJwMHBDYfZrgDsj4nftqivps5I2Ae8EFtR+ZNP8G/Ax4Kk21AIgIv4IfB74A3A78EBEXN2O2jV+\nzgB/B/y4A3Xbqp19VNQ6AHiNpBsk/UzSYa2uX6Hlz+VKHXguN2RUhLuk3YHvAWdWvOLOoQWv9EPV\njYhPRsRE4DvAvFbVzuu/CbgrIla1sk6VunsBs4DJwL7AbpJObkPdqj9nSZ8EtpDt87bVbbd29lGl\n1lhgL+AI4KPA5fnRdLu09LlcTTufy8
ORfLhL2pHsP+F3IuL7hfljgbcCl7WzbsES4G2tqF3wamCm\npI3AUuBoSd9ucU2AY4FbI2IwIp4Avg+8qpUFh/g5nwK8CXhn5CdI21G33drZR41aA8D3I/Mrst8U\n925lH4V+WvpcLqEdz+WGJR3u+ZHDN4B1EfHFisXHArdExEC76kqaUhg2E7il2bWLIuLjETEhIiaR\nXdf2moho+RE02emYIyTtmu+LY8jOzbbEEPt7OnAWMDMiHm5X3XZrZx9D1LoSODofcwAwjvZ9a2LL\nnsu1tPu5PCydfke3lTfgSLI3Om4CVue34/JlFwOnt7Mu2dHOzfn8H5K9+daufdFDmz4tk9f7NNl/\n+JuBS8g/RdHm/d0PbCrMa+onGoao+xayI9nHgDuB5S3e1zX/n7dxX48Dvp3/vG8Ejm5B7UvJ3sN5\nIt+/p+XzW/ZcrlW3k8/lsjd//YCZWYKSPi1jZjZaOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLk\ncDczS9D/A7FX/jyOxZ4EAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# gbt_importance prints the importance ranking and draws the bar chart shown below;\n",
"# note that this rebinds `model`, so the next cell selects on the boosted model\n",
"gbt_kwargs = dict(max_depth=10, top_n=10)\n",
"model = embedded_method.gbt_importance(X_train=X_train, y_train=y_train, **gbt_kwargs)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['mean perimeter', 'mean smoothness', 'mean symmetry', 'worst texture',\n",
" 'worst perimeter', 'worst area', 'worst concavity',\n",
" 'worst concave points'],\n",
" dtype='object')"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# select features whose importance > threshold\n",
"# (`model` is the object returned by gbt_importance in the previous cell)\n",
"\n",
"# only 8 features have importance > 0.01\n",
"feature_selection = SelectFromModel(model, prefit=True, threshold=0.01)\n",
"support_mask = feature_selection.get_support()\n",
"selected_feat = X_train.columns[support_mask]\n",
"selected_feat"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}