2018.12.2 First commit.

This commit is contained in:
DESKTOP-SAT83DL\yimeng.zhang
2018-12-02 21:11:32 +08:00
commit 149c80cedf
50 changed files with 14765 additions and 0 deletions

6
.gitignore vendored Normal file
View File

@@ -0,0 +1,6 @@
rule_extraction 20181014.py
__pycache__
.ipynb_checkpoints
.gitignore.bak
history
README_bk.md

656
1_Demo_Data_Explore.ipynb Normal file

File diff suppressed because one or more lines are too long

1109
2.1_Demo_Missing_Data.ipynb Normal file

File diff suppressed because it is too large Load Diff

1582
2.2_Demo_Outlier.ipynb Normal file

File diff suppressed because it is too large Load Diff

271
2.3_Demo_Rare_Values.ipynb Normal file
View File

@@ -0,0 +1,271 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"# import seaborn as sns\n",
"# import matplotlib.pyplot as plt\n",
"import os\n",
"# plt.style.use('seaborn-colorblind')\n",
"# %matplotlib inline\n",
"from feature_cleaning import rare_values as ra"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Variable Pclass label proportion:\n",
"3 0.551066\n",
"1 0.242424\n",
"2 0.206510\n",
"Name: Pclass, dtype: float64\n",
"Variable SibSp label proportion:\n",
"0 0.682379\n",
"1 0.234568\n",
"2 0.031425\n",
"4 0.020202\n",
"3 0.017957\n",
"8 0.007856\n",
"5 0.005612\n",
"Name: SibSp, dtype: float64\n"
]
}
],
"source": [
"use_cols = [\n",
" 'Pclass', 'Sex', 'Age', 'Fare', 'SibSp',\n",
" 'Survived'\n",
"]\n",
"\n",
"# see column Pclass & SibSp's distributions\n",
"# SibSp has values 3/8/5 that occur rarely, under 2%\n",
"# Pclass has 3 values, but no one is under 20%\n",
"data = pd.read_csv('./data/titanic.csv', usecols=use_cols)\n",
"for i in ['Pclass','SibSp']:\n",
" print('Variable',i,'label proportion:')\n",
" print(data[i].value_counts()/len(data))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Grouping into one new category\n",
"Grouping the observations that show rare labels into a unique category ('rare')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# create the encoder and fit with our data\n",
"enc = ra.GroupingRareValues(cols=['Pclass','SibSp'],threshold=0.01).fit(data)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'col': 'Pclass', 'mapping': 3 3\n",
"1 1\n",
"2 2\n",
"dtype: int64, 'data_type': dtype('int64')}, {'col': 'SibSp', 'mapping': 0 0\n",
"1 1\n",
"2 2\n",
"4 4\n",
"3 3\n",
"8 rare\n",
"5 rare\n",
"dtype: object, 'data_type': dtype('int64')}]\n"
]
}
],
"source": [
"# let's see the mapping\n",
"# for SibSp, values 5 & 8 are encoded as 'rare' as they appear less than 10%\n",
"# for Pclass, nothing changed\n",
"print(enc.mapping)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# perform transformation\n",
"data2 = enc.transform(data)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 608\n",
"1 209\n",
"2 28\n",
"4 18\n",
"3 16\n",
"rare 12\n",
"Name: SibSp, dtype: int64\n"
]
}
],
"source": [
"# check the result\n",
"print(data2.SibSp.value_counts())"
]
},
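{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal plain-pandas sketch of what GroupingRareValues does, assuming a\n",
"# strict '< threshold' comparison (this is not the repo's actual code):\n",
"# labels whose frequency is below 1% are replaced by the string 'rare'\n",
"threshold = 0.01\n",
"freq = data['SibSp'].value_counts() / len(data)\n",
"rare_labels = list(freq[freq < threshold].index)\n",
"manual = data['SibSp'].apply(lambda x: 'rare' if x in rare_labels else x)\n",
"print(manual.value_counts())"
]
},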
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Mode Imputation\n",
"Replacing the rare label by most frequent label"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# create the encoder and fit with our data\n",
"enc = ra.ModeImputation(cols=['Pclass','SibSp'],threshold=0.01).fit(data)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'col': 'Pclass', 'mapping': 3 3\n",
"1 1\n",
"2 2\n",
"dtype: int64, 'data_type': dtype('int64')}, {'col': 'SibSp', 'mapping': 0 0\n",
"1 1\n",
"2 2\n",
"4 4\n",
"3 3\n",
"8 0\n",
"5 0\n",
"dtype: int64, 'data_type': dtype('int64')}]\n"
]
}
],
"source": [
"# let's see the mapping\n",
"# for SibSp, values 5 & 8 are encoded as 0, as label 0 is the most frequent label\n",
"# for Pclass, nothing changed\n",
"print(enc.mapping)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# perform transformation\n",
"data3 = enc.transform(data)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 620\n",
"1 209\n",
"2 28\n",
"4 18\n",
"3 16\n",
"Name: SibSp, dtype: int64\n"
]
}
],
"source": [
"# check the result\n",
"print(data3.SibSp.value_counts())"
]
},
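{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch of the same idea as ModeImputation, assuming rare labels\n",
"# are simply mapped to the overall mode (this is not the repo's actual code):\n",
"threshold = 0.01\n",
"freq = data['SibSp'].value_counts() / len(data)\n",
"rare_labels = list(freq[freq < threshold].index)\n",
"mode_label = data['SibSp'].mode()[0]\n",
"print(data['SibSp'].replace(rare_labels, mode_label).value_counts())"
]
},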
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,326 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"# import seaborn as sns\n",
"# import matplotlib.pyplot as plt\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# plt.style.use('seaborn-colorblind')\n",
"# %matplotlib inline\n",
"#from feature_cleaning import rare_values as ra"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"use_cols = [\n",
" 'Pclass', 'Sex', 'Age', 'Fare', 'SibSp',\n",
" 'Survived'\n",
"]\n",
"\n",
"data = pd.read_csv('./data/titanic.csv', usecols=use_cols)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Fare</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>7.2500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>71.2833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Sex Age SibSp Fare\n",
"0 0 3 male 22.0 1 7.2500\n",
"1 1 1 female 38.0 1 71.2833\n",
"2 1 3 female 26.0 0 7.9250"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((623, 6), (268, 6))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Note that we include target variable in the X_train \n",
"# because we need it to supervise our discretization\n",
"# this is not the standard way of using train-test-split\n",
"X_train, X_test, y_train, y_test = train_test_split(data, data.Survived, test_size=0.3,\n",
" random_state=0)\n",
"X_train.shape, X_test.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Normalization - Standardization (Z-score scaling)\n",
"\n",
"removes the mean and scales the data to unit variance.<br />z = (X - X.mean) / std"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Survived Pclass Sex Age SibSp Fare Fare_zscore\n",
"857 1 1 male 51.0 0 26.5500 -0.122530\n",
"52 1 1 female 49.0 1 76.7292 0.918124\n",
"386 0 3 male 1.0 5 46.9000 0.299503\n",
"124 0 1 male 54.0 0 77.2875 0.929702\n",
"578 0 3 female NaN 1 14.4583 -0.373297\n",
"549 1 2 male 8.0 1 36.7500 0.089005\n"
]
}
],
"source": [
"# add the new created feature\n",
"from sklearn.preprocessing import StandardScaler\n",
"ss = StandardScaler().fit(X_train[['Fare']])\n",
"X_train_copy = X_train.copy(deep=True)\n",
"X_train_copy['Fare_zscore'] = ss.transform(X_train_copy[['Fare']])\n",
"print(X_train_copy.head(6))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5.916437306188636e-17\n",
"1.0008035356861\n"
]
}
],
"source": [
"# check if it is with mean=0 std=1\n",
"print(X_train_copy['Fare_zscore'].mean())\n",
"print(X_train_copy['Fare_zscore'].std())\n"
]
},
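{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cross-check the formula by hand. StandardScaler normalizes with the\n",
"# population std (ddof=0); that is also why the std printed above is\n",
"# ~1.0008 rather than exactly 1, since pandas' .std() defaults to ddof=1.\n",
"fare = X_train['Fare']\n",
"manual_z = (fare - fare.mean()) / fare.std(ddof=0)\n",
"print(manual_z.head(3))"
]
},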
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Min-Max scaling\n",
"transforms features by scaling each feature to a given range. Default to [0,1].<br />X_scaled = (X - X.min / (X.max - X.min)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Survived Pclass Sex Age SibSp Fare Fare_minmax\n",
"857 1 1 male 51.0 0 26.5500 0.051822\n",
"52 1 1 female 49.0 1 76.7292 0.149765\n",
"386 0 3 male 1.0 5 46.9000 0.091543\n",
"124 0 1 male 54.0 0 77.2875 0.150855\n",
"578 0 3 female NaN 1 14.4583 0.028221\n",
"549 1 2 male 8.0 1 36.7500 0.071731\n"
]
}
],
"source": [
"# add the new created feature\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"mms = MinMaxScaler().fit(X_train[['Fare']])\n",
"X_train_copy = X_train.copy(deep=True)\n",
"X_train_copy['Fare_minmax'] = mms.transform(X_train_copy[['Fare']])\n",
"print(X_train_copy.head(6))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.0\n",
"0.0\n"
]
}
],
"source": [
"# check the range of Fare_minmax\n",
"print(X_train_copy['Fare_minmax'].max())\n",
"print(X_train_copy['Fare_minmax'].min())"
]
},
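{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cross-check the formula by hand: X_scaled = (X - X.min) / (X.max - X.min)\n",
"fare = X_train['Fare']\n",
"manual_mm = (fare - fare.min()) / (fare.max() - fare.min())\n",
"print(manual_mm.head(3))"
]
},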
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Robust scaling\n",
"removes the median and scales the data according to the quantile range (defaults to IQR)<br />X_scaled = (X - X.median) / IQR"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Survived Pclass Sex Age SibSp Fare Fare_robust\n",
"857 1 1 male 51.0 0 26.5500 0.492275\n",
"52 1 1 female 49.0 1 76.7292 2.630973\n",
"386 0 3 male 1.0 5 46.9000 1.359616\n",
"124 0 1 male 54.0 0 77.2875 2.654768\n",
"578 0 3 female NaN 1 14.4583 -0.023088\n",
"549 1 2 male 8.0 1 36.7500 0.927011\n"
]
}
],
"source": [
"# add the new created feature\n",
"from sklearn.preprocessing import RobustScaler\n",
"rs = RobustScaler().fit(X_train[['Fare']])\n",
"X_train_copy = X_train.copy(deep=True)\n",
"X_train_copy['Fare_robust'] = rs.transform(X_train_copy[['Fare']])\n",
"print(X_train_copy.head(6))"
]
}
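,
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cross-check by hand with the median and the IQR (75th minus 25th\n",
"# percentile), matching RobustScaler's default quantile range:\n",
"fare = X_train['Fare']\n",
"iqr = fare.quantile(0.75) - fare.quantile(0.25)\n",
"manual_rs = (fare - fare.median()) / iqr\n",
"print(manual_rs.head(3))"
]
}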
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,865 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"# import seaborn as sns\n",
"# import matplotlib.pyplot as plt\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"from feature_engineering import discretization as dc\n",
"\n",
"# plt.style.use('seaborn-colorblind')\n",
"# %matplotlib inline\n",
"#from feature_cleaning import rare_values as ra"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"use_cols = [\n",
" 'Pclass', 'Sex', 'Age', 'Fare', 'SibSp',\n",
" 'Survived'\n",
"]\n",
"\n",
"data = pd.read_csv('./data/titanic.csv', usecols=use_cols)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Fare</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>7.2500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>71.2833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Sex Age SibSp Fare\n",
"0 0 3 male 22.0 1 7.2500\n",
"1 1 1 female 38.0 1 71.2833\n",
"2 1 3 female 26.0 0 7.9250"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((623, 6), (268, 6))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Note that we include target variable in the X_train \n",
"# because we need it to supervise our discretization\n",
"# this is not the standard way of using train-test-split\n",
"X_train, X_test, y_train, y_test = train_test_split(data, data.Survived, test_size=0.3,\n",
" random_state=0)\n",
"X_train.shape, X_test.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Equal width binning\n",
"divides the scope of possible values into N bins of the same width"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.preprocessing import KBinsDiscretizer\n",
"enc_equal_width = KBinsDiscretizer(n_bins=3,encode='ordinal',strategy='uniform').fit(X_train[['Fare']])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([array([ 0. , 170.7764, 341.5528, 512.3292])], dtype=object)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# equal width for every bins\n",
"enc_equal_width.bin_edges_"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.0 610\n",
"1.0 11\n",
"2.0 2\n",
"Name: 0, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result = enc_equal_width.transform(X_train[['Fare']])\n",
"pd.DataFrame(result)[0].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Survived Pclass Sex Age SibSp Fare Fare_equal_width\n",
"857 1 1 male 51.0 0 26.5500 0.0\n",
"52 1 1 female 49.0 1 76.7292 0.0\n",
"386 0 3 male 1.0 5 46.9000 0.0\n",
"124 0 1 male 54.0 0 77.2875 0.0\n",
"578 0 3 female NaN 1 14.4583 0.0\n",
"549 1 2 male 8.0 1 36.7500 0.0\n",
"118 0 1 male 24.0 0 247.5208 1.0\n",
"12 0 3 male 20.0 0 8.0500 0.0\n",
"157 0 3 male 30.0 0 8.0500 0.0\n",
"127 1 3 male 24.0 0 7.1417 0.0\n"
]
}
],
"source": [
"# add the new discretized variable\n",
"X_train_copy = X_train.copy(deep=True)\n",
"X_train_copy['Fare_equal_width'] = enc_equal_width.transform(X_train[['Fare']])\n",
"print(X_train_copy.head(10))"
]
},
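{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The equal-width edges computed by hand: each of the 3 bins spans\n",
"# (max - min) / 3, which is why nearly all fares land in the first bin.\n",
"fare = X_train['Fare']\n",
"edges = np.linspace(fare.min(), fare.max(), 3 + 1)\n",
"print(edges)\n",
"print(pd.cut(fare, bins=edges, labels=False, include_lowest=True).value_counts())"
]
},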
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Equal frequency binning\n",
"divides the scope of possible values of the variable into N bins, \n",
"where each bin carries the same amount of observations"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"enc_equal_freq = KBinsDiscretizer(n_bins=3,encode='ordinal',strategy='quantile').fit(X_train[['Fare']])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([array([ 0. , 8.69303333, 26.2875 , 512.3292 ])],\n",
" dtype=object)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check the bin edges\n",
"enc_equal_freq.bin_edges_"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2.0 209\n",
"0.0 208\n",
"1.0 206\n",
"Name: 0, dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# equal number of case for every bins\n",
"result = enc_equal_freq.transform(X_train[['Fare']])\n",
"pd.DataFrame(result)[0].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Survived Pclass Sex Age SibSp Fare Fare_equal_freq\n",
"857 1 1 male 51.0 0 26.5500 2.0\n",
"52 1 1 female 49.0 1 76.7292 2.0\n",
"386 0 3 male 1.0 5 46.9000 2.0\n",
"124 0 1 male 54.0 0 77.2875 2.0\n",
"578 0 3 female NaN 1 14.4583 1.0\n",
"549 1 2 male 8.0 1 36.7500 2.0\n",
"118 0 1 male 24.0 0 247.5208 2.0\n",
"12 0 3 male 20.0 0 8.0500 0.0\n",
"157 0 3 male 30.0 0 8.0500 0.0\n",
"127 1 3 male 24.0 0 7.1417 0.0\n"
]
}
],
"source": [
"# add the new discretized variable\n",
"X_train_copy = X_train.copy(deep=True)\n",
"X_train_copy['Fare_equal_freq'] = enc_equal_freq.transform(X_train[['Fare']])\n",
"print(X_train_copy.head(10))"
]
},
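{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# pandas' qcut gives the same equal-frequency split directly,\n",
"# cutting at the 1/3 and 2/3 quantiles of Fare:\n",
"print(pd.qcut(X_train['Fare'], q=3, labels=False).value_counts())"
]
},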
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## K-means binning\n",
"using k-means to partition values into clusters"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"enc_kmeans = KBinsDiscretizer(n_bins=3,encode='ordinal',strategy='kmeans').fit(X_train[['Fare']])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([array([ 0. , 93.5271531 , 338.08506324, 512.3292 ])],\n",
" dtype=object)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check the bin edges\n",
"enc_kmeans.bin_edges_"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.0 587\n",
"1.0 34\n",
"2.0 2\n",
"Name: 0, dtype: int64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result = enc_kmeans.transform(X_train[['Fare']])\n",
"pd.DataFrame(result)[0].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Survived Pclass Sex Age SibSp Fare Fare_kmeans\n",
"857 1 1 male 51.0 0 26.5500 0.0\n",
"52 1 1 female 49.0 1 76.7292 0.0\n",
"386 0 3 male 1.0 5 46.9000 0.0\n",
"124 0 1 male 54.0 0 77.2875 0.0\n",
"578 0 3 female NaN 1 14.4583 0.0\n",
"549 1 2 male 8.0 1 36.7500 0.0\n",
"118 0 1 male 24.0 0 247.5208 1.0\n",
"12 0 3 male 20.0 0 8.0500 0.0\n",
"157 0 3 male 30.0 0 8.0500 0.0\n",
"127 1 3 male 24.0 0 7.1417 0.0\n"
]
}
],
"source": [
"# add the new discretized variable\n",
"X_train_copy = X_train.copy(deep=True)\n",
"X_train_copy['Fare_kmeans'] = enc_kmeans.transform(X_train[['Fare']])\n",
"print(X_train_copy.head(10))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Discretisation with Decision Tree\n",
"using a decision tree to identify the optimal splitting points that would determine the bins"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"enc1 = dc.DiscretizeByDecisionTree(col='Fare',max_depth=2).fit(X=X_train,y=y_train)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=2,\n",
" max_features=None, max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
" splitter='best')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"enc1.tree_model"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data1 = enc1.transform(data)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Survived Pclass Sex Age SibSp Fare Fare_tree_discret\n",
"0 0 3 male 22.0 1 7.2500 0.107143\n",
"1 1 1 female 38.0 1 71.2833 0.442308\n",
"2 1 3 female 26.0 0 7.9250 0.255319\n",
"3 1 1 female 35.0 1 53.1000 0.442308\n",
"4 0 3 male 35.0 0 8.0500 0.255319\n",
"[0.10714286 0.44230769 0.25531915 0.74626866]\n"
]
}
],
"source": [
"# see how the new column Fare_tree_discret is distributed\n",
"# the values are corresponding to the proba of the prediction by the tree\n",
"print(data1.head(5))\n",
"\n",
"# the unique value of the discretisized column\n",
"print(data1.Fare_tree_discret.unique())"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Fare Fare\n",
"Fare_tree_discret \n",
"0.107143 0.0000 7.5208\n",
"0.255319 7.5500 10.5167\n",
"0.442308 11.1333 73.5000\n",
"0.746269 75.2500 512.3292\n"
]
}
],
"source": [
"# see how the bins are cut\n",
"# because we use a tree with max-depth of 2, we have at most 2*2=4 bins generated by the tree\n",
"col='Fare'\n",
"bins = pd.concat([data1.groupby([col+'_tree_discret'])[col].min(),\n",
" data1.groupby([col+'_tree_discret'])[col].max()], axis=1)\n",
"print(bins)\n",
"\n",
"# all values between 0 to 7.5208 in the original variable 'Fare' \n",
"# are given new value 0.107143 in the new column 'Fare_tree_discret'\n",
"# and so on"
]
},
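{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A sketch of the idea behind DiscretizeByDecisionTree (not the repo's code):\n",
"# fit a shallow tree on the single feature and use its predicted probability\n",
"# as the bin label, so each leaf becomes one bin. With the same default\n",
"# settings this should reproduce the four probabilities above.\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"sketch_tree = DecisionTreeClassifier(max_depth=2).fit(X_train[['Fare']], y_train)\n",
"print(np.unique(sketch_tree.predict_proba(X_train[['Fare']])[:, 1]))"
]
},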
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Discretisation with Decision Tree with optimal depth search"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"result ROC-AUC for each depth\n",
" depth roc_auc_mean roc_auc_std\n",
"0 2 0.662132 0.026253\n",
"1 3 0.647950 0.045010\n",
"2 4 0.650984 0.035127\n",
"3 5 0.651180 0.027663\n",
"4 6 0.653961 0.037421\n",
"5 7 0.643688 0.033513\n",
"optimal_depth: [2]\n"
]
}
],
"source": [
"# search for the best depth from range 2-7\n",
"# we see when depth=2 we get the best roc-auc mean\n",
"enc2 = dc.DiscretizeByDecisionTree(col='Fare',max_depth=[2,3,4,5,6,7]).fit(X=X_train,y=y_train)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DecisionTreeClassifier(class_weight=None, criterion='gini',\n",
" max_depth=array([2], dtype=int64), max_features=None,\n",
" max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
" min_impurity_split=None, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" presort=False, random_state=None, splitter='best')"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# using optimal depth=2 we train the model, same result as last one\n",
"enc2.tree_model"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Fare</th>\n",
" <th>Fare_tree_discret</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>7.2500</td>\n",
" <td>0.107143</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>71.2833</td>\n",
" <td>0.442308</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" <td>0.255319</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>53.1000</td>\n",
" <td>0.442308</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>8.0500</td>\n",
" <td>0.255319</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Sex Age SibSp Fare Fare_tree_discret\n",
"0 0 3 male 22.0 1 7.2500 0.107143\n",
"1 1 1 female 38.0 1 71.2833 0.442308\n",
"2 1 3 female 26.0 0 7.9250 0.255319\n",
"3 1 1 female 35.0 1 53.1000 0.442308\n",
"4 0 3 male 35.0 0 8.0500 0.255319"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data2 = enc2.transform(data)\n",
"data2.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Discretisation with ChiMerge\n",
"supervised hierarchical bottom-up (merge) method that locally exploits the chi-square criterion to decide whether two adjacent intervals are similar enough to be merged"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Interval for variable Fare\n",
" variable interval flag_0 flag_1\n",
"0 Fare -inf,7.875 94.0 28.0\n",
"1 Fare 7.875,7.8792 0.0 3.0\n",
"2 Fare 7.8792,7.8958 25.0 1.0\n",
"3 Fare 7.8958,73.5 245.0 160.0\n",
"4 Fare 73.5+ 17.0 50.0\n"
]
}
],
"source": [
"enc3 = dc.ChiMerge(col='Fare',num_of_bins=5).fit(X=X_train,y='Survived')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[-0.1, 7.875, 7.8792, 7.8958, 73.5, 512.3292]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# the bins boundary created by ChiMerge\n",
"\n",
"enc3.bins"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data3 = enc3.transform(data)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Survived Pclass Sex Age SibSp Fare Fare_chimerge\n",
"0 0 3 male 22.0 1 7.2500 (-0.101, 7.875]\n",
"1 1 1 female 38.0 1 71.2833 (7.896, 73.5]\n",
"2 1 3 female 26.0 0 7.9250 (7.896, 73.5]\n",
"3 1 1 female 35.0 1 53.1000 (7.896, 73.5]\n",
"4 0 3 male 35.0 0 8.0500 (7.896, 73.5]\n"
]
}
],
"source": [
"print(data3.head(5))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(-0.101, 7.875], (7.896, 73.5], (73.5, 512.329], (7.875, 7.879], (7.879, 7.896]]\n",
"Categories (5, interval[float64]): [(-0.101, 7.875] < (7.875, 7.879] < (7.879, 7.896] < (7.896, 73.5] < (73.5, 512.329]]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# all values are grouped into 5 intervals\n",
"data3.Fare_chimerge.unique()"
]
},
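{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A sketch of the merge criterion, assuming the usual ChiMerge formulation:\n",
"# for each pair of adjacent intervals, build the 2x2 table of class counts\n",
"# and compute chi-square; the most similar pair (lowest statistic) merges first.\n",
"from scipy.stats import chi2_contingency\n",
"# class counts (flag_0, flag_1) of the first two intervals printed above\n",
"observed = np.array([[94.0, 28.0], [0.0, 3.0]])\n",
"chi2, p, dof, expected = chi2_contingency(observed, correction=False)\n",
"print(chi2, p)"
]
},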
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,688 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"# import seaborn as sns\n",
"# import matplotlib.pyplot as plt\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"import category_encoders as ce\n",
"from feature_engineering import encoding\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Fare</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>7.2500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>71.2833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>53.1000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>8.0500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Sex Age SibSp Fare\n",
"0 0 3 male 22.0 1 7.2500\n",
"1 1 1 female 38.0 1 71.2833\n",
"2 1 3 female 26.0 0 7.9250\n",
"3 1 1 female 35.0 1 53.1000\n",
"4 0 3 male 35.0 0 8.0500"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"use_cols = [\n",
" 'Pclass', 'Sex', 'Age', 'Fare', 'SibSp',\n",
" 'Survived'\n",
"]\n",
"\n",
"data = pd.read_csv('./data/titanic.csv', usecols=use_cols)\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((623, 6), (268, 6))"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(data, data.Survived, test_size=0.3,\n",
" random_state=0)\n",
"X_train.shape, X_test.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## One-hot encoding\n",
"replace the categorical variable by different boolean variables (0/1) to indicate whether or not certain label is true for that observation"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data1 = pd.get_dummies(data,drop_first=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Fare</th>\n",
" <th>Sex_male</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>7.2500</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>71.2833</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>53.1000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>8.0500</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Age SibSp Fare Sex_male\n",
"0 0 3 22.0 1 7.2500 1\n",
"1 1 1 38.0 1 71.2833 0\n",
"2 1 3 26.0 0 7.9250 0\n",
"3 1 1 35.0 1 53.1000 0\n",
"4 0 3 35.0 0 8.0500 1"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data1.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Ordinal-encoding\n",
"replace the labels by some ordinal number if ordinal is meaningful"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ord_enc = ce.OrdinalEncoder(cols=['Sex']).fit(X_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Survived Pclass Sex Age SibSp Fare\n",
"0 0 3 1 22.0 1 7.2500\n",
"1 1 1 2 38.0 1 71.2833\n",
"2 1 3 2 26.0 0 7.9250\n",
"3 1 1 2 35.0 1 53.1000\n",
"4 0 3 1 35.0 0 8.0500\n"
]
}
],
"source": [
"data4 = ord_enc.transform(data)\n",
"print(data4.head(5))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Mean encoding\n",
"replace the label by the mean of the target for that label. \n",
"(the target must be 0/1 valued or continuous)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Sex\n",
"female 0.753488\n",
"male 0.196078\n",
"Name: Survived, dtype: float64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# cross check-- the mean of target group by Sex\n",
"X_train['Survived'].groupby(data['Sex']).mean()\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"mean_enc = encoding.MeanEncoding(cols=['Sex']).fit(X_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Survived Pclass Sex Age SibSp Fare\n",
"0 0 3 0.196078 22.0 1 7.2500\n",
"1 1 1 0.753488 38.0 1 71.2833\n",
"2 1 3 0.753488 26.0 0 7.9250\n",
"3 1 1 0.753488 35.0 1 53.1000\n",
"4 0 3 0.196078 35.0 0 8.0500\n"
]
}
],
"source": [
"data6 = mean_enc.transform(data)\n",
"print(data6.head(5))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Target-encoding\n",
"Similar to mean encoding, but use both posterior probability and prior probability of the target"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# create the encoder and fit with our data\n",
"target_enc = ce.TargetEncoder(cols=['Sex']).fit(X_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# perform transformation\n",
"# data.Survived.groupby(data['Sex']).agg(['mean'])\n",
"data2 = target_enc.transform(data)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Fare</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.196078</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>7.2500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.753488</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>71.2833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0.753488</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.753488</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>53.1000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.196078</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>8.0500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Sex Age SibSp Fare\n",
"0 0 3 0.196078 22.0 1 7.2500\n",
"1 1 1 0.753488 38.0 1 71.2833\n",
"2 1 3 0.753488 26.0 0 7.9250\n",
"3 1 1 0.753488 35.0 1 53.1000\n",
"4 0 3 0.196078 35.0 0 8.0500"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check the result\n",
"data2.head()"
]
},
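{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# An illustrative posterior/prior blend behind target encoding. The exact\n",
"# smoothing in category_encoders differs (a sigmoid-weighted blend), so the\n",
"# additive form and the strength m below are assumptions for clarity only:\n",
"m = 1.0\n",
"prior = y_train.mean()\n",
"stats = y_train.groupby(X_train['Sex']).agg(['mean', 'count'])\n",
"smoothed = (stats['count'] * stats['mean'] + m * prior) / (stats['count'] + m)\n",
"print(smoothed)"
]
},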
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## WOE-encoding\n",
"replace the label with Weight of Evidence of each label. WOE is computed from the basic odds ratio: \n",
"\n",
"ln( (Proportion of Good Outcomes) / (Proportion of Bad Outcomes))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"woe_enc = ce.WOEEncoder(cols=['Sex']).fit(X_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data3 = woe_enc.transform(data)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Fare</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>-0.950742</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>7.2500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1.555633</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>71.2833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1.555633</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1.555633</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>53.1000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>-0.950742</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>8.0500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Sex Age SibSp Fare\n",
"0 0 3 -0.950742 22.0 1 7.2500\n",
"1 1 1 1.555633 38.0 1 71.2833\n",
"2 1 3 1.555633 26.0 0 7.9250\n",
"3 1 1 1.555633 35.0 1 53.1000\n",
"4 0 3 -0.950742 35.0 0 8.0500"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data3.head(5)"
]
},
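{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# WOE computed by hand from the training split, following the formula above:\n",
"# ln(P(label | survived) / P(label | not survived)). WOEEncoder applies some\n",
"# regularization by default, so its values can differ slightly from these.\n",
"good = X_train.loc[y_train == 1, 'Sex'].value_counts(normalize=True)\n",
"bad = X_train.loc[y_train == 0, 'Sex'].value_counts(normalize=True)\n",
"print(np.log(good / bad))"
]
},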
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,522 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"# import seaborn as sns\n",
"# import matplotlib.pyplot as plt\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import roc_curve, roc_auc_score\n",
"\n",
"# plt.style.use('seaborn-colorblind')\n",
"# %matplotlib inline\n",
"#from feature_cleaning import rare_values as ra"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"use_cols = [\n",
" 'Pclass', 'Sex', 'Age', 'Fare', 'SibSp',\n",
" 'Survived'\n",
"]\n",
"\n",
"data = pd.read_csv('./data/titanic.csv', usecols=use_cols)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Fare</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>7.2500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>71.2833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Sex Age SibSp Fare\n",
"0 0 3 male 22.0 1 7.2500\n",
"1 1 1 female 38.0 1 71.2833\n",
"2 1 3 female 26.0 0 7.9250"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((623, 6), (268, 6))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Note that we include target variable in the X_train \n",
"# because we need it to supervise our discretization\n",
"# this is not the standard way of using train-test-split\n",
"X_train, X_test, y_train, y_test = train_test_split(data, data.Survived, test_size=0.3,\n",
" random_state=0)\n",
"X_train.shape, X_test.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Polynomial Expansion\n",
"\n",
"generate a new feature set consisting of all polynomial combinations of the features with degree less than or equal to the specified degree"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Pclass SibSp Pclass^2 Pclass SibSp SibSp^2\n",
"0 1.0 0.0 1.0 0.0 0.0\n",
"1 1.0 1.0 1.0 1.0 1.0\n",
"2 3.0 5.0 9.0 15.0 25.0\n",
"3 1.0 0.0 1.0 0.0 0.0\n",
"4 3.0 1.0 9.0 3.0 1.0\n",
"5 2.0 1.0 4.0 2.0 1.0\n"
]
}
],
"source": [
"# create polynomial combinations of feature 'Pclass','SibSp' with degree 2\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"pf = PolynomialFeatures(degree=2,include_bias=False).fit(X_train[['Pclass','SibSp']])\n",
"tmp = pf.transform(X_train[['Pclass','SibSp']])\n",
"X_train_copy = pd.DataFrame(tmp,columns=pf.get_feature_names(['Pclass','SibSp']))\n",
"print(X_train_copy.head(6))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Feature Learning by Trees\n",
"GBDT derived feature + LR"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"sample's belonging node of each base tree \n",
"' [[ 7. 7. 6. ... 4. 7. 4.]\n",
" [ 7. 7. 6. ... 14. 7. 7.]\n",
" [11. 11. 11. ... 4. 6. 11.]\n",
" ...\n",
" [10. 10. 10. ... 4. 6. 10.]\n",
" [13. 14. 13. ... 4. 7. 13.]\n",
" [ 7. 7. 6. ... 6. 7. 7.]]\n",
"AUC for GBDT derived feature + LR 0.7746130952380953\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:368: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
"If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
"In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
" warnings.warn(msg, FutureWarning)\n"
]
}
],
"source": [
"from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"\n",
"gbdt = GradientBoostingClassifier(n_estimators=20)\n",
"one_hot = OneHotEncoder()\n",
"\n",
"X_train = X_train[[ 'Pclass', 'Age', 'Fare', 'SibSp']].fillna(0)\n",
"X_test = X_test[[ 'Pclass', 'Age', 'Fare', 'SibSp']].fillna(0)\n",
"\n",
"gbdt.fit(X_train, y_train)\n",
"\n",
"X_leaf_index = gbdt.apply(X_train)[:, :, 0] # apply return the node index on each tree \n",
"print(\"sample's belonging node of each base tree \\n'\",X_leaf_index)\n",
"# fit one-hot encoder\n",
"one_hot.fit(X_leaf_index) \n",
"X_one_hot = one_hot.transform(X_leaf_index) \n",
"\n",
"\n",
"from sklearn.linear_model import LogisticRegression\n",
"lr = LogisticRegression(solver='lbfgs', max_iter=1000)\n",
"lr.fit(X_one_hot,y_train)\n",
"y_pred = lr.predict_proba(\n",
" one_hot.transform(gbdt.apply(X_test)[:, :, 0]))[:,1]\n",
"fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred)\n",
"print(\"AUC for GBDT derived feature + LR\", roc_auc_score(y_test, y_pred))\n"
]
},
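{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The FutureWarning above comes from OneHotEncoder inferring integer\n",
"# categories from their range; as the warning suggests, passing\n",
"# categories='auto' opts into the newer unique-value behaviour\n",
"# (assuming scikit-learn >= 0.20):\n",
"one_hot_auto = OneHotEncoder(categories='auto')\n",
"X_one_hot_auto = one_hot_auto.fit_transform(gbdt.apply(X_train)[:, :, 0])\n",
"print(X_one_hot_auto.shape)"
]
},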
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Feature Learning by Trees\n",
"RandomForest derived feature + LR"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"sample's belonging node of each base tree \n",
"' [[212 35 79 ... 146 60 46]\n",
" [307 165 266 ... 136 132 44]\n",
" [285 285 320 ... 301 294 300]\n",
" ...\n",
" [ 13 177 133 ... 186 169 117]\n",
" [190 296 311 ... 282 289 297]\n",
" [264 165 243 ... 152 110 314]]\n",
"AUC for RandomForest derived feature + LR 0.759672619047619\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:368: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
"If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
"In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
" warnings.warn(msg, FutureWarning)\n"
]
}
],
"source": [
"rf = RandomForestClassifier(n_estimators=20)\n",
"one_hot = OneHotEncoder()\n",
"\n",
"X_train = X_train[[ 'Pclass', 'Age', 'Fare', 'SibSp']].fillna(0)\n",
"X_test = X_test[[ 'Pclass', 'Age', 'Fare', 'SibSp']].fillna(0)\n",
"\n",
"rf.fit(X_train, y_train)\n",
"\n",
"X_leaf_index = rf.apply(X_train) # apply return the node index on each tree \n",
"print(\"sample's belonging node of each base tree \\n'\",X_leaf_index)\n",
"# fit one-hot encoder\n",
"one_hot.fit(X_leaf_index) \n",
"X_one_hot = one_hot.transform(X_leaf_index) \n",
"\n",
"\n",
"lr = LogisticRegression(solver='lbfgs', max_iter=1000)\n",
"lr.fit(X_one_hot,y_train)\n",
"y_pred = lr.predict_proba(\n",
" one_hot.transform(rf.apply(X_test)))[:,1]\n",
"fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred)\n",
"print(\"AUC for RandomForest derived feature + LR\", roc_auc_score(y_test, y_pred))\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Feature Learning by Trees\n",
"GBDT derived feature + Raw feature +LR"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AUC for GBDT derived feature + Raw feature +LR 0.7603571428571428\n"
]
}
],
"source": [
"from scipy.sparse import hstack\n",
"\n",
"X_train_ext = hstack([one_hot.transform(gbdt.apply(X_train)[:, :, 0]), X_train])\n",
"X_test_ext = hstack([one_hot.transform(gbdt.apply(X_test)[:, :, 0]), X_test])\n",
"lr = LogisticRegression(solver='lbfgs', max_iter=1000)\n",
"lr.fit(X_train_ext,y_train)\n",
"y_pred = lr.predict_proba(X_test_ext)[:,1]\n",
"fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred)\n",
"print(\"AUC for GBDT derived feature + Raw feature +LR\", roc_auc_score(y_test, y_pred))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Feature Learning by Trees\n",
"RandomForest derived feature + Raw feature +LR"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AUC for RandomForest derived feature + Raw feature + LR 0.76\n"
]
}
],
"source": [
"X_train_ext = hstack([one_hot.transform(rf.apply(X_train)), X_train])\n",
"X_test_ext = hstack([one_hot.transform(rf.apply(X_test)), X_test])\n",
"lr = LogisticRegression(solver='lbfgs', max_iter=1000)\n",
"lr.fit(X_train_ext,y_train)\n",
"y_pred = lr.predict_proba(X_test_ext)[:,1]\n",
"fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred)\n",
"print(\"AUC for RandomForest derived feature + Raw feature + LR\", roc_auc_score(y_test, y_pred))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Feature Learning by Trees\n",
"Use only Raw Feature + LR"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AUC for RandomForest derived feature + LR 0.6988690476190476\n"
]
}
],
"source": [
"lr = LogisticRegression(solver='lbfgs', max_iter=1000)\n",
"lr.fit(X_train,y_train)\n",
"y_pred = lr.predict_proba(X_test)[:,1]\n",
"fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred)\n",
"print(\"AUC for RandomForest derived feature + LR\", roc_auc_score(y_test, y_pred))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Feature Learning by Trees\n",
"\n",
"Use only Raw Feature + GBDT"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AUC for Raw feature + GBDT 0.7613988095238096\n"
]
}
],
"source": [
"gbdt = GradientBoostingClassifier(n_estimators=20)\n",
"\n",
"X_train = X_train[[ 'Pclass', 'Age', 'Fare', 'SibSp']].fillna(0)\n",
"X_test = X_test[[ 'Pclass', 'Age', 'Fare', 'SibSp']].fillna(0)\n",
"\n",
"gbdt.fit(X_train, y_train)\n",
"y_pred = gbdt.predict_proba(X_test)[:,1]\n",
"fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred)\n",
"print(\"AUC for Raw feature + GBDT\", roc_auc_score(y_test, y_pred))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Feature Learning by Trees\n",
"\n",
"Use only Raw Feature + RF\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AUC for Raw feature + RF 0.7235119047619047\n"
]
}
],
"source": [
"rf = RandomForestClassifier(n_estimators=20)\n",
"\n",
"X_train = X_train[[ 'Pclass', 'Age', 'Fare', 'SibSp']].fillna(0)\n",
"X_test = X_test[[ 'Pclass', 'Age', 'Fare', 'SibSp']].fillna(0)\n",
"\n",
"rf.fit(X_train, y_train)\n",
"y_pred = rf.predict_proba(X_test)[:,1]\n",
"fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred)\n",
"print(\"AUC for Raw feature + RF\", roc_auc_score(y_test, y_pred))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Without tuning, we can see GBDT derived feature + LR get the best result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,696 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"# import seaborn as sns\n",
"# import matplotlib.pyplot as plt\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"# plt.style.use('seaborn-colorblind')\n",
"# %matplotlib inline\n",
"from feature_selection import filter_method as ft"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.datasets import load_breast_cancer\n",
"data = load_breast_cancer()\n",
"data = pd.DataFrame(np.c_[data['data'], data['target']],\n",
" columns= np.append(data['feature_names'], ['target']))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>...</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.3001</td>\n",
" <td>0.14710</td>\n",
" <td>0.2419</td>\n",
" <td>0.07871</td>\n",
" <td>...</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.0869</td>\n",
" <td>0.07017</td>\n",
" <td>0.1812</td>\n",
" <td>0.05667</td>\n",
" <td>...</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.1974</td>\n",
" <td>0.12790</td>\n",
" <td>0.2069</td>\n",
" <td>0.05999</td>\n",
" <td>...</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.2414</td>\n",
" <td>0.10520</td>\n",
" <td>0.2597</td>\n",
" <td>0.09744</td>\n",
" <td>...</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.1980</td>\n",
" <td>0.10430</td>\n",
" <td>0.1809</td>\n",
" <td>0.05883</td>\n",
" <td>...</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" mean radius mean texture mean perimeter mean area mean smoothness \\\n",
"0 17.99 10.38 122.80 1001.0 0.11840 \n",
"1 20.57 17.77 132.90 1326.0 0.08474 \n",
"2 19.69 21.25 130.00 1203.0 0.10960 \n",
"3 11.42 20.38 77.58 386.1 0.14250 \n",
"4 20.29 14.34 135.10 1297.0 0.10030 \n",
"\n",
" mean compactness mean concavity mean concave points mean symmetry \\\n",
"0 0.27760 0.3001 0.14710 0.2419 \n",
"1 0.07864 0.0869 0.07017 0.1812 \n",
"2 0.15990 0.1974 0.12790 0.2069 \n",
"3 0.28390 0.2414 0.10520 0.2597 \n",
"4 0.13280 0.1980 0.10430 0.1809 \n",
"\n",
" mean fractal dimension ... worst texture worst perimeter worst area \\\n",
"0 0.07871 ... 17.33 184.60 2019.0 \n",
"1 0.05667 ... 23.41 158.80 1956.0 \n",
"2 0.05999 ... 25.53 152.50 1709.0 \n",
"3 0.09744 ... 26.50 98.87 567.7 \n",
"4 0.05883 ... 16.67 152.20 1575.0 \n",
"\n",
" worst smoothness worst compactness worst concavity worst concave points \\\n",
"0 0.1622 0.6656 0.7119 0.2654 \n",
"1 0.1238 0.1866 0.2416 0.1860 \n",
"2 0.1444 0.4245 0.4504 0.2430 \n",
"3 0.2098 0.8663 0.6869 0.2575 \n",
"4 0.1374 0.2050 0.4000 0.1625 \n",
"\n",
" worst symmetry worst fractal dimension target \n",
"0 0.4601 0.11890 0.0 \n",
"1 0.2750 0.08902 0.0 \n",
"2 0.3613 0.08758 0.0 \n",
"3 0.6638 0.17300 0.0 \n",
"4 0.2364 0.07678 0.0 \n",
"\n",
"[5 rows x 31 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((455, 30), (114, 30))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(data.drop(labels=['target'], axis=1), \n",
" data.target, test_size=0.2,\n",
" random_state=0)\n",
"X_train.shape, X_test.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Variance method\n",
"removing features that show the same value for the majority/all of the observations (constant/quasi-constant features)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 variables are found to be almost constant\n"
]
}
],
"source": [
"# the original dataset has no constant variable\n",
"quasi_constant_feature = ft.constant_feature_detect(data=X_train,threshold=0.9)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0 0.923077\n",
"0.0 0.068132\n",
"2.0 0.008791\n",
"Name: dummy, dtype: float64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# lets create a duumy variable that help us do the demonstration\n",
"X_train['dummy'] = np.floor(X_train['worst smoothness']*10)\n",
"# variable dummy has> 92% of the observations show one value, 1.0\n",
"X_train.dummy.value_counts() / np.float(len(X_train))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 variables are found to be almost constant\n"
]
},
{
"data": {
"text/plain": [
"['dummy']"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"quasi_constant_feature = ft.constant_feature_detect(data=X_train,threshold=0.9)\n",
"quasi_constant_feature"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(455, 30)\n"
]
}
],
"source": [
"# drop that variable\n",
"X_train.drop(labels=quasi_constant_feature,axis=1,inplace=True)\n",
"print(X_train.shape)"
]
},
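  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A hypothetical sketch of what `constant_feature_detect` may do internally. The actual implementation lives in `feature_selection.filter_method` and may differ in its details."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# hypothetical sketch of the quasi-constant check (actual implementation may differ)\n",
    "def constant_feature_detect_sketch(data, threshold=0.9):\n",
    "    # flag a feature when its most frequent value covers more than\n",
    "    # `threshold` of the observations\n",
    "    quasi_constant = [\n",
    "        col for col in data.columns\n",
    "        if data[col].value_counts(normalize=True).iloc[0] > threshold\n",
    "    ]\n",
    "    print(len(quasi_constant), 'variables are found to be almost constant')\n",
    "    return quasi_constant"
   ]
  },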
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Correlation method\n",
"remove features that are highly correlated with each other"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" feature1 feature2 corr\n",
"0 mean perimeter mean radius 0.998185\n",
"6 mean perimeter mean area 0.986692\n",
"14 mean perimeter worst perimeter 0.970507\n",
"19 mean perimeter worst radius 0.969520\n",
"33 mean perimeter worst area 0.941920 \n",
"\n",
" feature1 feature2 corr\n",
"12 perimeter error radius error 0.978323\n",
"30 perimeter error area error 0.944995 \n",
"\n",
" feature1 feature2 corr\n",
"36 mean concavity mean concave points 0.914627 \n",
"\n",
" feature1 feature2 corr\n",
"38 mean texture worst texture 0.908182 \n",
"\n",
" feature1 feature2 corr\n",
"40 worst concave points mean concave points 0.906312 \n",
"\n"
]
}
],
"source": [
"corr = ft.corr_feature_detect(data=X_train,threshold=0.9)\n",
"# print all the correlated feature groups!\n",
"for i in corr:\n",
" print(i,'\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"then we can decide which ones to remove."
]
},
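  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For instance, a hedged sketch of one common removal policy: keep the first-seen feature of each highly correlated pair and drop the other (assuming the same 0.9 cut-off as above; other tie-breaking rules are possible)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: drop one feature from each highly correlated pair\n",
    "corr_matrix = X_train.corr().abs()\n",
    "# keep only the upper triangle so each pair is considered once\n",
    "upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))\n",
    "to_drop = [col for col in upper.columns if (upper[col] > 0.9).any()]\n",
    "print(len(to_drop), 'features would be dropped')"
   ]
  },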
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Mutual Information Filter\n",
"Mutual information measures how much information the presence/absence of a feature contributes to making the correct prediction on Y."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['mean concave points', 'worst perimeter', 'worst area'], dtype='object')\n"
]
}
],
"source": [
"# select the top 3 features\n",
"mi = ft.mutual_info(X=X_train,y=y_train,select_k=3)\n",
"print(mi)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['mean perimeter', 'mean concave points', 'worst radius',\n",
" 'worst perimeter', 'worst area', 'worst concave points'],\n",
" dtype='object')\n"
]
}
],
"source": [
"# select the top 20% features\n",
"mi = ft.mutual_info(X=X_train,y=y_train,select_k=0.2)\n",
"print(mi)"
]
},
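  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a cross-check, a sketch of the equivalent selection with scikit-learn's built-ins. That `ft.mutual_info` wraps something like `SelectKBest` is an assumption; the packaged function may differ."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: scikit-learn equivalent of the mutual information filter\n",
    "from sklearn.feature_selection import SelectKBest, mutual_info_classif\n",
    "sel = SelectKBest(mutual_info_classif, k=3).fit(X_train, y_train)\n",
    "print(X_train.columns[sel.get_support()])"
   ]
  },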
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Chi-Square Filter\n",
"Compute chi-squared stats between each non-negative feature and class"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['mean area', 'area error', 'worst area'], dtype='object')\n"
]
}
],
"source": [
"# select the top 3 features\n",
"chi = ft.chi_square_test(X=X_train,y=y_train,select_k=3)\n",
"print(chi)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['mean perimeter', 'mean area', 'area error', 'worst radius',\n",
" 'worst perimeter', 'worst area'],\n",
" dtype='object')\n"
]
}
],
"source": [
"# select the top 20% features\n",
"chi = ft.chi_square_test(X=X_train,y=y_train,select_k=0.2)\n",
"print(chi)"
]
},
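  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Similarly, a hedged scikit-learn sketch of the chi-square filter (assuming `ft.chi_square_test` wraps something like `SelectKBest` / `SelectPercentile`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: scikit-learn equivalents (chi2 requires non-negative features)\n",
    "from sklearn.feature_selection import SelectKBest, SelectPercentile, chi2\n",
    "top3 = SelectKBest(chi2, k=3).fit(X_train, y_train)\n",
    "print(X_train.columns[top3.get_support()])\n",
    "top20pct = SelectPercentile(chi2, percentile=20).fit(X_train, y_train)\n",
    "print(X_train.columns[top20pct.get_support()])"
   ]
  },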
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Univariate ROC-AUC or MSE\n",
"builds one decision tree per feature, to predict the target, then make predictions and ranks the features according to the machine learning metric (roc-auc or mse)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"worst perimeter 0.917275\n",
"worst area 0.895840\n",
"worst radius 0.893458\n",
"worst concave points 0.863131\n",
"mean concavity 0.856939\n",
"mean radius 0.849000\n",
"mean area 0.839314\n",
"worst concavity 0.831375\n",
"mean perimeter 0.829628\n",
"mean concave points 0.826453\n",
"area error 0.812321\n",
"worst compactness 0.742299\n",
"radius error 0.740235\n",
"mean compactness 0.734360\n",
"perimeter error 0.680534\n",
"worst texture 0.647666\n",
"worst fractal dimension 0.640997\n",
"concavity error 0.640203\n",
"worst symmetry 0.620991\n",
"concave points error 0.618133\n",
"compactness error 0.607336\n",
"mean symmetry 0.591775\n",
"mean texture 0.573357\n",
"texture error 0.568593\n",
"worst smoothness 0.565100\n",
"mean smoothness 0.557637\n",
"fractal dimension error 0.542077\n",
"smoothness error 0.522706\n",
"symmetry error 0.493649\n",
"mean fractal dimension 0.475548\n",
"dtype: float64\n",
"11 out of the 30 featues are kept\n",
"mean radius 0.849000\n",
"mean perimeter 0.829628\n",
"mean area 0.839314\n",
"mean concavity 0.856939\n",
"mean concave points 0.826453\n",
"area error 0.812321\n",
"worst radius 0.893458\n",
"worst perimeter 0.917275\n",
"worst area 0.895840\n",
"worst concavity 0.831375\n",
"worst concave points 0.863131\n",
"dtype: float64\n"
]
}
],
"source": [
"uni_roc_auc = ft.univariate_roc_auc(X_train=X_train,y_train=y_train,\n",
" X_test=X_test,y_test=y_test,threshold=0.8)\n",
"print(uni_roc_auc)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mean fractal dimension 0.491228\n",
"symmetry error 0.480750\n",
"fractal dimension error 0.456140\n",
"smoothness error 0.449561\n",
"texture error 0.412281\n",
"worst smoothness 0.403265\n",
"mean smoothness 0.399123\n",
"mean texture 0.396930\n",
"mean symmetry 0.363060\n",
"compactness error 0.361842\n",
"concave points error 0.357456\n",
"worst fractal dimension 0.355263\n",
"worst symmetry 0.350877\n",
"worst texture 0.333333\n",
"concavity error 0.333333\n",
"perimeter error 0.300439\n",
"mean compactness 0.258772\n",
"worst compactness 0.254386\n",
"radius error 0.245614\n",
"area error 0.179825\n",
"mean perimeter 0.166667\n",
"mean concave points 0.166667\n",
"worst concavity 0.162281\n",
"mean radius 0.146930\n",
"mean concavity 0.142544\n",
"mean area 0.140351\n",
"worst concave points 0.123782\n",
"worst area 0.103070\n",
"worst radius 0.100877\n",
"worst perimeter 0.098684\n",
"dtype: float64\n",
"6 out of the 30 featues are kept\n",
"mean fractal dimension 0.491228\n",
"texture error 0.412281\n",
"smoothness error 0.449561\n",
"symmetry error 0.480750\n",
"fractal dimension error 0.456140\n",
"worst smoothness 0.403265\n",
"dtype: float64\n"
]
}
],
"source": [
"uni_mse = ft.univariate_mse(X_train=X_train,y_train=y_train,\n",
" X_test=X_test,y_test=y_test,threshold=0.4)\n",
"print(uni_mse)"
]
},
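  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A hypothetical sketch of the per-feature ranking loop behind `univariate_roc_auc`. The packaged function may differ in details such as the tree's hyperparameters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# hypothetical sketch: one decision tree per feature, ranked by test ROC-AUC\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.metrics import roc_auc_score\n",
    "\n",
    "roc_values = []\n",
    "for feature in X_train.columns:\n",
    "    clf = DecisionTreeClassifier()\n",
    "    clf.fit(X_train[[feature]], y_train)\n",
    "    y_scored = clf.predict_proba(X_test[[feature]])[:, 1]\n",
    "    roc_values.append(roc_auc_score(y_test, y_scored))\n",
    "roc_series = pd.Series(roc_values, index=X_train.columns)\n",
    "print(roc_series.sort_values(ascending=False))"
   ]
  },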
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}


@@ -0,0 +1,548 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"# import seaborn as sns\n",
"# import matplotlib.pyplot as plt\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"from mlxtend.feature_selection import SequentialFeatureSelector as SFS\n",
"from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS\n",
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
"\n",
"# plt.style.use('seaborn-colorblind')\n",
"# %matplotlib inline\n",
"# from feature_selection import filter_method as ft"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.datasets import load_breast_cancer\n",
"data = load_breast_cancer()\n",
"data = pd.DataFrame(np.c_[data['data'], data['target']],\n",
" columns= np.append(data['feature_names'], ['target']))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>...</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.3001</td>\n",
" <td>0.14710</td>\n",
" <td>0.2419</td>\n",
" <td>0.07871</td>\n",
" <td>...</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.0869</td>\n",
" <td>0.07017</td>\n",
" <td>0.1812</td>\n",
" <td>0.05667</td>\n",
" <td>...</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.1974</td>\n",
" <td>0.12790</td>\n",
" <td>0.2069</td>\n",
" <td>0.05999</td>\n",
" <td>...</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.2414</td>\n",
" <td>0.10520</td>\n",
" <td>0.2597</td>\n",
" <td>0.09744</td>\n",
" <td>...</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.1980</td>\n",
" <td>0.10430</td>\n",
" <td>0.1809</td>\n",
" <td>0.05883</td>\n",
" <td>...</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" mean radius mean texture mean perimeter mean area mean smoothness \\\n",
"0 17.99 10.38 122.80 1001.0 0.11840 \n",
"1 20.57 17.77 132.90 1326.0 0.08474 \n",
"2 19.69 21.25 130.00 1203.0 0.10960 \n",
"3 11.42 20.38 77.58 386.1 0.14250 \n",
"4 20.29 14.34 135.10 1297.0 0.10030 \n",
"\n",
" mean compactness mean concavity mean concave points mean symmetry \\\n",
"0 0.27760 0.3001 0.14710 0.2419 \n",
"1 0.07864 0.0869 0.07017 0.1812 \n",
"2 0.15990 0.1974 0.12790 0.2069 \n",
"3 0.28390 0.2414 0.10520 0.2597 \n",
"4 0.13280 0.1980 0.10430 0.1809 \n",
"\n",
" mean fractal dimension ... worst texture worst perimeter worst area \\\n",
"0 0.07871 ... 17.33 184.60 2019.0 \n",
"1 0.05667 ... 23.41 158.80 1956.0 \n",
"2 0.05999 ... 25.53 152.50 1709.0 \n",
"3 0.09744 ... 26.50 98.87 567.7 \n",
"4 0.05883 ... 16.67 152.20 1575.0 \n",
"\n",
" worst smoothness worst compactness worst concavity worst concave points \\\n",
"0 0.1622 0.6656 0.7119 0.2654 \n",
"1 0.1238 0.1866 0.2416 0.1860 \n",
"2 0.1444 0.4245 0.4504 0.2430 \n",
"3 0.2098 0.8663 0.6869 0.2575 \n",
"4 0.1374 0.2050 0.4000 0.1625 \n",
"\n",
" worst symmetry worst fractal dimension target \n",
"0 0.4601 0.11890 0.0 \n",
"1 0.2750 0.08902 0.0 \n",
"2 0.3613 0.08758 0.0 \n",
"3 0.6638 0.17300 0.0 \n",
"4 0.2364 0.07678 0.0 \n",
"\n",
"[5 rows x 31 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((455, 30), (114, 30))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(data.drop(labels=['target'], axis=1), \n",
" data.target, test_size=0.2,\n",
" random_state=0)\n",
"X_train.shape, X_test.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Forward Selection\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 30 out of 30 | elapsed: 11.4s finished\n",
"Features: 1/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 29 out of 29 | elapsed: 11.2s finished\n",
"Features: 2/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 28 out of 28 | elapsed: 10.7s finished\n",
"Features: 3/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 27 out of 27 | elapsed: 10.3s finished\n",
"Features: 4/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 26 out of 26 | elapsed: 10.0s finished\n",
"Features: 5/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 25 out of 25 | elapsed: 9.6s finished\n",
"Features: 6/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 24 out of 24 | elapsed: 9.2s finished\n",
"Features: 7/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 23 out of 23 | elapsed: 8.8s finished\n",
"Features: 8/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 22 out of 22 | elapsed: 8.4s finished\n",
"Features: 9/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 21 out of 21 | elapsed: 8.1s finished\n",
"Features: 10/10"
]
}
],
"source": [
"# step forward feature selection\n",
"# select top 10 features based on the optimal roc_auc and RandomForest Classifier\n",
"\n",
"sfs1 = SFS(RandomForestClassifier(n_jobs=-1,n_estimators=5), \n",
" k_features=10, \n",
" forward=True, \n",
" floating=False, \n",
" verbose=1,\n",
" scoring='roc_auc',\n",
" cv=3)\n",
"\n",
"sfs1 = sfs1.fit(np.array(X_train), y_train)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['mean texture', 'mean perimeter', 'mean concavity',\n",
" 'mean fractal dimension', 'area error', 'compactness error',\n",
" 'worst perimeter', 'worst area', 'worst smoothness', 'worst symmetry'],\n",
" dtype='object')"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"selected_feat1= X_train.columns[list(sfs1.k_feature_idx_)]\n",
"selected_feat1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Backward Elimination"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 30 out of 30 | elapsed: 11.5s finished\n",
"Features: 1/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 29 out of 29 | elapsed: 11.2s finished\n",
"Features: 2/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 28 out of 28 | elapsed: 10.7s finished\n",
"Features: 3/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 27 out of 27 | elapsed: 10.2s finished\n",
"Features: 4/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 26 out of 26 | elapsed: 10.1s finished\n",
"Features: 5/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 25 out of 25 | elapsed: 9.6s finished\n",
"Features: 6/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 24 out of 24 | elapsed: 9.2s finished\n",
"Features: 7/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 23 out of 23 | elapsed: 8.8s finished\n",
"Features: 8/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 22 out of 22 | elapsed: 8.5s finished\n",
"Features: 9/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 21 out of 21 | elapsed: 8.2s finished\n",
"Features: 10/10"
]
}
],
"source": [
"# step backward feature selection\n",
"# select top 10 features based on the optimal roc_auc and RandomForest Classifier\n",
"\n",
"sfs2 = SFS(RandomForestClassifier(n_jobs=-1,n_estimators=5), \n",
" k_features=10, \n",
" forward=False, \n",
" floating=False, \n",
" verbose=1,\n",
" scoring='roc_auc',\n",
" cv=3)\n",
"\n",
"sfs2 = sfs1.fit(np.array(X_train.fillna(0)), y_train)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['mean area', 'mean compactness', 'texture error', 'area error',\n",
" 'compactness error', 'concavity error', 'worst texture',\n",
" 'worst perimeter', 'worst smoothness', 'worst concavity'],\n",
" dtype='object')"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"selected_feat2= X_train.columns[list(sfs2.k_feature_idx_)]\n",
"selected_feat2\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that SFS and SBE return different results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exhaustive Feature Selection"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 847/847"
]
}
],
"source": [
"efs1 = EFS(RandomForestClassifier(n_jobs=-1,n_estimators=5, random_state=0), \n",
" min_features=1,\n",
" max_features=6, \n",
" scoring='roc_auc',\n",
" print_progress=True,\n",
" cv=2)\n",
"\n",
"# in order to shorter search time for the demonstration\n",
"# we only try all possible 1,2,3,4,5,6\n",
"# feature combinations from a dataset of 10 features\n",
"\n",
"efs1 = efs1.fit(np.array(X_train[X_train.columns[0:10]].fillna(0)), y_train)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['mean radius', 'mean texture', 'mean area', 'mean smoothness',\n",
" 'mean concavity'],\n",
" dtype='object')"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"selected_feat3= X_train.columns[list(efs1.best_idx_)]\n",
"selected_feat3"
]
},
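  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check on the progress counter above, the 847 candidate subsets are the sum of C(10, k) for k = 1 to 6."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sanity check: sum of C(10, k) for k = 1..6 equals the 847 subsets searched\n",
    "from scipy.special import comb\n",
    "print(sum(comb(10, k, exact=True) for k in range(1, 7)))  # 847"
   ]
  },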
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long


@@ -0,0 +1,595 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"# import seaborn as sns\n",
"# import matplotlib.pyplot as plt\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.feature_selection import SelectFromModel\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"# plt.style.use('seaborn-colorblind')\n",
"# %matplotlib inline\n",
"from feature_selection import feature_shuffle\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.datasets import load_breast_cancer\n",
"data = load_breast_cancer()\n",
"data = pd.DataFrame(np.c_[data['data'], data['target']],\n",
" columns= np.append(data['feature_names'], ['target']))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>...</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.3001</td>\n",
" <td>0.14710</td>\n",
" <td>0.2419</td>\n",
" <td>0.07871</td>\n",
" <td>...</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.0869</td>\n",
" <td>0.07017</td>\n",
" <td>0.1812</td>\n",
" <td>0.05667</td>\n",
" <td>...</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.1974</td>\n",
" <td>0.12790</td>\n",
" <td>0.2069</td>\n",
" <td>0.05999</td>\n",
" <td>...</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.2414</td>\n",
" <td>0.10520</td>\n",
" <td>0.2597</td>\n",
" <td>0.09744</td>\n",
" <td>...</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.1980</td>\n",
" <td>0.10430</td>\n",
" <td>0.1809</td>\n",
" <td>0.05883</td>\n",
" <td>...</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" mean radius mean texture mean perimeter mean area mean smoothness \\\n",
"0 17.99 10.38 122.80 1001.0 0.11840 \n",
"1 20.57 17.77 132.90 1326.0 0.08474 \n",
"2 19.69 21.25 130.00 1203.0 0.10960 \n",
"3 11.42 20.38 77.58 386.1 0.14250 \n",
"4 20.29 14.34 135.10 1297.0 0.10030 \n",
"\n",
" mean compactness mean concavity mean concave points mean symmetry \\\n",
"0 0.27760 0.3001 0.14710 0.2419 \n",
"1 0.07864 0.0869 0.07017 0.1812 \n",
"2 0.15990 0.1974 0.12790 0.2069 \n",
"3 0.28390 0.2414 0.10520 0.2597 \n",
"4 0.13280 0.1980 0.10430 0.1809 \n",
"\n",
" mean fractal dimension ... worst texture worst perimeter worst area \\\n",
"0 0.07871 ... 17.33 184.60 2019.0 \n",
"1 0.05667 ... 23.41 158.80 1956.0 \n",
"2 0.05999 ... 25.53 152.50 1709.0 \n",
"3 0.09744 ... 26.50 98.87 567.7 \n",
"4 0.05883 ... 16.67 152.20 1575.0 \n",
"\n",
" worst smoothness worst compactness worst concavity worst concave points \\\n",
"0 0.1622 0.6656 0.7119 0.2654 \n",
"1 0.1238 0.1866 0.2416 0.1860 \n",
"2 0.1444 0.4245 0.4504 0.2430 \n",
"3 0.2098 0.8663 0.6869 0.2575 \n",
"4 0.1374 0.2050 0.4000 0.1625 \n",
"\n",
" worst symmetry worst fractal dimension target \n",
"0 0.4601 0.11890 0.0 \n",
"1 0.2750 0.08902 0.0 \n",
"2 0.3613 0.08758 0.0 \n",
"3 0.6638 0.17300 0.0 \n",
"4 0.2364 0.07678 0.0 \n",
"\n",
"[5 rows x 31 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((455, 30), (114, 30))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(data.drop(labels=['target'], axis=1), \n",
" data.target, test_size=0.2,\n",
" random_state=0)\n",
"X_train.shape, X_test.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Feature Shuffling\n",
"permute the values of each feature, one at the time, and measure how much the permutation decreases the accuracy, or the roc_auc, or the mse of the machine learning model.\n",
"If the variables are important, this is, highly predictive, a random permutation of their values will decrease dramatically any of these metrics."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"auc_drop, selected_features = feature_shuffle.feature_shuffle_rf(X_train=X_train,\n",
" y_train=y_train,\n",
" random_state=0)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>feature</th>\n",
" <th>auc_drop</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>worst perimeter</td>\n",
" <td>8.359457e-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>worst concave points</td>\n",
" <td>3.134796e-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>worst area</td>\n",
" <td>1.110223e-16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>perimeter error</td>\n",
" <td>1.110223e-16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>mean radius</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>concavity error</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>worst symmetry</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>worst concavity</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>worst compactness</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>worst smoothness</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>worst texture</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>worst radius</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>fractal dimension error</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>symmetry error</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>concave points error</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>compactness error</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>mean texture</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>smoothness error</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>area error</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>texture error</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>radius error</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>mean fractal dimension</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>mean symmetry</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>mean concave points</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>mean concavity</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>mean compactness</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>mean smoothness</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>mean area</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>mean perimeter</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>worst fractal dimension</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" feature auc_drop\n",
"22 worst perimeter 8.359457e-05\n",
"27 worst concave points 3.134796e-05\n",
"23 worst area 1.110223e-16\n",
"12 perimeter error 1.110223e-16\n",
"0 mean radius 0.000000e+00\n",
"16 concavity error 0.000000e+00\n",
"28 worst symmetry 0.000000e+00\n",
"26 worst concavity 0.000000e+00\n",
"25 worst compactness 0.000000e+00\n",
"24 worst smoothness 0.000000e+00\n",
"21 worst texture 0.000000e+00\n",
"20 worst radius 0.000000e+00\n",
"19 fractal dimension error 0.000000e+00\n",
"18 symmetry error 0.000000e+00\n",
"17 concave points error 0.000000e+00\n",
"15 compactness error 0.000000e+00\n",
"1 mean texture 0.000000e+00\n",
"14 smoothness error 0.000000e+00\n",
"13 area error 0.000000e+00\n",
"11 texture error 0.000000e+00\n",
"10 radius error 0.000000e+00\n",
"9 mean fractal dimension 0.000000e+00\n",
"8 mean symmetry 0.000000e+00\n",
"7 mean concave points 0.000000e+00\n",
"6 mean concavity 0.000000e+00\n",
"5 mean compactness 0.000000e+00\n",
"4 mean smoothness 0.000000e+00\n",
"3 mean area 0.000000e+00\n",
"2 mean perimeter 0.000000e+00\n",
"29 worst fractal dimension 0.000000e+00"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# we select features that have auc_drop > 0\n",
"auc_drop"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"22 worst perimeter\n",
"27 worst concave points\n",
"23 worst area\n",
"12 perimeter error\n",
"Name: feature, dtype: object"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"selected_features"
]
},
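  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A hypothetical sketch of the permutation loop behind `feature_shuffle_rf`. The packaged function may differ, e.g. in the model's hyperparameters or in evaluating on train vs. test data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# hypothetical sketch: permute one column at a time and record the AUC drop\n",
    "from sklearn.metrics import roc_auc_score\n",
    "\n",
    "rf = RandomForestClassifier(n_estimators=50, random_state=0, n_jobs=-1)\n",
    "rf.fit(X_train, y_train)\n",
    "benchmark = roc_auc_score(y_train, rf.predict_proba(X_train)[:, 1])\n",
    "rng = np.random.RandomState(0)\n",
    "drops = {}\n",
    "for feature in X_train.columns:\n",
    "    X_shuffled = X_train.copy()\n",
    "    X_shuffled[feature] = rng.permutation(X_shuffled[feature].values)\n",
    "    shuffled_auc = roc_auc_score(y_train, rf.predict_proba(X_shuffled)[:, 1])\n",
    "    drops[feature] = benchmark - shuffled_auc\n",
    "print(pd.Series(drops).sort_values(ascending=False).head())"
   ]
  },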
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}


@@ -0,0 +1,884 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"# import seaborn as sns\n",
"# import matplotlib.pyplot as plt\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.feature_selection import SelectFromModel\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"# plt.style.use('seaborn-colorblind')\n",
"# %matplotlib inline\n",
"from sklearn.feature_selection import RFE\n",
"from feature_selection import hybrid\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.datasets import load_breast_cancer\n",
"data = load_breast_cancer()\n",
"data = pd.DataFrame(np.c_[data['data'], data['target']],\n",
" columns= np.append(data['feature_names'], ['target']))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>...</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.3001</td>\n",
" <td>0.14710</td>\n",
" <td>0.2419</td>\n",
" <td>0.07871</td>\n",
" <td>...</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.0869</td>\n",
" <td>0.07017</td>\n",
" <td>0.1812</td>\n",
" <td>0.05667</td>\n",
" <td>...</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.1974</td>\n",
" <td>0.12790</td>\n",
" <td>0.2069</td>\n",
" <td>0.05999</td>\n",
" <td>...</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.2414</td>\n",
" <td>0.10520</td>\n",
" <td>0.2597</td>\n",
" <td>0.09744</td>\n",
" <td>...</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.1980</td>\n",
" <td>0.10430</td>\n",
" <td>0.1809</td>\n",
" <td>0.05883</td>\n",
" <td>...</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" mean radius mean texture mean perimeter mean area mean smoothness \\\n",
"0 17.99 10.38 122.80 1001.0 0.11840 \n",
"1 20.57 17.77 132.90 1326.0 0.08474 \n",
"2 19.69 21.25 130.00 1203.0 0.10960 \n",
"3 11.42 20.38 77.58 386.1 0.14250 \n",
"4 20.29 14.34 135.10 1297.0 0.10030 \n",
"\n",
" mean compactness mean concavity mean concave points mean symmetry \\\n",
"0 0.27760 0.3001 0.14710 0.2419 \n",
"1 0.07864 0.0869 0.07017 0.1812 \n",
"2 0.15990 0.1974 0.12790 0.2069 \n",
"3 0.28390 0.2414 0.10520 0.2597 \n",
"4 0.13280 0.1980 0.10430 0.1809 \n",
"\n",
" mean fractal dimension ... worst texture worst perimeter worst area \\\n",
"0 0.07871 ... 17.33 184.60 2019.0 \n",
"1 0.05667 ... 23.41 158.80 1956.0 \n",
"2 0.05999 ... 25.53 152.50 1709.0 \n",
"3 0.09744 ... 26.50 98.87 567.7 \n",
"4 0.05883 ... 16.67 152.20 1575.0 \n",
"\n",
" worst smoothness worst compactness worst concavity worst concave points \\\n",
"0 0.1622 0.6656 0.7119 0.2654 \n",
"1 0.1238 0.1866 0.2416 0.1860 \n",
"2 0.1444 0.4245 0.4504 0.2430 \n",
"3 0.2098 0.8663 0.6869 0.2575 \n",
"4 0.1374 0.2050 0.4000 0.1625 \n",
"\n",
" worst symmetry worst fractal dimension target \n",
"0 0.4601 0.11890 0.0 \n",
"1 0.2750 0.08902 0.0 \n",
"2 0.3613 0.08758 0.0 \n",
"3 0.6638 0.17300 0.0 \n",
"4 0.2364 0.07678 0.0 \n",
"\n",
"[5 rows x 31 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((455, 30), (114, 30))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(data.drop(labels=['target'], axis=1), \n",
" data.target, test_size=0.2,\n",
" random_state=0)\n",
"X_train.shape, X_test.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Recursive Feature Elimination \n",
"### with Random Forests Importance\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example 1\n",
"This method is slightly **different from the guide**, as it use a different stopping criterion: the desired number of features to select is eventually reached."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RFE(estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
" max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=None,\n",
" oob_score=False, random_state=None, verbose=0,\n",
" warm_start=False),\n",
" n_features_to_select=10, step=1, verbose=0)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# n_features_to_select decide the stopping criterion\n",
"# we stop till 10 features remaining\n",
"\n",
"sel_ = RFE(RandomForestClassifier(n_estimators=20), n_features_to_select=10)\n",
"sel_.fit(X_train.fillna(0), y_train)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['mean texture', 'mean perimeter', 'mean area', 'mean concavity',\n",
" 'mean concave points', 'worst radius', 'worst perimeter', 'worst area',\n",
" 'worst concave points', 'worst symmetry'],\n",
" dtype='object')\n"
]
}
],
"source": [
"selected_feat = X_train.columns[(sel_.get_support())]\n",
"print(selected_feat)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"### Example 2\n",
"recursive feature elimination with RandomForest\n",
"with the method same as the guide\n",
"1. Rank the features according to their importance derived from a machine learning algorithm: it can be tree importance, or LASSO / Ridge, or the linear / logistic regression coefficients.\n",
"2. Remove one feature -the least important- and build a machine learning algorithm utilizing the remaining features.\n",
"3. Calculate a performance metric of your choice: roc-auc, mse, rmse, accuracy.\n",
"4. If the metric decreases by more of an arbitrarily set threshold, then that feature is important and should be kept. Otherwise, we can remove that feature.\n",
"5. Repeat steps 2-4 until all features have been removed (and therefore evaluated) and the drop in performance assessed.\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"testing feature: mean radius which is feature 1 out of 30\n",
"New Test ROC AUC=0.9941251190854239\n",
"All features Test ROC AUC=0.9968243886948238\n",
"Drop in ROC AUC=0.0026992696093999236\n",
"keep: mean radius\n",
"\n",
"testing feature: mean texture which is feature 2 out of 30\n",
"New Test ROC AUC=0.9936487773896475\n",
"All features Test ROC AUC=0.9968243886948238\n",
"Drop in ROC AUC=0.0031756113051762958\n",
"keep: mean texture\n",
"\n",
"testing feature: mean perimeter which is feature 3 out of 30\n",
"New Test ROC AUC=0.9968243886948238\n",
"All features Test ROC AUC=0.9968243886948238\n",
"Drop in ROC AUC=0.0\n",
"remove: mean perimeter\n",
"\n",
"testing feature: mean area which is feature 4 out of 30\n",
"New Test ROC AUC=0.9960304858685297\n",
"All features Test ROC AUC=0.9968243886948238\n",
"Drop in ROC AUC=0.0007939028262941017\n",
"remove: mean area\n",
"\n",
"testing feature: mean smoothness which is feature 5 out of 30\n",
"New Test ROC AUC=0.9965068275643061\n",
"All features Test ROC AUC=0.9960304858685297\n",
"Drop in ROC AUC=-0.0004763416957763722\n",
"remove: mean smoothness\n",
"\n",
"testing feature: mean compactness which is feature 6 out of 30\n",
"New Test ROC AUC=0.9942838996506828\n",
"All features Test ROC AUC=0.9965068275643061\n",
"Drop in ROC AUC=0.0022229279136233293\n",
"keep: mean compactness\n",
"\n",
"testing feature: mean concavity which is feature 7 out of 30\n",
"New Test ROC AUC=0.9957129247380121\n",
"All features Test ROC AUC=0.9965068275643061\n",
"Drop in ROC AUC=0.0007939028262939907\n",
"remove: mean concavity\n",
"\n",
"testing feature: mean concave points which is feature 8 out of 30\n",
"New Test ROC AUC=0.9976182915211178\n",
"All features Test ROC AUC=0.9957129247380121\n",
"Drop in ROC AUC=-0.0019053667831057108\n",
"remove: mean concave points\n",
"\n",
"testing feature: mean symmetry which is feature 9 out of 30\n",
"New Test ROC AUC=0.9953953636074945\n",
"All features Test ROC AUC=0.9976182915211178\n",
"Drop in ROC AUC=0.0022229279136233293\n",
"keep: mean symmetry\n",
"\n",
"testing feature: mean fractal dimension which is feature 10 out of 30\n",
"New Test ROC AUC=0.9949190219117181\n",
"All features Test ROC AUC=0.9976182915211178\n",
"Drop in ROC AUC=0.0026992696093997015\n",
"keep: mean fractal dimension\n",
"\n",
"testing feature: radius error which is feature 11 out of 30\n",
"New Test ROC AUC=0.9952365830422356\n",
"All features Test ROC AUC=0.9976182915211178\n",
"Drop in ROC AUC=0.002381708478882194\n",
"keep: radius error\n",
"\n",
"testing feature: texture error which is feature 12 out of 30\n",
"New Test ROC AUC=0.9952365830422356\n",
"All features Test ROC AUC=0.9976182915211178\n",
"Drop in ROC AUC=0.002381708478882194\n",
"keep: texture error\n",
"\n",
"testing feature: perimeter error which is feature 13 out of 30\n",
"New Test ROC AUC=0.9939663385201651\n",
"All features Test ROC AUC=0.9976182915211178\n",
"Drop in ROC AUC=0.003651953000952668\n",
"keep: perimeter error\n",
"\n",
"testing feature: area error which is feature 14 out of 30\n",
"New Test ROC AUC=0.994919021911718\n",
"All features Test ROC AUC=0.9976182915211178\n",
"Drop in ROC AUC=0.0026992696093998125\n",
"keep: area error\n",
"\n",
"testing feature: smoothness error which is feature 15 out of 30\n",
"New Test ROC AUC=0.995871705303271\n",
"All features Test ROC AUC=0.9976182915211178\n",
"Drop in ROC AUC=0.001746586217846846\n",
"keep: smoothness error\n",
"\n",
"testing feature: compactness error which is feature 16 out of 30\n",
"New Test ROC AUC=0.9958717053032708\n",
"All features Test ROC AUC=0.9976182915211178\n",
"Drop in ROC AUC=0.0017465862178469571\n",
"keep: compactness error\n",
"\n",
"testing feature: concavity error which is feature 17 out of 30\n",
"New Test ROC AUC=0.9961892664337886\n",
"All features Test ROC AUC=0.9976182915211178\n",
"Drop in ROC AUC=0.0014290250873292276\n",
"keep: concavity error\n",
"\n",
"testing feature: concave points error which is feature 18 out of 30\n",
"New Test ROC AUC=0.9961892664337885\n",
"All features Test ROC AUC=0.9976182915211178\n",
"Drop in ROC AUC=0.0014290250873293386\n",
"keep: concave points error\n",
"\n",
"testing feature: symmetry error which is feature 19 out of 30\n",
"New Test ROC AUC=0.9968243886948238\n",
"All features Test ROC AUC=0.9976182915211178\n",
"Drop in ROC AUC=0.0007939028262939907\n",
"remove: symmetry error\n",
"\n",
"testing feature: fractal dimension error which is feature 20 out of 30\n",
"New Test ROC AUC=0.9946014607812005\n",
"All features Test ROC AUC=0.9968243886948238\n",
"Drop in ROC AUC=0.0022229279136233293\n",
"keep: fractal dimension error\n",
"\n",
"testing feature: worst radius which is feature 21 out of 30\n",
"New Test ROC AUC=0.9955541441727532\n",
"All features Test ROC AUC=0.9968243886948238\n",
"Drop in ROC AUC=0.001270244522070585\n",
"keep: worst radius\n",
"\n",
"testing feature: worst texture which is feature 22 out of 30\n",
"New Test ROC AUC=0.9958717053032708\n",
"All features Test ROC AUC=0.9968243886948238\n",
"Drop in ROC AUC=0.0009526833915529664\n",
"remove: worst texture\n",
"\n",
"testing feature: worst perimeter which is feature 23 out of 30\n",
"New Test ROC AUC=0.995871705303271\n",
"All features Test ROC AUC=0.9958717053032708\n",
"Drop in ROC AUC=-1.1102230246251565e-16\n",
"remove: worst perimeter\n",
"\n",
"testing feature: worst area which is feature 24 out of 30\n",
"New Test ROC AUC=0.9938075579549063\n",
"All features Test ROC AUC=0.995871705303271\n",
"Drop in ROC AUC=0.0020641473483646866\n",
"keep: worst area\n",
"\n",
"testing feature: worst smoothness which is feature 25 out of 30\n",
"New Test ROC AUC=0.9939663385201651\n",
"All features Test ROC AUC=0.995871705303271\n",
"Drop in ROC AUC=0.0019053667831058219\n",
"keep: worst smoothness\n",
"\n",
"testing feature: worst compactness which is feature 26 out of 30\n",
"New Test ROC AUC=0.9960304858685296\n",
"All features Test ROC AUC=0.995871705303271\n",
"Drop in ROC AUC=-0.0001587805652586427\n",
"remove: worst compactness\n",
"\n",
"testing feature: worst concavity which is feature 27 out of 30\n",
"New Test ROC AUC=0.9966656081295648\n",
"All features Test ROC AUC=0.9960304858685296\n",
"Drop in ROC AUC=-0.0006351222610352369\n",
"remove: worst concavity\n",
"\n",
"testing feature: worst concave points which is feature 28 out of 30\n",
"New Test ROC AUC=0.9936487773896475\n",
"All features Test ROC AUC=0.9966656081295648\n",
"Drop in ROC AUC=0.00301683073991732\n",
"keep: worst concave points\n",
"\n",
"testing feature: worst symmetry which is feature 29 out of 30\n",
"New Test ROC AUC=0.9976182915211178\n",
"All features Test ROC AUC=0.9966656081295648\n",
"Drop in ROC AUC=-0.0009526833915529664\n",
"remove: worst symmetry\n",
"\n",
"testing feature: worst fractal dimension which is feature 30 out of 30\n",
"New Test ROC AUC=0.9973007303906002\n",
"All features Test ROC AUC=0.9976182915211178\n",
"Drop in ROC AUC=0.00031756113051761847\n",
"remove: worst fractal dimension\n",
"DONE!!\n",
"total features to remove: 12\n",
"total features to keep: 18\n"
]
}
],
"source": [
"# tol decide whether we should drop or keep the feature in current round\n",
"features_to_keep = hybrid.recursive_feature_elimination_rf(X_train=X_train,\n",
" y_train=y_train,\n",
" X_test=X_test,\n",
" y_test=y_test,\n",
" tol=0.001)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['mean radius',\n",
" 'mean texture',\n",
" 'mean compactness',\n",
" 'mean symmetry',\n",
" 'mean fractal dimension',\n",
" 'radius error',\n",
" 'texture error',\n",
" 'perimeter error',\n",
" 'area error',\n",
" 'smoothness error',\n",
" 'compactness error',\n",
" 'concavity error',\n",
" 'concave points error',\n",
" 'fractal dimension error',\n",
" 'worst radius',\n",
" 'worst area',\n",
" 'worst smoothness',\n",
" 'worst concave points']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"features_to_keep"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Recursive Feature Addition\n",
"### with Random Forests Importance"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example 1\n",
"recursive feature addition with RandomForest\n",
"with the method same as the guide\n",
"1. Rank the features according to their importance derived from a machine learning algorithm: it can be tree importance, or LASSO / Ridge, or the linear / logistic regression coefficients.\n",
"2. Build a machine learning model with only 1 feature, the most important one, and calculate the model metric for performance.\n",
"3. Add one feature -the most important- and build a machine learning algorithm utilizing the added and any feature from previous rounds.\n",
"4. Calculate a performance metric of your choice: roc-auc, mse, rmse, accuracy.\n",
"5. If the metric increases by more than an arbitrarily set threshold, then that feature is important and should be kept. Otherwise, we can remove that feature.\n",
"6. Repeat steps 2-5 until all features have been removed (and therefore evaluated) and the drop in performance assessed.\n"
]
},
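{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# NOTE: a minimal sketch (an assumption, not the actual `hybrid` source code) of the\n",
"# add-one-feature-at-a-time loop that recursive_feature_addition_rf may run internally\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import roc_auc_score\n",
"\n",
"def recursive_feature_addition_sketch(X_train, y_train, X_test, y_test, tol=0.001):\n",
"    rf = RandomForestClassifier(n_estimators=50, random_state=0)\n",
"    rf.fit(X_train, y_train)\n",
"    # rank features from most to least important\n",
"    ranked = list(X_train.columns[rf.feature_importances_.argsort()[::-1]])\n",
"    kept = [ranked[0]]  # start with the single most important feature\n",
"    rf.fit(X_train[kept], y_train)\n",
"    base_auc = roc_auc_score(y_test, rf.predict_proba(X_test[kept])[:, 1])\n",
"    for feature in ranked[1:]:\n",
"        rf.fit(X_train[kept + [feature]], y_train)\n",
"        new_auc = roc_auc_score(y_test, rf.predict_proba(X_test[kept + [feature]])[:, 1])\n",
"        if new_auc - base_auc > tol:  # keep only if AUC improves by more than tol\n",
"            kept.append(feature)\n",
"            base_auc = new_auc\n",
"    return kept"
]
},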
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"testing feature: mean texture which is feature 1 out of 30\n",
"New Test ROC AUC=0.9558590028580501\n",
"All features Test ROC AUC=0.9009209272785013\n",
"Increase in ROC AUC=0.054938075579548884\n",
"keep: mean texture\n",
"\n",
"testing feature: mean perimeter which is feature 2 out of 30\n",
"New Test ROC AUC=0.9609399809463322\n",
"All features Test ROC AUC=0.9558590028580501\n",
"Increase in ROC AUC=0.005080978088282007\n",
"keep: mean perimeter\n",
"\n",
"testing feature: mean area which is feature 3 out of 30\n",
"New Test ROC AUC=0.9609399809463322\n",
"All features Test ROC AUC=0.9609399809463322\n",
"Increase in ROC AUC=0.0\n",
"remove: mean area\n",
"\n",
"testing feature: mean smoothness which is feature 4 out of 30\n",
"New Test ROC AUC=0.9684026675134964\n",
"All features Test ROC AUC=0.9609399809463322\n",
"Increase in ROC AUC=0.007462686567164201\n",
"keep: mean smoothness\n",
"\n",
"testing feature: mean compactness which is feature 5 out of 30\n",
"New Test ROC AUC=0.9750714512543665\n",
"All features Test ROC AUC=0.9684026675134964\n",
"Increase in ROC AUC=0.006668783740870099\n",
"keep: mean compactness\n",
"\n",
"testing feature: mean concavity which is feature 6 out of 30\n",
"New Test ROC AUC=0.9933312162591298\n",
"All features Test ROC AUC=0.9750714512543665\n",
"Increase in ROC AUC=0.01825976500476334\n",
"keep: mean concavity\n",
"\n",
"testing feature: mean concave points which is feature 7 out of 30\n",
"New Test ROC AUC=0.9925373134328358\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.0007939028262939907\n",
"remove: mean concave points\n",
"\n",
"testing feature: mean symmetry which is feature 8 out of 30\n",
"New Test ROC AUC=0.9895204826929185\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.0038107335662113107\n",
"remove: mean symmetry\n",
"\n",
"testing feature: mean fractal dimension which is feature 9 out of 30\n",
"New Test ROC AUC=0.9892029215624007\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.00412829469672904\n",
"remove: mean fractal dimension\n",
"\n",
"testing feature: radius error which is feature 10 out of 30\n",
"New Test ROC AUC=0.9895204826929184\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.0038107335662114217\n",
"remove: radius error\n",
"\n",
"testing feature: texture error which is feature 11 out of 30\n",
"New Test ROC AUC=0.9868212130835186\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.006510003175611234\n",
"remove: texture error\n",
"\n",
"testing feature: perimeter error which is feature 12 out of 30\n",
"New Test ROC AUC=0.9890441409971419\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.004287075261987905\n",
"remove: perimeter error\n",
"\n",
"testing feature: area error which is feature 13 out of 30\n",
"New Test ROC AUC=0.989044140997142\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.004287075261987794\n",
"remove: area error\n",
"\n",
"testing feature: smoothness error which is feature 14 out of 30\n",
"New Test ROC AUC=0.988091457605589\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.00523975865354076\n",
"remove: smoothness error\n",
"\n",
"testing feature: compactness error which is feature 15 out of 30\n",
"New Test ROC AUC=0.9895204826929184\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.0038107335662114217\n",
"remove: compactness error\n",
"\n",
"testing feature: concavity error which is feature 16 out of 30\n",
"New Test ROC AUC=0.9911082883455065\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.0022229279136233293\n",
"remove: concavity error\n",
"\n",
"testing feature: concave points error which is feature 17 out of 30\n",
"New Test ROC AUC=0.9906319466497301\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.0026992696093997015\n",
"remove: concave points error\n",
"\n",
"testing feature: symmetry error which is feature 18 out of 30\n",
"New Test ROC AUC=0.9876151159098127\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.0057161003493171325\n",
"remove: symmetry error\n",
"\n",
"testing feature: fractal dimension error which is feature 19 out of 30\n",
"New Test ROC AUC=0.9896792632581772\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.003651953000952557\n",
"remove: fractal dimension error\n",
"\n",
"testing feature: worst radius which is feature 20 out of 30\n",
"New Test ROC AUC=0.994125119085424\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=0.0007939028262942127\n",
"remove: worst radius\n",
"\n",
"testing feature: worst texture which is feature 21 out of 30\n",
"New Test ROC AUC=0.9906319466497301\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.0026992696093997015\n",
"remove: worst texture\n",
"\n",
"testing feature: worst perimeter which is feature 22 out of 30\n",
"New Test ROC AUC=0.9933312162591299\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=1.1102230246251565e-16\n",
"remove: worst perimeter\n",
"\n",
"testing feature: worst area which is feature 23 out of 30\n",
"New Test ROC AUC=0.9931724356938711\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.0001587805652586427\n",
"remove: worst area\n",
"\n",
"testing feature: worst smoothness which is feature 24 out of 30\n",
"New Test ROC AUC=0.9933312162591299\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=1.1102230246251565e-16\n",
"remove: worst smoothness\n",
"\n",
"testing feature: worst compactness which is feature 25 out of 30\n",
"New Test ROC AUC=0.9895204826929184\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=-0.0038107335662114217\n",
"remove: worst compactness\n",
"\n",
"testing feature: worst concavity which is feature 26 out of 30\n",
"New Test ROC AUC=0.9938075579549063\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=0.0004763416957764832\n",
"remove: worst concavity\n",
"\n",
"testing feature: worst concave points which is feature 27 out of 30\n",
"New Test ROC AUC=0.9971419498253413\n",
"All features Test ROC AUC=0.9933312162591298\n",
"Increase in ROC AUC=0.0038107335662115327\n",
"keep: worst concave points\n",
"\n",
"testing feature: worst symmetry which is feature 28 out of 30\n",
"New Test ROC AUC=0.9957129247380121\n",
"All features Test ROC AUC=0.9971419498253413\n",
"Increase in ROC AUC=-0.0014290250873292276\n",
"remove: worst symmetry\n",
"\n",
"testing feature: worst fractal dimension which is feature 29 out of 30\n",
"New Test ROC AUC=0.9950778024769769\n",
"All features Test ROC AUC=0.9971419498253413\n",
"Increase in ROC AUC=-0.0020641473483644646\n",
"remove: worst fractal dimension\n",
"DONE!!\n",
"total features to keep: 7\n"
]
}
],
"source": [
"features_to_keep = hybrid.recursive_feature_addition_rf(X_train=X_train,\n",
" y_train=y_train,\n",
" X_test=X_test,\n",
" y_test=y_test,\n",
" tol=0.001)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['mean radius',\n",
" 'mean texture',\n",
" 'mean perimeter',\n",
" 'mean smoothness',\n",
" 'mean compactness',\n",
" 'mean concavity',\n",
" 'worst concave points']"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"features_to_keep"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,824 @@
**Table of Contents**:
[TOC]
# A Short Guide for Feature Engineering and Feature Selection
Feature engineering and selection is the art/science of transforming data in the best way possible, which involves an elegant blend of domain expertise, intuition and mathematics. This guide is a concise reference for beginners, covering the most simple yet widely used techniques for feature engineering and selection. Any comments and contributions are most welcome.
## 0. Basic Concepts
### 0.1 What is Machine Learning
> Machine Learning is the science of getting computers to act without being explicitly programmed - [Arthur Samuel](https://simple.wikipedia.org/wiki/Machine_learning)
> Machine Learning is a technique of data science that helps computers learn from existing data in order to forecast future behaviors, outcomes and trends - [Microsoft](https://docs.microsoft.com/en-us/azure/machine-learning/service/overview-what-is-azure-ml)
> The field of Machine Learning seeks to answer the question “How can we build computer systems that automatically improve with experience, and what are the fundamental laws that govern all learning processes?“ - [Carnegie Mellon University](http://www.cs.cmu.edu/~tom/pubs/MachineLearning.pdf)
Narrowly speaking, in the data mining context, machine learning (ML) is the process of letting computers learn from historical data, recognize patterns/relationships within the data, and then make predictions.
### 0.2 Methodology
A typical ML workflow/pipeline looks like this:
![workflow](/images/workflow2.png)
Source: Practical Machine Learning with Python, Springer
There can be many ways to divide the tasks that make up the ML workflow into phases. But generally the basic steps are similar to the graph above.
### 0.3 Typical Tasks
| Task | Definition | Example |
| ----------------- | --------------------------------------------- | ------------------------------------ |
| Classification | predict what category new instance belongs to | is the tumor malign/benign? |
| Regression | predict a continuous numeric value | predict house/stock prices in future |
| Anomaly Detection | identify outliers | fraud detection |
| Clustering | separate similar data points into groups | customer segmentation |
### 0.4 Terminology
- **Feature**: also known as Attribute/ Independent Variable/ Predictor/ Input Variable. It's an individual measurable property/characteristic of a phenomenon being observed [[wiki]](https://en.wikipedia.org/wiki/Feature_(machine_learning)). The age of a person, etc.
- **Target**: also known as Dependent Variable/ Response Variable/ Output Variable. It's the variable being predicted in supervised learning.
- **Algorithm**: the specific procedure used to implement a particular ML technique. Linear Regression, etc.
- **Model**: the algorithm applied to a dataset, complete with its settings (its parameters). Y=4.5x+0.8, etc. We want the model that best captures the relationship between features and the target.
- **Supervised learning** : train the model with labeled data to generate reasonable predictions for the response to new data.
- **Unsupervised learning** : train the model with un-labeled data to find intrinsic structures/ patterns within the data.
- **Reinforcement learning**: the model is learned from a series of actions by maximizing a reward function, which can either be maximized by penalizing bad actions and/or rewarding good actions. Self-driving, etc.
## 1. Data Exploration
### 1.1 Variables
**Definition**: any measurable property/characteristic of a phenomenon being observed. They are called 'variables' because the value they take may vary (and it usually does) in a population.
**Types of Variable**
| Type | Sub-type | Definition | Example |
| ----------- | ---------- | ------------------------------------------------------------ | ------------------------------ |
| Categorical | Nominal | Variables with values selected from a group of categories, while not having any kind of natural order. [ref](http://www-ist.massey.ac.nz/dstirlin/CAST/CAST/Hstructures/structures_c2.html) | Gender, car types |
| | Ordinal | A categorical variable whose categories can be meaningfully ordered. [ref](http://www-ist.massey.ac.nz/dstirlin/CAST/CAST/Hstructures/structures_c2.html) | Grade of an exam |
| Numerical | Discrete | Variables whose values are either finite or countably infinite. [wiki](https://en.wikipedia.org/wiki/Continuous_or_discrete_variable) | Number of children in a family |
| | Continuous | Variable which can take on infinitely many, uncountable values. [wiki](https://en.wikipedia.org/wiki/Continuous_or_discrete_variable) | House prices, time passed |
### 1.2 Variable Identification
**Definition**: Identify the data types of each variable.
**Note**: In reality we may have variables of mixed type for a variety of reasons. For example, in credit scoring "Missed payment status" is a common variable that can take values 1, 2, 3, meaning that the customer has missed 1-3 payments on their account. And it can also take the value D if the customer defaulted on that account. We may have to convert data types after certain steps of data cleaning.
### 1.3 Univariate Analysis
Descriptive statistics on one single variable.
| Variable | What to look |
| ----------- | ------------------------------------------------------------ |
| Categorical | **Shape**:<br />Histogram/ Frequency table... |
| Numerical | **Central Tendency**:<br />Mean/ Median/ Mode<br />**Dispersion**:<br />Min/ Max/ Range/ Quantile/ IQR/ MAD/ Variance/ Standard Deviation/ <br />**Shape**:<br />Skewness/ Histogram/ Boxplot... |
Below are some methods that can give us the basic stats on the variable:
- pandas.DataFrame.describe()
- pandas.DataFrame.dtypes
- Barplot
- Countplot
- Boxplot
- Distplot
### 1.4 Bi-variate Analysis
Descriptive statistics between two or more variables.
- Scatter Plot
- Correlation Plot
- Heat Map
**Scatter Plot** is a type of plot or mathematical diagram using Cartesian coordinates to display values for typically two variables for a set of data. If the pattern of dots slopes from lower left to upper right, it indicates a positive correlation between the variables being studied. If the pattern of dots slopes from upper left to lower right, it indicates a negative correlation. [[wiki]](https://en.wikipedia.org/wiki/Scatter_plot)
**Correlation plot** can be used to quickly find insights. It is used to investigate the dependence between multiple variables at the same time and to highlight the most correlated variables in a data table.
**Heat map** (or heatmap) is a graphical representation of data where the individual values contained in a matrix are represented as colors.
## 2. Feature Cleaning
### 2.1 Missing Values
**Definition**: no value is stored in a certain observation within a variable.
#### 2.1.1 Why Missing Data Matters
- certain algorithms cannot work when missing values are present
- even for algorithms that handle missing data, the model can lead to inaccurate conclusions if the missing values are not treated
A study on the impact of missing data on different ML algorithms can be found [here](http://core.ecu.edu/omgt/krosj/IMDSDataMining2003.pdf).
#### 2.1.2 Missing Mechanisms[^1]
It is important to understand the mechanisms by which missing fields are introduced in a dataset. Depending on the mechanism, we may choose to process the missing values differently. The mechanisms were first introduced by Rubin[^2].
**Missing Completely at Random**
A variable is missing completely at random (MCAR) if the probability of being missing is the same for all the observations. When data is MCAR, there is absolutely no relationship between the data missing and any other values, observed or missing, within the dataset. In other words, those missing data points are a random subset of the data. There is nothing systematic going on that makes some data more likely to be missing than others.
If values for observations are missing completely at random, then disregarding those cases would not bias the inferences made.
**Missing at Random**
Missing at Random (MAR) occurs when there is a systematic relationship between the propensity of missing values and the observed data. In other words, the probability of an observation being missing depends only on available information (other variables in the dataset), but not on the variable itself.
For example, if men are more likely to disclose their weight than women, weight is MAR (on variable gender). The weight information will be missing at random for those men and women that decided not to disclose their weight, but as men are more prone to disclose it, there will be more missing values for women than for men.
In a situation like the above, if we decide to proceed with the variable with missing values, we might benefit from including gender to control the bias in weight for the missing observations.
**Missing Not At Random - Depends on Unobserved Predictors**
Missingness depends on information that has not been recorded, and this information also predicts the missing values. E.g., if a particular treatment causes discomfort, a patient is more likely to drop out of the study (and 'discomfort' is not measured).
In this situation, the data sample is biased if we drop those missing cases.
**Missing Not At Random - Depends on Missing Value Itself**
Missingness depends on the (potentially missing) variable itself. E.g., people with higher earnings are less likely to reveal them.
#### 2.1.3 How to Assume a Missing Mechanism
- By **business understanding**. In many situations we can assume the mechanism by probing into the business logic behind that variable.
- By **statistical test**. Divide the dataset into samples with and without missing values and perform a t-test to see if there are significant differences. If there are, we can assume that the data is not missing completely at random.
But we should keep in mind that we can hardly be 100% sure whether data are MCAR, MAR, or MNAR, because the unobserved predictors (lurking variables) are, by definition, unobserved.
#### 2.1.4 How to Handle Missing Data
| Method | Definition | Pros | Cons |
| ------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------ | :----------------------------------------------------------- |
| Listwise Deletion | excluding all cases (listwise) that have missing values | preserve distribution if MCAR | 1. may discard too much data and hurt the model<br>2. may yield biased estimates if not MCAR (as we keep a special subsample from the population) |
| Mean/Median/Mode Imputation | replacing the NA by mean/median/most frequent values (for categorical feature) of that variable | good practice if MCAR | 1. distort distribution<br>2. distort relationship with other variables |
| End of distribution Imputation | replacing the NA by values that are at the far end of the distribution of that variable, calculated by mean + 3*std | Captures the importance of missingness if there is one | 1. distort distribution<br />2. may be considered outlier if NA is few or mask true outlier if NA is many.<br />3. if missingness is not important this may mask the predictive power of the original variable |
| Random Imputation | replacing the NA by taking a random value from the pool of available observations of that variable | preserve distribution if MCAR | not recommended in business settings for its randomness (different result for same input) |
| Arbitrary Value Imputation | replacing the NA by arbitrary values | Captures the importance of missingness if there is one | 1. distort distribution<br />2. typically used values: -9999/9999. But be aware they may be regarded as outliers. |
| Add a variable to denote NA | creating an additional variable indicating whether the data was missing for that observation | Captures the importance of missingness if there is one | expand feature space |
In real settings, when it's hard to determine the missing mechanism, or when there's little time to study each missing variable in depth, a popular approach is to adopt:
- Mean/Median/Mode Imputation (depend on the distribution)
- End of distribution Imputation
- Add a variable to denote NA
simultaneously, so that we both catch the value of missingness and obtain a complete dataset.
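As an illustration, below is a minimal pandas sketch applying these three treatments to one numeric column of the titanic data (the end-of-distribution value mean + 3*std follows the table above):
```python
import pandas as pd

data = pd.read_csv('./data/titanic.csv', usecols=['Age', 'Survived'])

# add a binary variable to denote missingness, before any imputation
data['Age_is_na'] = data['Age'].isnull().astype(int)
# end-of-distribution imputation: mean + 3 * std
data['Age_eod'] = data['Age'].fillna(data['Age'].mean() + 3 * data['Age'].std())
# median imputation on the original column
data['Age'] = data['Age'].fillna(data['Age'].median())
```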
**Note**: Some algorithms like XGBoost incorporate missing data treatment into the model building process, so you don't need to do this step. However, it's important to make sure you understand how the algorithm treats missing values, and to explain it to the business team.
### 2.2 Outliers
**Definition**: An outlier is an observation which deviates so much from the other observations as to arouse suspicions that it was generated by a different mechanism.[^3]
**Note**: Outliers, depending on the context, either deserve special attention or should be completely ignored. For example, an unusual transaction on a credit card is usually a sign of fraudulent activity, while a height of 1600cm of a person is very likely due to measurement error and should be filtered out or imputed with something else.
#### 2.2.1 Why Outlier Matters
The presence of outliers may:
- prevent algorithms from working properly
- introduce noise into the dataset
- make samples less representative
Some algorithms are very sensitive to outliers. For example, AdaBoost may treat outliers as "hard" cases and put tremendous weight on them, therefore producing a model with bad generalization. Any algorithm that relies on means/variances is sensitive to outliers, as those statistics are greatly influenced by extreme values.
On the other hand, some algorithms are more robust to outliers. For example, decision trees tend to ignore the presence of outliers when creating the branches of their trees. Typically, trees make splits by asking whether variable x >= value t, so an outlier falls on one side of the branch or the other, and is treated equally to the remaining values, regardless of its magnitude.
#### 2.2.2 Outlier Detection
In fact outlier analysis and anomaly detection is a huge field of research. Charu Aggarwal's book "Outlier Analysis"[^4] offers great insight into the topic. PyOD[^5] is a comprehensive Python toolkit which contains many of the advanced methods in this field.
All the methods here listed are for univariate outlier detection. Multivariate outlier detection is beyond the scope of this guide.
| Method | Definition | Pros | Cons |
| ---------------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| Detect by arbitrary boundary | identify outliers based on arbitrary boundaries | flexible | requires business understanding |
| Mean & Standard Deviation method[^6][^7] | outlier detection by Mean & Standard Deviation Method | good for variables with a Gaussian distribution (68-95-99.7 rule) | sensitive to the extreme values themselves (as outliers inflate the SD) |
| IQR method[^8] | outlier detection by the Interquartile Range Rule | more robust than the Mean & SD method as it uses quantiles & IQR; resilient to extremes | can be too aggressive |
| MAD method[^6][^7] | outlier detection by the Median and Median Absolute Deviation Method | more robust than the Mean & SD method; resilient to extremes | can be too aggressive |
However, beyond these methods, it's more important to keep in mind that the business context should govern how you define and react to these outliers. The meanings of your findings should be dictated by the underlying context, rather than the number itself.
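For example, a minimal sketch of the IQR rule (the multiplier 1.5 is the conventional default):
```python
import pandas as pd

def iqr_outliers(series, k=1.5):
    q1, q3 = series.quantile(0.25), series.quantile(0.75)
    iqr = q3 - q1
    # flag observations outside [Q1 - k*IQR, Q3 + k*IQR]
    return (series < q1 - k * iqr) | (series > q3 + k * iqr)

data = pd.read_csv('./data/titanic.csv', usecols=['Fare'])
print(data[iqr_outliers(data['Fare'])].shape)  # rows flagged as outliers
```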
#### 2.2.3 How to Handle Outliers
| Method | Definition | Pros | Cons |
| ------------------------------- | ------------------------------------------------------------ | -------------------------------- | ------------------------------------------- |
| Mean/Median/Mode Imputation | replacing the outlier by mean/median/most frequent values of that variable | preserve distribution | lose information of outlier if there is one |
| Discretization | transform continuous variables into discrete variables | minimize the impact from outlier | lose information of outlier if there is one |
| Imputation with arbitrary value | impute outliers with an arbitrary value | flexible | hard to decide the value |
| Winsorization | top-coding & bottom-coding (capping the maximum of a distribution at an arbitrarily set value, and vice versa) | prevent model over-fitting | distort distribution |
| Discard outliers | drop all the observations that are outliers | / | lose information of outlier if there is one |
**Note**: A detailed guide to winsorization can be found [here](https://www.statisticshowto.datasciencecentral.com/winsorize/).
There are many strategies for dealing with outliers in data, and depending on the context and data set, any could be the right or the wrong way. It's important to investigate the nature of the outlier before deciding.
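For instance, winsorization can be sketched with pandas `clip` (the 5th/95th percentile cut-offs below are arbitrary):
```python
import pandas as pd

data = pd.read_csv('./data/titanic.csv', usecols=['Fare'])
lower, upper = data['Fare'].quantile([0.05, 0.95])
# cap the bottom and top of the distribution at the chosen percentiles
data['Fare_capped'] = data['Fare'].clip(lower=lower, upper=upper)
```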
### 2.3 Rare Values
**Definition**: A categorical variable, some of whose values appear only rarely.
**Note**: In some situations rare values, like outliers, may contain valuable information about the dataset and therefore need particular attention. For example, a rare value in a transaction variable may indicate fraud.
#### 2.3.1 Why Rare Value Matters
- Rare values in categorical variables tend to cause over-fitting, particularly in **tree based** methods.
- A big number of infrequent labels adds noise, with little information, therefore causing over-fitting.
- Rare labels may be present in training set, but not in test set, therefore causing over-fitting to the train set.
- Rare labels may appear in the test set, and not in the train set. Thus, the model will not know how to evaluate it.
#### 2.3.2 How to Handle Rare Value
| Method | Definition |
| ------------------------------ | ------------------------------------------------------------ |
| Mode Imputation | Replacing the rare label by most frequent label |
| Grouping into one new category | Grouping the observations that show rare labels into a unique category |
Depending on the situation, we may use different strategies:
- when **there's one predominant category (over 90%)** in the variable: observe the relationship between that variable and the target, then either discard that variable or keep it as it is. In this case, the variable is often not useful for prediction as it is quasi-constant (as we will see later in the Feature Selection part).
- when **there's a small number of categories**: keep the variable as it is, because only a few categories are unlikely to bring so much noise.
- when **there's high cardinality**: try the two methods above, though they do not guarantee better results than the original variable.
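A plain-pandas sketch of the grouping strategy (the 1% threshold is an arbitrary choice):
```python
import pandas as pd

data = pd.read_csv('./data/titanic.csv', usecols=['SibSp'])
freq = data['SibSp'].value_counts(normalize=True)
rare_labels = freq[freq < 0.01].index
# collapse labels that occur in less than 1% of observations into 'rare'
data['SibSp_grouped'] = data['SibSp'].where(~data['SibSp'].isin(rare_labels), 'rare')
```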
### 2.4 High Cardinality
**Definition**: The number of labels within a categorical variable is known as cardinality. A high number of labels within a variable is known as high cardinality.
#### 2.4.1 Why High Cardinality Matters
- Variables with too many labels tend to dominate over those with only a few labels, particularly in **tree based** algorithms.
- A big number of labels within a variable may introduce noise with little if any information, therefore making the machine learning models prone to over-fit.
- Some of the labels may only be present in the training data set, but not in the test set, therefore causing algorithms to over-fit the training set.
- Contrarily, new labels may appear in the test set that were not present in the training set, leaving the algorithm unable to perform a calculation over the new observations.
#### 2.4.2 How to Handle High Cardinality
| Method |
| ------------------------------------------------------ |
| Grouping labels with business understanding |
| Grouping labels with rare occurrence into one category |
| Grouping labels with decision tree |
All these methods attempt to group some of the labels and reduce cardinality. Grouping labels with decision tree is equivalent to the method introduced in section 3.2.2 Discretization with decision tree, which aims to merge labels into more homogenous groups. Grouping labels with rare occurrence into one category is equivalent to method in section 2.3.2.
## 3. Feature Engineering
### 3.1 Feature Scaling
**Definition**: Feature scaling is a method used to standardize the range of independent variables or features of data. In data processing, it is also known as data normalization and is generally performed during the data preprocessing step.
#### 3.1.1 Why Feature Scaling Matters
- If the range of inputs varies, in some algorithms the objective functions will not work properly.
- **Gradient descent** converges much faster with feature scaling done. Gradient descent is a common optimization algorithm used in logistic regression, SVMs, neural networks etc.
- Algorithms that involve **distance calculation** like KNN, Clustering are also affected by the magnitude of the feature. Just consider how Euclidean distance is calculated: taking the square root of the sum of the squared differences between observations. This distance can be greatly affected by differences in scale among the variables. Variables with large variances have a larger effect on this measure than variables with small variances.
**Note**: Tree-based algorithms are almost the only algorithms that are not affected by the magnitude of the input, as we can easily see from how trees are built. When deciding how to make a split, a tree algorithm looks for decisions like "whether feature value X > 3.0" and computes the purity of the child nodes after the split, so the scale of the feature does not matter.
#### 3.1.2 How to Handle Feature Scaling
| Method | Definition | Pros | Cons |
| ------------------------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| Normalization - Standardization (Z-score scaling) | removes the mean and scales the data to unit variance.<br />z = (X - X.mean) / std | feature is rescaled to have a standard normal distribution centered around 0 with an SD of 1 | compresses the observations into a narrow range if the variable is skewed or has outliers, thus impairing predictive power |
| Min-Max scaling | transforms features by scaling each feature to a given range. Defaults to [0,1].<br />X_scaled = (X - X.min) / (X.max - X.min) | / | compresses the observations into a narrow range if the variable is skewed or has outliers, thus impairing predictive power |
| Robust scaling | removes the median and scales the data according to the quantile range (defaults to IQR)<br />X_scaled = (X - X.median) / IQR | better at preserving the spread of the variable after transformation for skewed variables | / |
A comparison of three methods when facing outliers:
![scaling](/images/scaling.png)
[img source](https://stackoverflow.com/questions/51841506/data-standardization-vs-normalization-vs-robust-scaler)
As we can see, Normalization - Standardization and Min-Max scaling will compress most data into a narrow range, while the robust scaler does a better job at keeping the spread of the data, although it cannot **remove** the outliers from the processed result. Remember that removing/imputing outliers is a separate topic in data cleaning and should be done beforehand.
Experience on how to choose feature scaling method:
- if your feature is not Gaussian like, say, has a skewed distribution or has outliers, Normalization - Standardization is not a good choice as it will compress most data to a narrow range.
- However, we can transform the feature into Gaussian like and then use Normalization - Standardization. Feature transformation will be discussed in section 3.4
- When performing distance or covariance calculation (algorithm like Clustering, PCA and LDA), it is better to use Normalization - Standardization as it will remove the effect of scales on variance and covariance. Explanation [here](https://blog.csdn.net/zbc1090549839/article/details/44103801).
- Min-Max scaling has the same drawbacks as Normalization - Standardization, and in addition new data may not be bounded to [0,1], as they can fall outside the original range. However, some algorithms, for example certain deep learning networks, prefer inputs on a 0-1 scale, in which case it is a good choice.
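A quick sklearn sketch of the three methods (the column choice is arbitrary):
```python
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

X = pd.read_csv('./data/titanic.csv', usecols=['Fare'])
X_std = StandardScaler().fit_transform(X)     # z = (X - mean) / std
X_minmax = MinMaxScaler().fit_transform(X)    # scaled to [0, 1]
X_robust = RobustScaler().fit_transform(X)    # (X - median) / IQR
```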
Below is some additional resource on this topic:
- A comparison of the three methods when facing skewed variables can be found [here](https://scikit-learn.org/stable/auto_examples/preprocessing/plot_all_scaling.html#sphx-glr-auto-examples-preprocessing-plot-all-scaling-py).
- An in-depth study of feature scaling can be found [here](http://sebastianraschka.com/Articles/2014_about_feature_scaling.html).
### 3.2 Discretize
**Definition**: Discretization is the process of transforming continuous variables into discrete variables by creating a set of contiguous intervals that spans the range of the variable's values.
#### 3.2.1 Why Discretize Matters
- help to improve model performance by grouping values with similar predictive strength
- enhance interpretability with grouped values
- minimize the impact of **extreme values/seldom reversal patterns**
- prevent the overfitting that is possible with numerical variables
- allow feature interaction between continuous variables (section 3.5.5)
#### 3.2.2 How to Handle Discretization
| Method | Definition | Pros | Cons |
| ----------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| Equal width binning | divides the scope of possible values into N bins of the same width | / | sensitive to skewed distribution |
| Equal frequency binning | divides the scope of possible values of the variable into N bins, where each bin carries the same amount of observations | may help boost the algorithm's performance | this arbitrary binning may disrupt the relationship with the target |
| K-means binning | using k-means to partition values into clusters | / | needs hyper-parameter tuning |
| Discretization using decision trees | using a decision tree to identify the optimal splitting points that would determine the bins | observations within each bin are more similar to themselves than to those of other bins | 1. may cause over-fitting<br>2. may not get a good performing tree |
| ChiMerge[^11] | supervised hierarchical bottom-up (merge) method that locally exploits the chi-square criterion to decide whether two adjacent intervals are similar enough to be merged | robust and make use of a priori knowledge | cannot handle unlabeled data |
In general there's no single best discretization method. It really depends on the dataset and the learning algorithm that follows. Study your features and context carefully before deciding. You can also try different methods and compare model performance.
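For instance, equal-width and equal-frequency binning are one-liners in pandas (5 bins is an arbitrary choice):
```python
import pandas as pd

data = pd.read_csv('./data/titanic.csv', usecols=['Fare'])
data['Fare_eq_width'] = pd.cut(data['Fare'], bins=5)   # equal width binning
data['Fare_eq_freq'] = pd.qcut(data['Fare'], q=5)      # equal frequency binning
```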
Some literature reviews on feature discretization can be found [here1](https://pdfs.semanticscholar.org/94c3/d92eccbb66f571153f99b7ae6c6167a00923.pdf), [here2](http://robotics.stanford.edu/users/sahami/papers-dir/disc.pdf), [here3](http://axon.cs.byu.edu/papers/ventura.thesis.ps).
### 3.3 Feature Encoding
#### 3.3.1 Why Feature Encoding Matters
We must transform the strings of categorical variables into numbers so that algorithms can handle those values. Even if you see that an algorithm can take categorical inputs, it is most likely that the algorithm incorporates the encoding process internally.
#### 3.3.2 How to Handle Feature Encoding
| Method | Definition | Pros | Cons |
| ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| One-hot encoding | replace the categorical variable by different boolean variables (0/1) to indicate whether or not certain label is true for that observation | keep all information of that variable | 1. expand feature space dramatically if too many labels in that variable<br />2. does not add additional value to make the variable more predictive |
| Ordinal-encoding | replace the labels by some ordinal number if ordinal is meaningful | straightforward | does not add additional value to make the variable more predictive |
| Count/frequency encoding | replace each label of the categorical variable by the count/frequency within that category | / | 1. may yield same encoding for two different labels (if they appear same times) and lose valuable info.<br />2. may not add predictive power |
| Mean encoding | replace the label by the mean of the target for that label. (the target must be 0/1 valued or continuous) | 1. Capture information within the label, therefore rendering more predictive features<br/>2. Create a monotonic relationship between the variable and the target<br>3. Do not expand the feature space | Prone to cause over-fitting |
| WOE encoding[^9] | replace the label with the Weight of Evidence of each label. WOE is computed from the basic odds ratio: ln( (Proportion of Good Outcomes) / (Proportion of Bad Outcomes)) | 1. Establishes a monotonic relationship to the dependent variable<br/>2. Orders the categories on a "logistic" scale which is natural for logistic regression<br>3. The transformed variables can then be compared because they are on the same scale, so it is possible to determine which one is more predictive | 1. May incur a loss of information (variation) due to binning to few categories<br/>2. Prone to cause over-fitting |
| Target encoding[^10] | Similar to mean encoding, but use both posterior probability and prior probability of the target | 1. Capture information within the label, therefore rendering more predictive features<br/>2. Create a monotonic relationship between the variable and the target<br/>3. Do not expand the feature space | Prone to cause over-fitting |
**Note**: if we are using one-hot encoding in linear regression, we should keep k-1 binary variables to avoid multicollinearity. This is true for any algorithm that looks at all features at the same time during training, including SVMs, neural networks and clustering. Tree-based algorithms, on the other hand, need the entire set of binary variables to select the best split.
An in-depth intro to WOE can be found [here](http://documentation.statsoft.com/StatisticaHelp.aspx?path=WeightofEvidence/WeightofEvidenceWoEIntroductoryOverview).
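A small sketch of one-hot and mean encoding in pandas (note that fitting mean encoding on the full dataset leaks the target; in practice fit it on the training set only):
```python
import pandas as pd

data = pd.read_csv('./data/titanic.csv', usecols=['Sex', 'Pclass', 'Survived'])
# one-hot encoding, keeping k-1 dummies to avoid multicollinearity
data = pd.concat([data, pd.get_dummies(data['Sex'], prefix='Sex', drop_first=True)], axis=1)
# mean encoding: replace each label by the target mean within that label
data['Pclass_mean_enc'] = data['Pclass'].map(data.groupby('Pclass')['Survived'].mean())
```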
### 3.4 Feature Transformation
#### 3.4.1 Why Feature Transformation Matters
##### 3.4.1.1 Linear Assumption
**Regression**
Linear regression is a straightforward approach for predicting a quantitative response Y on the basis of predictor variables X1, X2, ... Xn. It assumes that there is a linear relationship between X(s) and Y. Mathematically, we can write this linear relationship as Y ≈ β0 + β1X1 + β2X2 + ... + βnXn.
**Classification**
Similarly, for classification, Logistic Regression assumes a linear relationship between the variables and the log of the odds.
Odds = p / (1 - p), where p is the probability of y = 1
log(odds) = β0 + β1X1 + β2X2 + ... + βnXn
**Why it's important to follow linear assumption**
If the machine learning model assumes a linear dependency between the predictors Xs and the outcome Y when there is no such linear relationship, the model will perform poorly. In such cases, we are better off trying another machine learning model that does not make such an assumption.
If there is no linear relationship and we have to use the linear/logistic regression models, mathematical transformation/discretization may help create the relationship, though it cannot guarantee a better result.
##### 3.4.1.2 Variable Distribution
**Linear Regression Assumptions**
Linear Regression has the following assumptions over the predictor variables X:
- Linear relationship with the outcome Y
- Multivariate normality
- No or little multicollinearity
- Homoscedasticity
Normality assumption means that every variable X should follow a Gaussian distribution.
Homoscedasticity, also known as homogeneity of variance, describes a situation in which the error term (that is, the “noise” or random disturbance in the relationship between the independent variables (Xs) and the dependent variable (Y)) is the same across all values of the independent variables.
Violations in the assumptions of homoscedasticity and / or normality (assuming a distribution of data is homoscedastic or Gaussian, when in reality it is not) may result in poor model performance.
The remaining machine learning models, including Neural Networks, Support Vector Machines, Tree based methods and PCA, do not make any assumption over the distribution of the independent variables. However, on many occasions the model performance may **benefit from a "Gaussian-like" distribution**.
Why may models benefit from a "Gaussian-like" distribution? In variables with a normal distribution, the observations of X available to predict Y vary across a greater range of values, that is, the values of X are "spread" over a greater range.
In the situations above, transformation of the original variable can help give the variable more of the bell shape of the Gaussian distribution.
#### 3.4.2 How to Handle Feature Transformation
| Method | Definition |
| --------------------------- | -------------------------------------------------------- |
| Logarithmic transformation | log(x+1). We use (x+1) instead of x to avoid taking the log of 0 |
| Reciprocal transformation | 1/x. Note that x must not be 0. |
| Square root transformation | x**(1/2) |
| Exponential transformation | X**(m) |
| Box-cox transformation[^12] | (X**λ-1)/λ |
| Quantile transformation | transform features using quantiles information |
**Log transformation** is useful when applied to skewed distributions, as it tends to expand the values which fall in the range of lower magnitudes and to compress the values which fall in the range of higher magnitudes, which helps make the skewed distribution as normal-like as possible. **Square root transformation** does a similar thing in this sense.
**Box-Cox transformation** in sklearn[^13] is another popular function belonging to the power transform family of functions. This function has a pre-requisite that the numeric values to be transformed must be positive (similar to what log transform expects). In case they are negative, shifting using a constant value helps. Mathematically, the Box-Cox transform function can be denoted as follows.
![](images/box-cox.png)
**Quantile transformation** in sklearn[^14] transforms the features to follow a uniform or a normal distribution. Therefore, for a given feature, this transformation tends to spread out the most frequent values. It also reduces the impact of (marginal) outliers: this is therefore a robust preprocessing scheme. However, this transform is non-linear. It may distort linear correlations between variables measured at the same scale but renders variables measured at different scales more directly comparable.
We can use **Q-Q plot** to check if the variable is normally distributed (a 45 degree straight line of the values over the theoretical quantiles) after transformation.
Below is an example showing the effect of sklearn's Box-Cox/Yeo-Johnson/Quantile transforms in mapping data from various distributions to a normal distribution.
![sphx_glr_plot_map_data_to_normal_001](images/sphx_glr_plot_map_data_to_normal_001.png)
[img source](https://scikit-learn.org/stable/auto_examples/preprocessing/plot_map_data_to_normal.html#sphx-glr-auto-examples-preprocessing-plot-map-data-to-normal-py)
On “small” datasets (less than a few hundred points), the quantile transformer is prone to overfitting. The use of the power transform is then recommended.
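These transformations are readily available in numpy and sklearn; a minimal sketch (the +1 shift is only there because Box-Cox requires strictly positive input):
```python
import numpy as np
import pandas as pd
from sklearn.preprocessing import PowerTransformer, QuantileTransformer

X = pd.read_csv('./data/titanic.csv', usecols=['Fare'])
X_log = np.log1p(X)                                                  # log(x + 1)
X_boxcox = PowerTransformer(method='box-cox').fit_transform(X + 1)   # needs X > 0
X_gauss = QuantileTransformer(output_distribution='normal').fit_transform(X)
```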
### 3.5 Feature Generation
**Definition**: Creating new features as a combination of existing ones. It's a great way to add domain knowledge to the dataset.
#### 3.5.1 Missing Data Derived Feature
As mentioned in section 2.1, we can create a new binary feature (0/1) denoting whether an observation has a missing value in the raw feature.
#### 3.5.2 Simple Statistical Derived Feature
Creating new features by performing simple statistical calculations on the raw features, including:
- count/sum
- average/median/mode
- max/min/stddev/variance/range/IQR/Coefficient of Variation
- time span/interval
Take a call log for example: we can create new features like number of calls, number of calls in/out, average calling duration, monthly average calling duration, max calling duration, etc.
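For instance, with a hypothetical call-log DataFrame `calls` containing one row per call:
```python
# 'calls' is a hypothetical DataFrame with columns 'user_id' and 'duration'
features = calls.groupby('user_id')['duration'].agg(['count', 'mean', 'max'])
features.columns = ['n_calls', 'avg_duration', 'max_duration']
```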
#### 3.5.3 Feature Crossing
After having some simple statistical derived features, we can have them crossed together. Common dimensions used for crossing include:
- time
- region
- business types
Again taking the call log as an example, we can have crossed features like: number of calls during night time/day time, number of calls under different business types (banks/taxi services/travelling/hospitalities), number of calls during the past 3 months, etc. Many of the statistical calculations mentioned in section 3.5.2 can be used again to create more features.
**Note**: An open-source python framework named **Featuretools** that helps automatically generate such features can be found [here](https://github.com/Featuretools/featuretools).
![featuretools](images/featuretools.png)
Personally I haven't used it in practice. You may try it and discover whether it is of industrial use.
#### 3.5.4 Ratios and Proportions
These are common techniques. For example, in order to predict the future performance of credit card sales of a branch, ratios like credit card sales / sales person or credit card sales / marketing spend would be more powerful than just using the absolute number of cards sold in the branch.
#### 3.5.5 Cross Products between Categorical Features
Consider a categorical feature A, with two possible values {A1, A2}. Let B be a feature with possibilities {B1, B2}. Then, a feature-cross between A & B would take one of the following values: {(A1, B1), (A1, B2), (A2, B1), (A2, B2)}. You can basically give these combinations any names you like. Just remember that every combination denotes a synergy between the information contained by the corresponding values of A and B.
This is an extremely useful technique when certain features together denote a property better than they do individually. Mathematically speaking, you are taking a cross product between all possible values of the categorical features. The concept is similar to the Feature Crossing of section 3.5.3, but this one particularly refers to the crossing between 2 categorical features.
#### 3.5.6 Polynomial Expansion
The cross product can also be applied to numerical features, which results in a new interaction feature between A and B. This can be done easily by sklearn's [PolynomialFeatures](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html#sklearn.preprocessing.PolynomialFeatures), which generates a new feature set consisting of all polynomial combinations of the features with degree less than or equal to the specified degree. For example, two raw features {X1, X2} with a degree of 2 generate the feature set {1, X1, X2, X1^2, X1X2, X2^2}.
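For example (a sketch):
```python
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[2., 3.]])      # two raw features: X1=2, X2=3
poly = PolynomialFeatures(degree=2)
print(poly.fit_transform(X))  # [[1. 2. 3. 4. 6. 9.]] = [1, X1, X2, X1^2, X1*X2, X2^2]
```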
#### 3.5.7 Feature Learning by Trees
In tree-based algorithms, each sample is assigned to a particular leaf node. The decision path to each node can be seen as a new non-linear feature, and we can create N new binary features, where N equals the total number of leaf nodes in the tree or tree ensemble. The features can then be fed into other algorithms such as logistic regression.
The idea of using tree algorithms to generate new features was first introduced by Facebook in this [paper](http://quinonero.net/Publications/predicting-clicks-facebook.pdf).
The good thing about this method is that we can get complex combinations of several features that are informative (as they are constructed by the tree's learning algorithm). This saves us much time compared to doing feature crossing manually, and it is widely used in the CTR (click-through rate) field of the online advertising industry.
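A minimal sklearn sketch of the idea (in practice, fit the trees and the logistic regression on different subsets of the data to avoid overfitting; model sizes here are arbitrary):
```python
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder

X, y = load_breast_cancer(return_X_y=True)
rf = RandomForestClassifier(n_estimators=10, random_state=0).fit(X, y)
leaves = rf.apply(X)                                   # leaf index of each sample in each tree
leaf_features = OneHotEncoder().fit_transform(leaves)  # one binary feature per leaf node
lr = LogisticRegression(solver='liblinear').fit(leaf_features, y)
```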
#### 3.5.8 Feature Learning by Deep Networks
As we can see from all of the above, manual feature generation takes a lot of effort and may not guarantee good returns, particularly when we have huge numbers of features to work with. Feature learning with trees can be seen as an early attempt at creating features automatically, and as deep learning methods came into fashion from around 2016, they have also achieved some success in this area, for example with **autoencoders** and **restricted Boltzmann machines**. They have been shown to automatically, and in an unsupervised or semi-supervised way, learn abstract representations of features (a compressed form), which in turn have supported state-of-the-art results in domains such as speech recognition, image classification, object recognition and other areas. However, such features have limited interpretability, and deep learning requires much more data to extract high-quality results.
## 4. Feature Selection
**Definition**: Feature Selection is the process of selecting a subset of relevant features for use in machine learning model building.
It is not always true that more features lead to better results. Including irrelevant features (the ones that are just unhelpful to the prediction) and redundant features (irrelevant in the presence of others) will only overwhelm the learning process and make overfitting more likely.
With feature selection, we can have:
- simplification of models to make them easier to interpret
- shorter training times and lower computational cost
- lower cost in data collection
- avoid the curse of dimensionality
- enhanced generalization by reducing overfitting
We should keep in mind that different feature subsets render optimal performance for different algorithms, so feature selection is not a process separate from model training. If we are selecting features for a linear model, it is better to use selection procedures targeted at those models, like importance by regression coefficients or Lasso. And if we are selecting features for trees, it is better to use tree-derived importance.
### 4.1 Filter Method
Filter methods select features based on a performance measure regardless of the ML algorithm later employed.
Univariate filters evaluate and rank a single feature according to a certain criterion, while multivariate filters evaluate the entire feature space. Filter methods are:
- selecting variables regardless of the model
- less computationally expensive
- usually give lower prediction performance
As a result, filter methods are suited for a quick first-step screening and removal of irrelevant features.
| Method | Definition |
| ------------------------- | ------------------------------------------------------------ |
| Variance | removing features that show the same value for the majority/all of the observations (constant/quasi-constant features) |
| Correlation | remove features that are highly correlated with each other |
| Chi-Square | Compute chi-squared stats between each non-negative feature and class |
| Mutual Information Filter | Mutual information measures how much information the presence/absence of a feature contributes to making the correct prediction on Y. |
| Univariate ROC-AUC or MSE | builds one decision tree per feature to predict the target, then makes predictions and ranks the features according to the machine learning metric (roc-auc or mse) |
| Information Value (IV) | a byproduct of WOE. <br>IV = Σ(Proportion of Good Outcomes - Proportion of Bad Outcomes) * WOE |
WOE encoding (see section 3.3.2) and IV often go hand in hand in scorecard development. Both concepts derive from logistic regression and are standard practice in the credit card industry. IV is a popular and widely used measure, as there are very convenient rules of thumb for variable selection associated with IV, as below:
![IV](images/IV.png)
However, all these filtering methods fail to consider the interaction between features and may reduce our predictive power. Personally I only use variance and correlation to filter some absolutely unnecessary features.
**Note**: One thing to keep in mind when using the chi-square test or univariate selection methods is that in very big datasets most of the features will show a small p_value, and therefore look highly predictive. This is in fact an effect of the sample size, so care should be taken when selecting features using these procedures. An ultra tiny p_value does not highlight an ultra-important feature; rather, it indicates that the dataset contains too many samples.
**Note**: Correlated features do not necessarily affect model performance (for trees, etc.), but high dimensionality does, and too many features hurt model interpretability. So it's always better to reduce correlated features.
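A sketch of the variance and correlation filters (the 0.01 and 0.9 thresholds are arbitrary; `X_train` is assumed to be a DataFrame):
```python
import pandas as pd
from sklearn.feature_selection import VarianceThreshold

# remove quasi-constant features
selector = VarianceThreshold(threshold=0.01)
X_reduced = selector.fit_transform(X_train)

# drop one feature out of each highly correlated pair
corr = X_train.corr().abs()
to_drop = [col for i, col in enumerate(corr.columns) if any(corr.iloc[:i, i] > 0.9)]
```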
### 4.2 Wrapper Method
Wrappers use a search strategy to search through the space of possible feature subsets and evaluate each subset by the quality of the performance on a ML algorithm. Practically any combination of search strategy and algorithm can be used as a wrapper. It is featured as:
- use ML models to score the feature subset
- train a new model on each subset
- very computationally expensive
- usually provide the best performing subset for a given ML algorithm, but probably not for another
- need an arbitrary defined stopping criteria
The most common **search strategy** group is sequential search, including Forward Selection, Backward Elimination and Exhaustive Search. Randomized search is another popular choice, including evolutionary computation algorithms such as genetic algorithms, and simulated annealing.
Another key element in wrappers is the **stopping criterion**. When to stop the search? In general there are three options:
- performance increase
- performance decrease
- predefined number of features is reached
#### 4.2.1 Forward Selection
Step forward feature selection starts by evaluating all features individually and selects the one that generates the best performing algorithm, according to a pre-set evaluation criterion. In the second step, it evaluates all possible combinations of the selected feature and a second feature, and selects the pair that produces the best performing algorithm based on the same pre-set criterion.
The pre-set criteria can be the roc_auc for classification and the r squared for regression for example.
This selection procedure is called greedy, because it evaluates all possible single, double, triple and so forth feature combinations. Therefore it is quite computationally expensive, and sometimes, if the feature space is big, even infeasible.
There is a special package for python that implements this type of feature selection: [mlxtend](https://github.com/rasbt/mlxtend).
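A sketch using mlxtend (parameter values are illustrative):
```python
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.ensemble import RandomForestClassifier

sfs = SFS(RandomForestClassifier(n_estimators=10, random_state=0),
          k_features=10,        # stopping criterion: number of features to reach
          forward=True,         # step forward selection
          scoring='roc_auc',
          cv=3)
sfs = sfs.fit(X_train, y_train)
print(sfs.k_feature_idx_)       # indices of the selected features
```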
#### 4.2.2 Backward Elimination
Step backward feature selection starts by fitting a model using all features. Then it removes one feature: the one whose removal produces the best performing algorithm (i.e. the least statistically significant feature) for a certain evaluation criterion. In the second step, it removes a second feature, again the one whose removal produces the best performing algorithm. And it proceeds, removing feature after feature, until a certain criterion is met.
The pre-set criteria can be the roc_auc for classification and the r squared for regression for example.
#### 4.2.3 Exhaustive Feature Selection
In an exhaustive feature selection the best subset of features is selected, over all possible feature subsets, by optimizing a specified performance metric for a certain machine learning algorithm. For example, if the classifier is a logistic regression and the dataset consists of **4** features, the algorithm will evaluate all **15** feature combinations as follows:
- all possible combinations of 1 feature
- all possible combinations of 2 features
- all possible combinations of 3 features
- all the 4 features
and select the one that results in the best performance (e.g., classification accuracy) of the logistic regression classifier.
This exhaustive search is very computationally expensive. Because of this cost, it is rarely used in practice.
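mlxtend also ships an `ExhaustiveFeatureSelector`; below is a minimal sketch on a deliberately tiny feature space (the iris data has exactly 4 features, hence the 15 subsets mentioned above; the estimator and scoring are arbitrary choices):

```python
from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)

efs = EFS(LogisticRegression(solver='liblinear'),
          min_features=1,
          max_features=4,  # evaluates every subset of 1 to 4 features: 15 in total
          scoring='accuracy',
          cv=3)
efs = efs.fit(X, y)
print(efs.best_idx_)    # the best feature subset found
print(efs.best_score_)  # its cross-validated accuracy
```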
#### 4.2.4 Genetic Algorithm
TODO
### 4.3 Embedded Method
Embedded methods combine the advantages of the filter and wrapper methods. The learning algorithm takes advantage of its own variable selection process and performs feature selection and classification at the same time. Common embedded methods include Lasso and various tree-based algorithms. They are characterized as follows:
- perform feature selection as part of the model building process
- consider interactions between features
- less computationally expensive than wrappers, as the model is trained only once
- usually provide the best performing subset for a given ML algorithm, but probably not for another
#### 4.3.1 Regularization with Lasso
Regularization consists of adding a penalty to the parameters of the machine learning model to reduce its freedom. Hence, the model will be less likely to fit the noise of the training data, and thus less likely to overfit.
In linear model regularization, the penalty is applied over the coefficients that multiply each of the predictors. For linear models there are in general 3 types of regularization:
- L1 regularization (Lasso)
- L2 regularization (Ridge)
- L1/L2 (Elastic net)
Among the different types of regularization, **Lasso (L1)** has the property that it is able to shrink some of the coefficients to exactly zero. Therefore, those features can be removed from the model.
Both for linear and logistic regression we can use Lasso regularization to remove non-important features. Keep in mind that increasing the penalty will increase the number of features removed, so you will need to monitor that the penalty is neither too high (removing even important features) nor too low (failing to remove non-important ones).
Having said this, if the penalty is too high and important features are removed, you should notice a drop in the performance of the algorithm and then realize that you need to decrease the regularization.
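A minimal sketch of Lasso-based selection with scikit-learn's `SelectFromModel` (the dataset and the value of `C` are arbitrary; L1 penalties are scale-sensitive, hence the scaling step):

```python
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

X, y = load_breast_cancer(return_X_y=True)
X = StandardScaler().fit_transform(X)  # put features on the same scale first

# C is the inverse of the penalty: lowering C shrinks more coefficients to zero
sel = SelectFromModel(LogisticRegression(penalty='l1', C=0.5, solver='liblinear'))
sel.fit(X, y)
print(sel.get_support().sum(), 'features kept out of', X.shape[1])
X_selected = sel.transform(X)  # the reduced feature matrix
```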
Regularization is a large topic. For more information you can refer to:
- [Least angle and l1 penalised regression: A review](https://projecteuclid.org/download/pdfview_1/euclid.ssu/1211317636)
- [Penalised feature selection and classification in bioinformatics](https://www.ncbi.nlm.nih.gov/pubmed/18562478)
- [Feature selection for classification: A review](https://web.archive.org/web/20160314145552/http://www.public.asu.edu/~jtang20/publication/feature_selection_for_classification.pdf)
- [Machine Learning Explained: Regularization](https://www.r-bloggers.com/machine-learning-explained-regularization/)
#### 4.3.2 Random Forest Importance
Random forests are one of the most popular machine learning algorithms. They are so successful because they provide in general a good predictive performance, low overfitting and easy interpretability. This interpretability is given by the fact that it is straightforward to derive the importance of each variable on the tree decision. In other words, it is easy to compute how much each variable is contributing to the decision.
Random forest is a bagging algorithm that consists of a bunch of base estimators (decision trees), each of them built over a random extraction of the observations from the dataset and a random extraction of the features. Not every tree sees all the features or all the observations, which guarantees that the trees are **de-correlated** and therefore **less prone to over-fitting.**
Each tree is also a sequence of yes-no questions based on a single feature or a combination of features. At each split, the question divides the dataset into 2 buckets, each of them hosting observations that are more similar among themselves and different from the ones in the other bucket. Therefore, the importance of each feature is derived from how "**pure**" each of the buckets is.
For classification, the measure of impurity is either the **Gini impurity** or the **information gain/entropy**. For regression the measure of impurity is **variance**. Therefore, when training a tree, it is possible to compute how much each feature decreases the impurity. The more a feature decreases the impurity, the more important the feature is. In random forests, the impurity decrease from each feature can be averaged across trees to determine the final importance of the variable.
Selecting features by tree-derived feature importance is a very straightforward, fast and generally accurate way of selecting good features for machine learning, in particular if you are going to build tree-based models.
However, correlated features will show similar and lowered importance in a tree, compared to what their importance would be if the tree were built without their correlated counterparts.
**Limitation**
- correlated features show similar importance
- correlated features' importance is lower than their real importance, i.e. than the importance they would show if the tree were built without their correlated counterparts
- high-cardinality variables tend to show higher importance
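A minimal sketch of selection by random forest importance, again with `SelectFromModel` (the dataset and the `'median'` threshold are arbitrary choices):

```python
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel

data = load_breast_cancer()
X, y = data.data, data.target

rf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X, y)
# impurity-decrease importance, averaged across the trees in the forest
importances = pd.Series(rf.feature_importances_, index=data.feature_names)
print(importances.sort_values(ascending=False).head(10))

# keep only the features whose importance exceeds the median importance
sel = SelectFromModel(rf, threshold='median', prefit=True)
X_selected = sel.transform(X)
print(X_selected.shape)
```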
#### 4.3.3 Gradient Boosted Trees Importance
Similarly to selecting features using Random Forests derived feature importance, we can select features based on the importance derived by gradient boosted trees. And we can do that in one go, or in a recursive manner, depending on how much time we have, how many features are in the dataset, and whether they are correlated or not.
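The pattern is identical to the random forest sketch above, only swapping in a boosted model (a sketch, not the repo's own code; by default `SelectFromModel` keeps features above the mean importance):

```python
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.feature_selection import SelectFromModel

X, y = load_breast_cancer(return_X_y=True)
sel = SelectFromModel(GradientBoostingClassifier(n_estimators=100, random_state=0))
sel.fit(X, y)
print(sel.get_support().sum(), 'features selected')
```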
### 4.4 Feature Shuffling
A popular method of feature selection consists of randomly shuffling the values of a specific variable and determining how that permutation affects the performance metric of the machine learning algorithm. In other words, the idea is to permute the values of each feature, one at a time, and measure how much the permutation degrades the accuracy, the roc_auc, or the mse of the machine learning model. If a variable is important, that is, highly predictive, a random permutation of its values will degrade these metrics dramatically. Conversely, non-important / non-predictive variables should have little to no effect on the model performance metric we are assessing.
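A minimal sketch of feature shuffling on a held-out set (scikit-learn >= 0.22 ships this idea as `sklearn.inspection.permutation_importance`; the manual loop below only assumes the dependencies listed in this repo, and the dataset/split are arbitrary):

```python
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)

model = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_tr, y_tr)
baseline = roc_auc_score(y_te, model.predict_proba(X_te)[:, 1])

rng = np.random.RandomState(0)
for i in range(X_te.shape[1]):
    X_perm = X_te.copy()
    rng.shuffle(X_perm[:, i])  # destroy this feature's information only
    score = roc_auc_score(y_te, model.predict_proba(X_perm)[:, 1])
    print(i, 'roc_auc drop:', baseline - score)  # big drop -> important feature
```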
### 4.5 Hybrid Method
#### 4.5.1 Recursive Feature Elimination
This method consists of the following steps:
1. Rank the features according to their importance derived from a machine learning algorithm: it can be tree importance, or LASSO / Ridge, or the linear / logistic regression coefficients.
2. Remove one feature (the least important) and build a machine learning algorithm utilizing the remaining features.
3. Calculate a performance metric of your choice: roc-auc, mse, rmse, accuracy.
4. If the metric decreases by more than an arbitrarily set threshold, then that feature is important and should be kept. Otherwise, we can remove that feature.
5. Repeat steps 2-4 until all features have been removed (and therefore evaluated) and the drop in performance assessed.
This method combines a selection process like that of wrappers with the model-derived feature importance of embedded methods, hence the name hybrid.
The difference between this method and step backward feature selection is that it does not re-train a model on every candidate subset to decide which feature to remove. It removes the least important feature, based on the machine-learning-model-derived importance, and then assesses whether that feature should stay removed. So it considers each feature only once during selection, whereas step backward feature selection trains a model for every remaining feature at each step of the selection.
This method is therefore faster than wrapper methods and generally better than embedded methods. In practice it works extremely well. It also accounts for correlations (depending on how stringently you set the arbitrary performance-drop threshold). On the downside, the drop in performance used to decide whether a feature should be kept or removed is set arbitrarily: the smaller the threshold, the more features will be selected, and vice versa.
**Example: Recursive Feature Elimination with Random Forests Importance**
As we talked about in section 4.3.2, Random Forests assign equal or similar importance to features that are highly correlated. In addition, when features are correlated, the importance assigned is lower than the importance attributed to the feature itself, should the tree be built without the correlated counterparts.
Therefore, instead of eliminating features based on importance computed **once** (from all initial features), we may get a better selection by removing features **recursively**, recalculating the importance in each round.
In this situation, when a feature that is highly correlated to another one is removed, the importance of the remaining feature increases. This may lead to a better feature-subset selection. On the downside, building several random forests is quite time consuming, in particular if the dataset contains a large number of features. A sketch with scikit-learn's off-the-shelf tooling follows.
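scikit-learn's `RFECV` implements the recursive part of this idea, refitting the forest and recomputing importance after each elimination; note that it stops by cross-validated score rather than by an explicit drop threshold, so this is a sketch of the closest off-the-shelf tool, not an exact implementation of the steps above (dataset and parameters are arbitrary):

```python
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFECV

X, y = load_breast_cancer(return_X_y=True)

rfecv = RFECV(RandomForestClassifier(n_estimators=50, random_state=0),
              step=1,            # eliminate one feature per round
              scoring='roc_auc',
              cv=3)
rfecv.fit(X, y)
print('optimal number of features:', rfecv.n_features_)
print(rfecv.support_)  # boolean mask of the retained features
```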
#### 4.5.2 Recursive Feature Addition
This method consists of the following steps:
1. Rank the features according to their importance derived from a machine learning algorithm: it can be tree importance, or LASSO / Ridge, or the linear / logistic regression coefficients.
2. Build a machine learning model with only 1 feature, the most important one, and calculate the model metric for performance.
3. Add one more feature (the most important of the remaining ones) and build a machine learning algorithm utilizing the added feature and those kept from previous rounds.
4. Calculate a performance metric of your choice: roc-auc, mse, rmse, accuracy.
5. If the metric increases by more than an arbitrarily set threshold, then that feature is important and should be kept. Otherwise, we can remove that feature.
6. Repeat steps 3-5 until all features have been considered (and therefore evaluated) and the gain in performance assessed.
The difference between this method and step forward feature selection is analogous: it does not re-train a model on every candidate feature to determine which one to add, so it's faster than wrappers. A manual sketch follows.
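The sketch below implements the steps above by hand; the breast cancer dataset, the 0.002 roc_auc gain threshold and the 70/30 split are arbitrary choices for illustration:

```python
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)

# step 1: rank features once by random forest importance
rf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_tr, y_tr)
ranking = list(np.argsort(rf.feature_importances_)[::-1])

# step 2: start from the single most important feature
threshold = 0.002  # arbitrary minimum roc_auc gain to keep a feature
selected = [ranking[0]]
model = RandomForestClassifier(n_estimators=100, random_state=0)
model.fit(X_tr[:, selected], y_tr)
best = roc_auc_score(y_te, model.predict_proba(X_te[:, selected])[:, 1])

# steps 3-6: try the remaining features one at a time, most important first
for f in ranking[1:]:
    model.fit(X_tr[:, selected + [f]], y_tr)
    score = roc_auc_score(y_te, model.predict_proba(X_te[:, selected + [f]])[:, 1])
    if score - best > threshold:  # keep the feature only if the gain is big enough
        selected.append(f)
        best = score

print('selected features:', selected)
```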
### 4.6 Dimensionality Reduction
- PCA (Principal Component Analysis)
- SVD (Singular Value Decomposition)
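A minimal PCA sketch with scikit-learn (the 95% variance target is an arbitrary choice; features should be standardized first, since PCA is scale-sensitive):

```python
from sklearn.datasets import load_breast_cancer
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

X, y = load_breast_cancer(return_X_y=True)
X_scaled = StandardScaler().fit_transform(X)

pca = PCA(n_components=0.95)  # keep enough components for 95% of the variance
X_reduced = pca.fit_transform(X_scaled)
print(X.shape, '->', X_reduced.shape)
print(pca.explained_variance_ratio_)  # variance carried by each component
```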
## 5. Data Leakage
This section is a reminder to myself, as I have made huge mistakes because of not being aware of this problem. Data leakage is when information from outside the training dataset is used to create the model[^15]. The result is that you may be creating overly optimistic models that are practically useless and cannot be used in production. The model shows great results on both your training and testing data, but not because it truly generalizes well; it is using information from the test data.
While it is well known that one should use cross-validation, or at least a separate validation set, when training and evaluating models, people may easily forget to do the same during the feature engineering & selection process. Keep in mind that the test dataset must not be used in any way to make choices about the model, including feature engineering & selection.
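A minimal sketch of the safe pattern: wrap every fitted preprocessing/selection step in a `Pipeline`, so that during cross-validation each step is re-fitted on the training folds only (the dataset, the selector and `k=10` are arbitrary illustrations):

```python
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_breast_cancer(return_X_y=True)

pipe = Pipeline([
    ('scale', StandardScaler()),
    ('select', SelectKBest(f_classif, k=10)),
    ('clf', LogisticRegression(solver='liblinear')),
])
# cross_val_score re-fits the whole pipeline on each training fold,
# so the held-out fold never influences scaling or feature selection
print(cross_val_score(pipe, X, y, scoring='roc_auc', cv=5).mean())
```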
------
**Reference**
[^1]: http://www.simonqueenborough.info/R/basic/missing-data
[^2]: Rubin, D. B. (1976). Inference and missing data. Biometrika 63(3): 581-592.
[^3]: D. Hawkins. Identification of Outliers, Chapman and Hall, 1980.
[^4]: https://www.springer.com/gp/book/9781461463955
[^5]: https://github.com/yzhao062/pyod
[^6]: https://docs.oracle.com/cd/E40248_01/epm.1112/cb_statistical/frameset.htm?ch07s02s10s01.html
[^7]: https://www.academia.edu/5324493/Detecting_outliers_Do_not_use_standard_deviation_around_the_mean_use_absolute_deviation_around_the_median
[^8]: https://www.purplemath.com/modules/boxwhisk3.htm
[^9]: http://documentation.statsoft.com/StatisticaHelp.aspx?path=WeightofEvidence/WeightofEvidenceWoEIntroductoryOverview
[^10]: A Preprocessing Scheme for High-Cardinality Categorical Attributes in Classification and Prediction Problems. https://kaggle2.blob.core.windows.net/forum-message-attachments/225952/7441/high%20cardinality%20categoricals.pdf
[^11]: https://www.aaai.org/Papers/AAAI/1992/AAAI92-019.pdf
[^12]: http://onlinestatbook.com/2/transformations/box-cox.html
[^13]: https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PowerTransformer.html#sklearn.preprocessing.PowerTransformer
[^14]: https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.QuantileTransformer.html#sklearn.preprocessing.QuantileTransformer
[^15]: https://machinelearningmastery.com/data-leakage-machine-learning/
204
README.md Normal file
View File
@@ -0,0 +1,204 @@
# Feature Engineering & Feature Selection
## About
A comprehensive [guide]() for **Feature Engineering** and **Feature Selection**, with implementations and examples in Python.
## What You'll Learn
Not only a collection of hands-on functions, but also explanations of **Why**, **How** and **When** to adopt **Which** techniques of feature engineering in data mining.
- the nature and risks of the data problems we often encounter
- explanation of the various feature engineering & selection techniques
- the rationale for using each of them
- pros & cons of each method
- code & example
## Getting Started
This repo is mainly intended as a reference for anyone doing feature engineering; most of the modules are implemented with scikit-learn or libraries from its community.
To run the demos or use the customized functions, please download the ZIP file from the repo or just copy-paste any part of the code you find helpful. It should all be very easy to understand.
**Required Dependencies**:
- Python 3.5, 3.6 or 3.7
- numpy>=1.15
- pandas>=0.23
- scipy>=1.1.0
- scikit_learn>=0.20.1
- seaborn>=0.9.0
## Table of Contents and Code Examples
Below is a list of methods currently implemented in the repo. The complete guide can be found [here]().
**1. Data Exploration**
1.1 Variables
1.2 Variable Identification
Check Data Types
1.3 Univariate Analysis
Descriptive Analysis
Discrete Variable Barplot
Discrete Variable Countplot
Discrete Variable Boxplot
Continuous Variable Distplot
1.4 Bi-variate Analysis
Scatter Plot
Correlation Plot
Heat Map
**2. Feature Cleaning**
2.1 Missing Values
Missing Value Check
Listwise Deletion
Mean/Median/Mode Imputation
End of distribution Imputation
Random Imputation
Arbitrary Value Imputation
Add a variable to denote NA
2.2 Outliers
Detect by Arbitrary Boundary
Detect by Mean & Standard Deviation
Detect by IQR
Detect by MAD
Mean/Median/Mode Imputation
Discretization
Imputation with Arbitrary Value
Windsorization
Discard Outliers
2.3 Rare Values
Mode Imputation
Grouping into One New Category
2.4 High Cardinality
Grouping Labels with Business Understanding
Grouping Labels with Rare Occurrence into One Category
Grouping Labels with Decision Tree
**3. Feature Engineering**
3.1 Feature Scaling
Normalization - Standardization
Min-Max Scaling
Robust Scaling
3.2 Discretize
Equal Width Binning
Equal Frequency Binning
K-means Binning
Discretization by Decision Trees
ChiMerge
3.3 Feature Encoding
One-hot Encoding
Ordinal-Encoding
Count/frequency Encoding
Mean Encoding
WOE Encoding
Target Encoding
3.4 Feature Transformation
Logarithmic Transformation
Reciprocal Transformation
Square Root Transformation
Exponential Transformation
Box-cox Transformation
Quantile Transformation
3.5 Feature Generation
Missing Data Derived
Simple Stats
Crossing
Ratio & Proportion
Cross Product
Polynomial
Feature Learning by Tree
Feature Learning by Deep Network
**4. Feature Selection**
4.1 Filter Method
Variance
Correlation
Chi-Square
Mutual Information Filter
Univariate ROC-AUC or MSE
Information Value (IV)
4.2 Wrapper Method
Forward Selection
Backward Elimination
Exhaustive Feature Selection
Genetic Algorithm
4.3 Embedded Method
Lasso (L1)
Random Forest Importance
Gradient Boosted Trees Importance
4.4 Feature Shuffling
Random Shuffling
4.5 Hybrid Method
Recursive Feature Elimination
Recursive Feature Addition
## Motivation
Feature Engineering & Selection is the most essential part of building a usable machine learning project, even though hundreds of cutting-edge machine learning algorithms, such as deep learning and transfer learning, have been coming out these days. Indeed, as Prof. Pedro Domingos, the author of *The Master Algorithm*, says:
> “At the end of the day, some machine learning projects succeed and some fail. What makes the difference? Easily the most important factor is the features used.”
>
> — Prof. Pedro Domingos
![001](./images/001.png)
Data and features determine the upper limit of an ML project, while models and algorithms merely approach that limit. However, few materials can be found that systematically introduce the art of feature engineering, and even fewer explain the rationale behind it. This repo aims to be a good guide for Feature Engineering & Selection.
## Key Links and Resources
- Udemy's Feature Engineering online course
https://www.udemy.com/feature-engineering-for-machine-learning/
- Udemy's Feature Selection online course
https://www.udemy.com/feature-selection-for-machine-learning
- JMLR Special Issue on Variable and Feature Selection
http://jmlr.org/papers/special/feature03.html
- Data Analysis Using Regression and Multilevel/Hierarchical Models, Chapter 25: Missing data
http://www.stat.columbia.edu/~gelman/arm/missing.pdf
- Data mining and the impact of missing data
http://core.ecu.edu/omgt/krosj/IMDSDataMining2003.pdf
- PyOD: A Python Toolkit for Scalable Outlier Detection
https://github.com/yzhao062/pyod
- Weight of Evidence (WoE) Introductory Overview
http://documentation.statsoft.com/StatisticaHelp.aspx?path=WeightofEvidence/WeightofEvidenceWoEIntroductoryOverview
- About Feature Scaling and Normalization
http://sebastianraschka.com/Articles/2014_about_feature_scaling.html
- Feature Generation with RF, GBDT and Xgboost
https://blog.csdn.net/anshuai_aw1/article/details/82983997
- A review of feature selection methods with applications
https://ieeexplore.ieee.org/iel7/7153596/7160221/07160458.pdf
506
data/housing.data.txt Normal file
View File
@@ -0,0 +1,506 @@
0.00632 18.00 2.310 0 0.5380 6.5750 65.20 4.0900 1 296.0 15.30 396.90 4.98 24.00
0.02731 0.00 7.070 0 0.4690 6.4210 78.90 4.9671 2 242.0 17.80 396.90 9.14 21.60
0.02729 0.00 7.070 0 0.4690 7.1850 61.10 4.9671 2 242.0 17.80 392.83 4.03 34.70
0.03237 0.00 2.180 0 0.4580 6.9980 45.80 6.0622 3 222.0 18.70 394.63 2.94 33.40
0.06905 0.00 2.180 0 0.4580 7.1470 54.20 6.0622 3 222.0 18.70 396.90 5.33 36.20
0.02985 0.00 2.180 0 0.4580 6.4300 58.70 6.0622 3 222.0 18.70 394.12 5.21 28.70
0.08829 12.50 7.870 0 0.5240 6.0120 66.60 5.5605 5 311.0 15.20 395.60 12.43 22.90
0.14455 12.50 7.870 0 0.5240 6.1720 96.10 5.9505 5 311.0 15.20 396.90 19.15 27.10
0.21124 12.50 7.870 0 0.5240 5.6310 100.00 6.0821 5 311.0 15.20 386.63 29.93 16.50
0.17004 12.50 7.870 0 0.5240 6.0040 85.90 6.5921 5 311.0 15.20 386.71 17.10 18.90
0.22489 12.50 7.870 0 0.5240 6.3770 94.30 6.3467 5 311.0 15.20 392.52 20.45 15.00
0.11747 12.50 7.870 0 0.5240 6.0090 82.90 6.2267 5 311.0 15.20 396.90 13.27 18.90
0.09378 12.50 7.870 0 0.5240 5.8890 39.00 5.4509 5 311.0 15.20 390.50 15.71 21.70
0.62976 0.00 8.140 0 0.5380 5.9490 61.80 4.7075 4 307.0 21.00 396.90 8.26 20.40
0.63796 0.00 8.140 0 0.5380 6.0960 84.50 4.4619 4 307.0 21.00 380.02 10.26 18.20
0.62739 0.00 8.140 0 0.5380 5.8340 56.50 4.4986 4 307.0 21.00 395.62 8.47 19.90
1.05393 0.00 8.140 0 0.5380 5.9350 29.30 4.4986 4 307.0 21.00 386.85 6.58 23.10
0.78420 0.00 8.140 0 0.5380 5.9900 81.70 4.2579 4 307.0 21.00 386.75 14.67 17.50
0.80271 0.00 8.140 0 0.5380 5.4560 36.60 3.7965 4 307.0 21.00 288.99 11.69 20.20
0.72580 0.00 8.140 0 0.5380 5.7270 69.50 3.7965 4 307.0 21.00 390.95 11.28 18.20
1.25179 0.00 8.140 0 0.5380 5.5700 98.10 3.7979 4 307.0 21.00 376.57 21.02 13.60
0.85204 0.00 8.140 0 0.5380 5.9650 89.20 4.0123 4 307.0 21.00 392.53 13.83 19.60
1.23247 0.00 8.140 0 0.5380 6.1420 91.70 3.9769 4 307.0 21.00 396.90 18.72 15.20
0.98843 0.00 8.140 0 0.5380 5.8130 100.00 4.0952 4 307.0 21.00 394.54 19.88 14.50
0.75026 0.00 8.140 0 0.5380 5.9240 94.10 4.3996 4 307.0 21.00 394.33 16.30 15.60
0.84054 0.00 8.140 0 0.5380 5.5990 85.70 4.4546 4 307.0 21.00 303.42 16.51 13.90
0.67191 0.00 8.140 0 0.5380 5.8130 90.30 4.6820 4 307.0 21.00 376.88 14.81 16.60
0.95577 0.00 8.140 0 0.5380 6.0470 88.80 4.4534 4 307.0 21.00 306.38 17.28 14.80
0.77299 0.00 8.140 0 0.5380 6.4950 94.40 4.4547 4 307.0 21.00 387.94 12.80 18.40
1.00245 0.00 8.140 0 0.5380 6.6740 87.30 4.2390 4 307.0 21.00 380.23 11.98 21.00
1.13081 0.00 8.140 0 0.5380 5.7130 94.10 4.2330 4 307.0 21.00 360.17 22.60 12.70
1.35472 0.00 8.140 0 0.5380 6.0720 100.00 4.1750 4 307.0 21.00 376.73 13.04 14.50
1.38799 0.00 8.140 0 0.5380 5.9500 82.00 3.9900 4 307.0 21.00 232.60 27.71 13.20
1.15172 0.00 8.140 0 0.5380 5.7010 95.00 3.7872 4 307.0 21.00 358.77 18.35 13.10
1.61282 0.00 8.140 0 0.5380 6.0960 96.90 3.7598 4 307.0 21.00 248.31 20.34 13.50
0.06417 0.00 5.960 0 0.4990 5.9330 68.20 3.3603 5 279.0 19.20 396.90 9.68 18.90
0.09744 0.00 5.960 0 0.4990 5.8410 61.40 3.3779 5 279.0 19.20 377.56 11.41 20.00
0.08014 0.00 5.960 0 0.4990 5.8500 41.50 3.9342 5 279.0 19.20 396.90 8.77 21.00
0.17505 0.00 5.960 0 0.4990 5.9660 30.20 3.8473 5 279.0 19.20 393.43 10.13 24.70
0.02763 75.00 2.950 0 0.4280 6.5950 21.80 5.4011 3 252.0 18.30 395.63 4.32 30.80
0.03359 75.00 2.950 0 0.4280 7.0240 15.80 5.4011 3 252.0 18.30 395.62 1.98 34.90
0.12744 0.00 6.910 0 0.4480 6.7700 2.90 5.7209 3 233.0 17.90 385.41 4.84 26.60
0.14150 0.00 6.910 0 0.4480 6.1690 6.60 5.7209 3 233.0 17.90 383.37 5.81 25.30
0.15936 0.00 6.910 0 0.4480 6.2110 6.50 5.7209 3 233.0 17.90 394.46 7.44 24.70
0.12269 0.00 6.910 0 0.4480 6.0690 40.00 5.7209 3 233.0 17.90 389.39 9.55 21.20
0.17142 0.00 6.910 0 0.4480 5.6820 33.80 5.1004 3 233.0 17.90 396.90 10.21 19.30
0.18836 0.00 6.910 0 0.4480 5.7860 33.30 5.1004 3 233.0 17.90 396.90 14.15 20.00
0.22927 0.00 6.910 0 0.4480 6.0300 85.50 5.6894 3 233.0 17.90 392.74 18.80 16.60
0.25387 0.00 6.910 0 0.4480 5.3990 95.30 5.8700 3 233.0 17.90 396.90 30.81 14.40
0.21977 0.00 6.910 0 0.4480 5.6020 62.00 6.0877 3 233.0 17.90 396.90 16.20 19.40
0.08873 21.00 5.640 0 0.4390 5.9630 45.70 6.8147 4 243.0 16.80 395.56 13.45 19.70
0.04337 21.00 5.640 0 0.4390 6.1150 63.00 6.8147 4 243.0 16.80 393.97 9.43 20.50
0.05360 21.00 5.640 0 0.4390 6.5110 21.10 6.8147 4 243.0 16.80 396.90 5.28 25.00
0.04981 21.00 5.640 0 0.4390 5.9980 21.40 6.8147 4 243.0 16.80 396.90 8.43 23.40
0.01360 75.00 4.000 0 0.4100 5.8880 47.60 7.3197 3 469.0 21.10 396.90 14.80 18.90
0.01311 90.00 1.220 0 0.4030 7.2490 21.90 8.6966 5 226.0 17.90 395.93 4.81 35.40
0.02055 85.00 0.740 0 0.4100 6.3830 35.70 9.1876 2 313.0 17.30 396.90 5.77 24.70
0.01432 100.00 1.320 0 0.4110 6.8160 40.50 8.3248 5 256.0 15.10 392.90 3.95 31.60
0.15445 25.00 5.130 0 0.4530 6.1450 29.20 7.8148 8 284.0 19.70 390.68 6.86 23.30
0.10328 25.00 5.130 0 0.4530 5.9270 47.20 6.9320 8 284.0 19.70 396.90 9.22 19.60
0.14932 25.00 5.130 0 0.4530 5.7410 66.20 7.2254 8 284.0 19.70 395.11 13.15 18.70
0.17171 25.00 5.130 0 0.4530 5.9660 93.40 6.8185 8 284.0 19.70 378.08 14.44 16.00
0.11027 25.00 5.130 0 0.4530 6.4560 67.80 7.2255 8 284.0 19.70 396.90 6.73 22.20
0.12650 25.00 5.130 0 0.4530 6.7620 43.40 7.9809 8 284.0 19.70 395.58 9.50 25.00
0.01951 17.50 1.380 0 0.4161 7.1040 59.50 9.2229 3 216.0 18.60 393.24 8.05 33.00
0.03584 80.00 3.370 0 0.3980 6.2900 17.80 6.6115 4 337.0 16.10 396.90 4.67 23.50
0.04379 80.00 3.370 0 0.3980 5.7870 31.10 6.6115 4 337.0 16.10 396.90 10.24 19.40
0.05789 12.50 6.070 0 0.4090 5.8780 21.40 6.4980 4 345.0 18.90 396.21 8.10 22.00
0.13554 12.50 6.070 0 0.4090 5.5940 36.80 6.4980 4 345.0 18.90 396.90 13.09 17.40
0.12816 12.50 6.070 0 0.4090 5.8850 33.00 6.4980 4 345.0 18.90 396.90 8.79 20.90
0.08826 0.00 10.810 0 0.4130 6.4170 6.60 5.2873 4 305.0 19.20 383.73 6.72 24.20
0.15876 0.00 10.810 0 0.4130 5.9610 17.50 5.2873 4 305.0 19.20 376.94 9.88 21.70
0.09164 0.00 10.810 0 0.4130 6.0650 7.80 5.2873 4 305.0 19.20 390.91 5.52 22.80
0.19539 0.00 10.810 0 0.4130 6.2450 6.20 5.2873 4 305.0 19.20 377.17 7.54 23.40
0.07896 0.00 12.830 0 0.4370 6.2730 6.00 4.2515 5 398.0 18.70 394.92 6.78 24.10
0.09512 0.00 12.830 0 0.4370 6.2860 45.00 4.5026 5 398.0 18.70 383.23 8.94 21.40
0.10153 0.00 12.830 0 0.4370 6.2790 74.50 4.0522 5 398.0 18.70 373.66 11.97 20.00
0.08707 0.00 12.830 0 0.4370 6.1400 45.80 4.0905 5 398.0 18.70 386.96 10.27 20.80
0.05646 0.00 12.830 0 0.4370 6.2320 53.70 5.0141 5 398.0 18.70 386.40 12.34 21.20
0.08387 0.00 12.830 0 0.4370 5.8740 36.60 4.5026 5 398.0 18.70 396.06 9.10 20.30
0.04113 25.00 4.860 0 0.4260 6.7270 33.50 5.4007 4 281.0 19.00 396.90 5.29 28.00
0.04462 25.00 4.860 0 0.4260 6.6190 70.40 5.4007 4 281.0 19.00 395.63 7.22 23.90
0.03659 25.00 4.860 0 0.4260 6.3020 32.20 5.4007 4 281.0 19.00 396.90 6.72 24.80
0.03551 25.00 4.860 0 0.4260 6.1670 46.70 5.4007 4 281.0 19.00 390.64 7.51 22.90
0.05059 0.00 4.490 0 0.4490 6.3890 48.00 4.7794 3 247.0 18.50 396.90 9.62 23.90
0.05735 0.00 4.490 0 0.4490 6.6300 56.10 4.4377 3 247.0 18.50 392.30 6.53 26.60
0.05188 0.00 4.490 0 0.4490 6.0150 45.10 4.4272 3 247.0 18.50 395.99 12.86 22.50
0.07151 0.00 4.490 0 0.4490 6.1210 56.80 3.7476 3 247.0 18.50 395.15 8.44 22.20
0.05660 0.00 3.410 0 0.4890 7.0070 86.30 3.4217 2 270.0 17.80 396.90 5.50 23.60
0.05302 0.00 3.410 0 0.4890 7.0790 63.10 3.4145 2 270.0 17.80 396.06 5.70 28.70
0.04684 0.00 3.410 0 0.4890 6.4170 66.10 3.0923 2 270.0 17.80 392.18 8.81 22.60
0.03932 0.00 3.410 0 0.4890 6.4050 73.90 3.0921 2 270.0 17.80 393.55 8.20 22.00
0.04203 28.00 15.040 0 0.4640 6.4420 53.60 3.6659 4 270.0 18.20 395.01 8.16 22.90
0.02875 28.00 15.040 0 0.4640 6.2110 28.90 3.6659 4 270.0 18.20 396.33 6.21 25.00
0.04294 28.00 15.040 0 0.4640 6.2490 77.30 3.6150 4 270.0 18.20 396.90 10.59 20.60
0.12204 0.00 2.890 0 0.4450 6.6250 57.80 3.4952 2 276.0 18.00 357.98 6.65 28.40
0.11504 0.00 2.890 0 0.4450 6.1630 69.60 3.4952 2 276.0 18.00 391.83 11.34 21.40
0.12083 0.00 2.890 0 0.4450 8.0690 76.00 3.4952 2 276.0 18.00 396.90 4.21 38.70
0.08187 0.00 2.890 0 0.4450 7.8200 36.90 3.4952 2 276.0 18.00 393.53 3.57 43.80
0.06860 0.00 2.890 0 0.4450 7.4160 62.50 3.4952 2 276.0 18.00 396.90 6.19 33.20
0.14866 0.00 8.560 0 0.5200 6.7270 79.90 2.7778 5 384.0 20.90 394.76 9.42 27.50
0.11432 0.00 8.560 0 0.5200 6.7810 71.30 2.8561 5 384.0 20.90 395.58 7.67 26.50
0.22876 0.00 8.560 0 0.5200 6.4050 85.40 2.7147 5 384.0 20.90 70.80 10.63 18.60
0.21161 0.00 8.560 0 0.5200 6.1370 87.40 2.7147 5 384.0 20.90 394.47 13.44 19.30
0.13960 0.00 8.560 0 0.5200 6.1670 90.00 2.4210 5 384.0 20.90 392.69 12.33 20.10
0.13262 0.00 8.560 0 0.5200 5.8510 96.70 2.1069 5 384.0 20.90 394.05 16.47 19.50
0.17120 0.00 8.560 0 0.5200 5.8360 91.90 2.2110 5 384.0 20.90 395.67 18.66 19.50
0.13117 0.00 8.560 0 0.5200 6.1270 85.20 2.1224 5 384.0 20.90 387.69 14.09 20.40
0.12802 0.00 8.560 0 0.5200 6.4740 97.10 2.4329 5 384.0 20.90 395.24 12.27 19.80
0.26363 0.00 8.560 0 0.5200 6.2290 91.20 2.5451 5 384.0 20.90 391.23 15.55 19.40
0.10793 0.00 8.560 0 0.5200 6.1950 54.40 2.7778 5 384.0 20.90 393.49 13.00 21.70
0.10084 0.00 10.010 0 0.5470 6.7150 81.60 2.6775 6 432.0 17.80 395.59 10.16 22.80
0.12329 0.00 10.010 0 0.5470 5.9130 92.90 2.3534 6 432.0 17.80 394.95 16.21 18.80
0.22212 0.00 10.010 0 0.5470 6.0920 95.40 2.5480 6 432.0 17.80 396.90 17.09 18.70
0.14231 0.00 10.010 0 0.5470 6.2540 84.20 2.2565 6 432.0 17.80 388.74 10.45 18.50
0.17134 0.00 10.010 0 0.5470 5.9280 88.20 2.4631 6 432.0 17.80 344.91 15.76 18.30
0.13158 0.00 10.010 0 0.5470 6.1760 72.50 2.7301 6 432.0 17.80 393.30 12.04 21.20
0.15098 0.00 10.010 0 0.5470 6.0210 82.60 2.7474 6 432.0 17.80 394.51 10.30 19.20
0.13058 0.00 10.010 0 0.5470 5.8720 73.10 2.4775 6 432.0 17.80 338.63 15.37 20.40
0.14476 0.00 10.010 0 0.5470 5.7310 65.20 2.7592 6 432.0 17.80 391.50 13.61 19.30
0.06899 0.00 25.650 0 0.5810 5.8700 69.70 2.2577 2 188.0 19.10 389.15 14.37 22.00
0.07165 0.00 25.650 0 0.5810 6.0040 84.10 2.1974 2 188.0 19.10 377.67 14.27 20.30
0.09299 0.00 25.650 0 0.5810 5.9610 92.90 2.0869 2 188.0 19.10 378.09 17.93 20.50
0.15038 0.00 25.650 0 0.5810 5.8560 97.00 1.9444 2 188.0 19.10 370.31 25.41 17.30
0.09849 0.00 25.650 0 0.5810 5.8790 95.80 2.0063 2 188.0 19.10 379.38 17.58 18.80
0.16902 0.00 25.650 0 0.5810 5.9860 88.40 1.9929 2 188.0 19.10 385.02 14.81 21.40
0.38735 0.00 25.650 0 0.5810 5.6130 95.60 1.7572 2 188.0 19.10 359.29 27.26 15.70
0.25915 0.00 21.890 0 0.6240 5.6930 96.00 1.7883 4 437.0 21.20 392.11 17.19 16.20
0.32543 0.00 21.890 0 0.6240 6.4310 98.80 1.8125 4 437.0 21.20 396.90 15.39 18.00
0.88125 0.00 21.890 0 0.6240 5.6370 94.70 1.9799 4 437.0 21.20 396.90 18.34 14.30
0.34006 0.00 21.890 0 0.6240 6.4580 98.90 2.1185 4 437.0 21.20 395.04 12.60 19.20
1.19294 0.00 21.890 0 0.6240 6.3260 97.70 2.2710 4 437.0 21.20 396.90 12.26 19.60
0.59005 0.00 21.890 0 0.6240 6.3720 97.90 2.3274 4 437.0 21.20 385.76 11.12 23.00
0.32982 0.00 21.890 0 0.6240 5.8220 95.40 2.4699 4 437.0 21.20 388.69 15.03 18.40
0.97617 0.00 21.890 0 0.6240 5.7570 98.40 2.3460 4 437.0 21.20 262.76 17.31 15.60
0.55778 0.00 21.890 0 0.6240 6.3350 98.20 2.1107 4 437.0 21.20 394.67 16.96 18.10
0.32264 0.00 21.890 0 0.6240 5.9420 93.50 1.9669 4 437.0 21.20 378.25 16.90 17.40
0.35233 0.00 21.890 0 0.6240 6.4540 98.40 1.8498 4 437.0 21.20 394.08 14.59 17.10
0.24980 0.00 21.890 0 0.6240 5.8570 98.20 1.6686 4 437.0 21.20 392.04 21.32 13.30
0.54452 0.00 21.890 0 0.6240 6.1510 97.90 1.6687 4 437.0 21.20 396.90 18.46 17.80
0.29090 0.00 21.890 0 0.6240 6.1740 93.60 1.6119 4 437.0 21.20 388.08 24.16 14.00
1.62864 0.00 21.890 0 0.6240 5.0190 100.00 1.4394 4 437.0 21.20 396.90 34.41 14.40
3.32105 0.00 19.580 1 0.8710 5.4030 100.00 1.3216 5 403.0 14.70 396.90 26.82 13.40
4.09740 0.00 19.580 0 0.8710 5.4680 100.00 1.4118 5 403.0 14.70 396.90 26.42 15.60
2.77974 0.00 19.580 0 0.8710 4.9030 97.80 1.3459 5 403.0 14.70 396.90 29.29 11.80
2.37934 0.00 19.580 0 0.8710 6.1300 100.00 1.4191 5 403.0 14.70 172.91 27.80 13.80
2.15505 0.00 19.580 0 0.8710 5.6280 100.00 1.5166 5 403.0 14.70 169.27 16.65 15.60
2.36862 0.00 19.580 0 0.8710 4.9260 95.70 1.4608 5 403.0 14.70 391.71 29.53 14.60
2.33099 0.00 19.580 0 0.8710 5.1860 93.80 1.5296 5 403.0 14.70 356.99 28.32 17.80
2.73397 0.00 19.580 0 0.8710 5.5970 94.90 1.5257 5 403.0 14.70 351.85 21.45 15.40
1.65660 0.00 19.580 0 0.8710 6.1220 97.30 1.6180 5 403.0 14.70 372.80 14.10 21.50
1.49632 0.00 19.580 0 0.8710 5.4040 100.00 1.5916 5 403.0 14.70 341.60 13.28 19.60
1.12658 0.00 19.580 1 0.8710 5.0120 88.00 1.6102 5 403.0 14.70 343.28 12.12 15.30
2.14918 0.00 19.580 0 0.8710 5.7090 98.50 1.6232 5 403.0 14.70 261.95 15.79 19.40
1.41385 0.00 19.580 1 0.8710 6.1290 96.00 1.7494 5 403.0 14.70 321.02 15.12 17.00
3.53501 0.00 19.580 1 0.8710 6.1520 82.60 1.7455 5 403.0 14.70 88.01 15.02 15.60
2.44668 0.00 19.580 0 0.8710 5.2720 94.00 1.7364 5 403.0 14.70 88.63 16.14 13.10
1.22358 0.00 19.580 0 0.6050 6.9430 97.40 1.8773 5 403.0 14.70 363.43 4.59 41.30
1.34284 0.00 19.580 0 0.6050 6.0660 100.00 1.7573 5 403.0 14.70 353.89 6.43 24.30
1.42502 0.00 19.580 0 0.8710 6.5100 100.00 1.7659 5 403.0 14.70 364.31 7.39 23.30
1.27346 0.00 19.580 1 0.6050 6.2500 92.60 1.7984 5 403.0 14.70 338.92 5.50 27.00
1.46336 0.00 19.580 0 0.6050 7.4890 90.80 1.9709 5 403.0 14.70 374.43 1.73 50.00
1.83377 0.00 19.580 1 0.6050 7.8020 98.20 2.0407 5 403.0 14.70 389.61 1.92 50.00
1.51902 0.00 19.580 1 0.6050 8.3750 93.90 2.1620 5 403.0 14.70 388.45 3.32 50.00
2.24236 0.00 19.580 0 0.6050 5.8540 91.80 2.4220 5 403.0 14.70 395.11 11.64 22.70
2.92400 0.00 19.580 0 0.6050 6.1010 93.00 2.2834 5 403.0 14.70 240.16 9.81 25.00
2.01019 0.00 19.580 0 0.6050 7.9290 96.20 2.0459 5 403.0 14.70 369.30 3.70 50.00
1.80028 0.00 19.580 0 0.6050 5.8770 79.20 2.4259 5 403.0 14.70 227.61 12.14 23.80
2.30040 0.00 19.580 0 0.6050 6.3190 96.10 2.1000 5 403.0 14.70 297.09 11.10 23.80
2.44953 0.00 19.580 0 0.6050 6.4020 95.20 2.2625 5 403.0 14.70 330.04 11.32 22.30
1.20742 0.00 19.580 0 0.6050 5.8750 94.60 2.4259 5 403.0 14.70 292.29 14.43 17.40
2.31390 0.00 19.580 0 0.6050 5.8800 97.30 2.3887 5 403.0 14.70 348.13 12.03 19.10
0.13914 0.00 4.050 0 0.5100 5.5720 88.50 2.5961 5 296.0 16.60 396.90 14.69 23.10
0.09178 0.00 4.050 0 0.5100 6.4160 84.10 2.6463 5 296.0 16.60 395.50 9.04 23.60
0.08447 0.00 4.050 0 0.5100 5.8590 68.70 2.7019 5 296.0 16.60 393.23 9.64 22.60
0.06664 0.00 4.050 0 0.5100 6.5460 33.10 3.1323 5 296.0 16.60 390.96 5.33 29.40
0.07022 0.00 4.050 0 0.5100 6.0200 47.20 3.5549 5 296.0 16.60 393.23 10.11 23.20
0.05425 0.00 4.050 0 0.5100 6.3150 73.40 3.3175 5 296.0 16.60 395.60 6.29 24.60
0.06642 0.00 4.050 0 0.5100 6.8600 74.40 2.9153 5 296.0 16.60 391.27 6.92 29.90
0.05780 0.00 2.460 0 0.4880 6.9800 58.40 2.8290 3 193.0 17.80 396.90 5.04 37.20
0.06588 0.00 2.460 0 0.4880 7.7650 83.30 2.7410 3 193.0 17.80 395.56 7.56 39.80
0.06888 0.00 2.460 0 0.4880 6.1440 62.20 2.5979 3 193.0 17.80 396.90 9.45 36.20
0.09103 0.00 2.460 0 0.4880 7.1550 92.20 2.7006 3 193.0 17.80 394.12 4.82 37.90
0.10008 0.00 2.460 0 0.4880 6.5630 95.60 2.8470 3 193.0 17.80 396.90 5.68 32.50
0.08308 0.00 2.460 0 0.4880 5.6040 89.80 2.9879 3 193.0 17.80 391.00 13.98 26.40
0.06047 0.00 2.460 0 0.4880 6.1530 68.80 3.2797 3 193.0 17.80 387.11 13.15 29.60
0.05602 0.00 2.460 0 0.4880 7.8310 53.60 3.1992 3 193.0 17.80 392.63 4.45 50.00
0.07875 45.00 3.440 0 0.4370 6.7820 41.10 3.7886 5 398.0 15.20 393.87 6.68 32.00
0.12579 45.00 3.440 0 0.4370 6.5560 29.10 4.5667 5 398.0 15.20 382.84 4.56 29.80
0.08370 45.00 3.440 0 0.4370 7.1850 38.90 4.5667 5 398.0 15.20 396.90 5.39 34.90
0.09068 45.00 3.440 0 0.4370 6.9510 21.50 6.4798 5 398.0 15.20 377.68 5.10 37.00
0.06911 45.00 3.440 0 0.4370 6.7390 30.80 6.4798 5 398.0 15.20 389.71 4.69 30.50
0.08664 45.00 3.440 0 0.4370 7.1780 26.30 6.4798 5 398.0 15.20 390.49 2.87 36.40
0.02187 60.00 2.930 0 0.4010 6.8000 9.90 6.2196 1 265.0 15.60 393.37 5.03 31.10
0.01439 60.00 2.930 0 0.4010 6.6040 18.80 6.2196 1 265.0 15.60 376.70 4.38 29.10
0.01381 80.00 0.460 0 0.4220 7.8750 32.00 5.6484 4 255.0 14.40 394.23 2.97 50.00
0.04011 80.00 1.520 0 0.4040 7.2870 34.10 7.3090 2 329.0 12.60 396.90 4.08 33.30
0.04666 80.00 1.520 0 0.4040 7.1070 36.60 7.3090 2 329.0 12.60 354.31 8.61 30.30
0.03768 80.00 1.520 0 0.4040 7.2740 38.30 7.3090 2 329.0 12.60 392.20 6.62 34.60
0.03150 95.00 1.470 0 0.4030 6.9750 15.30 7.6534 3 402.0 17.00 396.90 4.56 34.90
0.01778 95.00 1.470 0 0.4030 7.1350 13.90 7.6534 3 402.0 17.00 384.30 4.45 32.90
0.03445 82.50 2.030 0 0.4150 6.1620 38.40 6.2700 2 348.0 14.70 393.77 7.43 24.10
0.02177 82.50 2.030 0 0.4150 7.6100 15.70 6.2700 2 348.0 14.70 395.38 3.11 42.30
0.03510 95.00 2.680 0 0.4161 7.8530 33.20 5.1180 4 224.0 14.70 392.78 3.81 48.50
0.02009 95.00 2.680 0 0.4161 8.0340 31.90 5.1180 4 224.0 14.70 390.55 2.88 50.00
0.13642 0.00 10.590 0 0.4890 5.8910 22.30 3.9454 4 277.0 18.60 396.90 10.87 22.60
0.22969 0.00 10.590 0 0.4890 6.3260 52.50 4.3549 4 277.0 18.60 394.87 10.97 24.40
0.25199 0.00 10.590 0 0.4890 5.7830 72.70 4.3549 4 277.0 18.60 389.43 18.06 22.50
0.13587 0.00 10.590 1 0.4890 6.0640 59.10 4.2392 4 277.0 18.60 381.32 14.66 24.40
0.43571 0.00 10.590 1 0.4890 5.3440 100.00 3.8750 4 277.0 18.60 396.90 23.09 20.00
0.17446 0.00 10.590 1 0.4890 5.9600 92.10 3.8771 4 277.0 18.60 393.25 17.27 21.70
0.37578 0.00 10.590 1 0.4890 5.4040 88.60 3.6650 4 277.0 18.60 395.24 23.98 19.30
0.21719 0.00 10.590 1 0.4890 5.8070 53.80 3.6526 4 277.0 18.60 390.94 16.03 22.40
0.14052 0.00 10.590 0 0.4890 6.3750 32.30 3.9454 4 277.0 18.60 385.81 9.38 28.10
0.28955 0.00 10.590 0 0.4890 5.4120 9.80 3.5875 4 277.0 18.60 348.93 29.55 23.70
0.19802 0.00 10.590 0 0.4890 6.1820 42.40 3.9454 4 277.0 18.60 393.63 9.47 25.00
0.04560 0.00 13.890 1 0.5500 5.8880 56.00 3.1121 5 276.0 16.40 392.80 13.51 23.30
0.07013 0.00 13.890 0 0.5500 6.6420 85.10 3.4211 5 276.0 16.40 392.78 9.69 28.70
0.11069 0.00 13.890 1 0.5500 5.9510 93.80 2.8893 5 276.0 16.40 396.90 17.92 21.50
0.11425 0.00 13.890 1 0.5500 6.3730 92.40 3.3633 5 276.0 16.40 393.74 10.50 23.00
0.35809 0.00 6.200 1 0.5070 6.9510 88.50 2.8617 8 307.0 17.40 391.70 9.71 26.70
0.40771 0.00 6.200 1 0.5070 6.1640 91.30 3.0480 8 307.0 17.40 395.24 21.46 21.70
0.62356 0.00 6.200 1 0.5070 6.8790 77.70 3.2721 8 307.0 17.40 390.39 9.93 27.50
0.61470 0.00 6.200 0 0.5070 6.6180 80.80 3.2721 8 307.0 17.40 396.90 7.60 30.10
0.31533 0.00 6.200 0 0.5040 8.2660 78.30 2.8944 8 307.0 17.40 385.05 4.14 44.80
0.52693 0.00 6.200 0 0.5040 8.7250 83.00 2.8944 8 307.0 17.40 382.00 4.63 50.00
0.38214 0.00 6.200 0 0.5040 8.0400 86.50 3.2157 8 307.0 17.40 387.38 3.13 37.60
0.41238 0.00 6.200 0 0.5040 7.1630 79.90 3.2157 8 307.0 17.40 372.08 6.36 31.60
0.29819 0.00 6.200 0 0.5040 7.6860 17.00 3.3751 8 307.0 17.40 377.51 3.92 46.70
0.44178 0.00 6.200 0 0.5040 6.5520 21.40 3.3751 8 307.0 17.40 380.34 3.76 31.50
0.53700 0.00 6.200 0 0.5040 5.9810 68.10 3.6715 8 307.0 17.40 378.35 11.65 24.30
0.46296 0.00 6.200 0 0.5040 7.4120 76.90 3.6715 8 307.0 17.40 376.14 5.25 31.70
0.57529 0.00 6.200 0 0.5070 8.3370 73.30 3.8384 8 307.0 17.40 385.91 2.47 41.70
0.33147 0.00 6.200 0 0.5070 8.2470 70.40 3.6519 8 307.0 17.40 378.95 3.95 48.30
0.44791 0.00 6.200 1 0.5070 6.7260 66.50 3.6519 8 307.0 17.40 360.20 8.05 29.00
0.33045 0.00 6.200 0 0.5070 6.0860 61.50 3.6519 8 307.0 17.40 376.75 10.88 24.00
0.52058 0.00 6.200 1 0.5070 6.6310 76.50 4.1480 8 307.0 17.40 388.45 9.54 25.10
0.51183 0.00 6.200 0 0.5070 7.3580 71.60 4.1480 8 307.0 17.40 390.07 4.73 31.50
0.08244 30.00 4.930 0 0.4280 6.4810 18.50 6.1899 6 300.0 16.60 379.41 6.36 23.70
0.09252 30.00 4.930 0 0.4280 6.6060 42.20 6.1899 6 300.0 16.60 383.78 7.37 23.30
0.11329 30.00 4.930 0 0.4280 6.8970 54.30 6.3361 6 300.0 16.60 391.25 11.38 22.00
0.10612 30.00 4.930 0 0.4280 6.0950 65.10 6.3361 6 300.0 16.60 394.62 12.40 20.10
0.10290 30.00 4.930 0 0.4280 6.3580 52.90 7.0355 6 300.0 16.60 372.75 11.22 22.20
0.12757 30.00 4.930 0 0.4280 6.3930 7.80 7.0355 6 300.0 16.60 374.71 5.19 23.70
0.20608 22.00 5.860 0 0.4310 5.5930 76.50 7.9549 7 330.0 19.10 372.49 12.50 17.60
0.19133 22.00 5.860 0 0.4310 5.6050 70.20 7.9549 7 330.0 19.10 389.13 18.46 18.50
0.33983 22.00 5.860 0 0.4310 6.1080 34.90 8.0555 7 330.0 19.10 390.18 9.16 24.30
0.19657 22.00 5.860 0 0.4310 6.2260 79.20 8.0555 7 330.0 19.10 376.14 10.15 20.50
0.16439 22.00 5.860 0 0.4310 6.4330 49.10 7.8265 7 330.0 19.10 374.71 9.52 24.50
0.19073 22.00 5.860 0 0.4310 6.7180 17.50 7.8265 7 330.0 19.10 393.74 6.56 26.20
0.14030 22.00 5.860 0 0.4310 6.4870 13.00 7.3967 7 330.0 19.10 396.28 5.90 24.40
0.21409 22.00 5.860 0 0.4310 6.4380 8.90 7.3967 7 330.0 19.10 377.07 3.59 24.80
0.08221 22.00 5.860 0 0.4310 6.9570 6.80 8.9067 7 330.0 19.10 386.09 3.53 29.60
0.36894 22.00 5.860 0 0.4310 8.2590 8.40 8.9067 7 330.0 19.10 396.90 3.54 42.80
0.04819 80.00 3.640 0 0.3920 6.1080 32.00 9.2203 1 315.0 16.40 392.89 6.57 21.90
0.03548 80.00 3.640 0 0.3920 5.8760 19.10 9.2203 1 315.0 16.40 395.18 9.25 20.90
0.01538 90.00 3.750 0 0.3940 7.4540 34.20 6.3361 3 244.0 15.90 386.34 3.11 44.00
0.61154 20.00 3.970 0 0.6470 8.7040 86.90 1.8010 5 264.0 13.00 389.70 5.12 50.00
0.66351 20.00 3.970 0 0.6470 7.3330 100.00 1.8946 5 264.0 13.00 383.29 7.79 36.00
0.65665 20.00 3.970 0 0.6470 6.8420 100.00 2.0107 5 264.0 13.00 391.93 6.90 30.10
0.54011 20.00 3.970 0 0.6470 7.2030 81.80 2.1121 5 264.0 13.00 392.80 9.59 33.80
0.53412 20.00 3.970 0 0.6470 7.5200 89.40 2.1398 5 264.0 13.00 388.37 7.26 43.10
0.52014 20.00 3.970 0 0.6470 8.3980 91.50 2.2885 5 264.0 13.00 386.86 5.91 48.80
0.82526 20.00 3.970 0 0.6470 7.3270 94.50 2.0788 5 264.0 13.00 393.42 11.25 31.00
0.55007 20.00 3.970 0 0.6470 7.2060 91.60 1.9301 5 264.0 13.00 387.89 8.10 36.50
0.76162 20.00 3.970 0 0.6470 5.5600 62.80 1.9865 5 264.0 13.00 392.40 10.45 22.80
0.78570 20.00 3.970 0 0.6470 7.0140 84.60 2.1329 5 264.0 13.00 384.07 14.79 30.70
0.57834 20.00 3.970 0 0.5750 8.2970 67.00 2.4216 5 264.0 13.00 384.54 7.44 50.00
0.54050 20.00 3.970 0 0.5750 7.4700 52.60 2.8720 5 264.0 13.00 390.30 3.16 43.50
0.09065 20.00 6.960 1 0.4640 5.9200 61.50 3.9175 3 223.0 18.60 391.34 13.65 20.70
0.29916 20.00 6.960 0 0.4640 5.8560 42.10 4.4290 3 223.0 18.60 388.65 13.00 21.10
0.16211 20.00 6.960 0 0.4640 6.2400 16.30 4.4290 3 223.0 18.60 396.90 6.59 25.20
0.11460 20.00 6.960 0 0.4640 6.5380 58.70 3.9175 3 223.0 18.60 394.96 7.73 24.40
0.22188 20.00 6.960 1 0.4640 7.6910 51.80 4.3665 3 223.0 18.60 390.77 6.58 35.20
0.05644 40.00 6.410 1 0.4470 6.7580 32.90 4.0776 4 254.0 17.60 396.90 3.53 32.40
0.09604 40.00 6.410 0 0.4470 6.8540 42.80 4.2673 4 254.0 17.60 396.90 2.98 32.00
0.10469 40.00 6.410 1 0.4470 7.2670 49.00 4.7872 4 254.0 17.60 389.25 6.05 33.20
0.06127 40.00 6.410 1 0.4470 6.8260 27.60 4.8628 4 254.0 17.60 393.45 4.16 33.10
0.07978 40.00 6.410 0 0.4470 6.4820 32.10 4.1403 4 254.0 17.60 396.90 7.19 29.10
0.21038 20.00 3.330 0 0.4429 6.8120 32.20 4.1007 5 216.0 14.90 396.90 4.85 35.10
0.03578 20.00 3.330 0 0.4429 7.8200 64.50 4.6947 5 216.0 14.90 387.31 3.76 45.40
0.03705 20.00 3.330 0 0.4429 6.9680 37.20 5.2447 5 216.0 14.90 392.23 4.59 35.40
0.06129 20.00 3.330 1 0.4429 7.6450 49.70 5.2119 5 216.0 14.90 377.07 3.01 46.00
0.01501 90.00 1.210 1 0.4010 7.9230 24.80 5.8850 1 198.0 13.60 395.52 3.16 50.00
0.00906 90.00 2.970 0 0.4000 7.0880 20.80 7.3073 1 285.0 15.30 394.72 7.85 32.20
0.01096 55.00 2.250 0 0.3890 6.4530 31.90 7.3073 1 300.0 15.30 394.72 8.23 22.00
0.01965 80.00 1.760 0 0.3850 6.2300 31.50 9.0892 1 241.0 18.20 341.60 12.93 20.10
0.03871 52.50 5.320 0 0.4050 6.2090 31.30 7.3172 6 293.0 16.60 396.90 7.14 23.20
0.04590 52.50 5.320 0 0.4050 6.3150 45.60 7.3172 6 293.0 16.60 396.90 7.60 22.30
0.04297 52.50 5.320 0 0.4050 6.5650 22.90 7.3172 6 293.0 16.60 371.72 9.51 24.80
0.03502 80.00 4.950 0 0.4110 6.8610 27.90 5.1167 4 245.0 19.20 396.90 3.33 28.50
0.07886 80.00 4.950 0 0.4110 7.1480 27.70 5.1167 4 245.0 19.20 396.90 3.56 37.30
0.03615 80.00 4.950 0 0.4110 6.6300 23.40 5.1167 4 245.0 19.20 396.90 4.70 27.90
0.08265 0.00 13.920 0 0.4370 6.1270 18.40 5.5027 4 289.0 16.00 396.90 8.58 23.90
0.08199 0.00 13.920 0 0.4370 6.0090 42.30 5.5027 4 289.0 16.00 396.90 10.40 21.70
0.12932 0.00 13.920 0 0.4370 6.6780 31.10 5.9604 4 289.0 16.00 396.90 6.27 28.60
0.05372 0.00 13.920 0 0.4370 6.5490 51.00 5.9604 4 289.0 16.00 392.85 7.39 27.10
0.14103 0.00 13.920 0 0.4370 5.7900 58.00 6.3200 4 289.0 16.00 396.90 15.84 20.30
0.06466 70.00 2.240 0 0.4000 6.3450 20.10 7.8278 5 358.0 14.80 368.24 4.97 22.50
0.05561 70.00 2.240 0 0.4000 7.0410 10.00 7.8278 5 358.0 14.80 371.58 4.74 29.00
0.04417 70.00 2.240 0 0.4000 6.8710 47.40 7.8278 5 358.0 14.80 390.86 6.07 24.80
0.03537 34.00 6.090 0 0.4330 6.5900 40.40 5.4917 7 329.0 16.10 395.75 9.50 22.00
0.09266 34.00 6.090 0 0.4330 6.4950 18.40 5.4917 7 329.0 16.10 383.61 8.67 26.40
0.10000 34.00 6.090 0 0.4330 6.9820 17.70 5.4917 7 329.0 16.10 390.43 4.86 33.10
0.05515 33.00 2.180 0 0.4720 7.2360 41.10 4.0220 7 222.0 18.40 393.68 6.93 36.10
0.05479 33.00 2.180 0 0.4720 6.6160 58.10 3.3700 7 222.0 18.40 393.36 8.93 28.40
0.07503 33.00 2.180 0 0.4720 7.4200 71.90 3.0992 7 222.0 18.40 396.90 6.47 33.40
0.04932 33.00 2.180 0 0.4720 6.8490 70.30 3.1827 7 222.0 18.40 396.90 7.53 28.20
0.49298 0.00 9.900 0 0.5440 6.6350 82.50 3.3175 4 304.0 18.40 396.90 4.54 22.80
0.34940 0.00 9.900 0 0.5440 5.9720 76.70 3.1025 4 304.0 18.40 396.24 9.97 20.30
2.63548 0.00 9.900 0 0.5440 4.9730 37.80 2.5194 4 304.0 18.40 350.45 12.64 16.10
0.79041 0.00 9.900 0 0.5440 6.1220 52.80 2.6403 4 304.0 18.40 396.90 5.98 22.10
0.26169 0.00 9.900 0 0.5440 6.0230 90.40 2.8340 4 304.0 18.40 396.30 11.72 19.40
0.26938 0.00 9.900 0 0.5440 6.2660 82.80 3.2628 4 304.0 18.40 393.39 7.90 21.60
0.36920 0.00 9.900 0 0.5440 6.5670 87.30 3.6023 4 304.0 18.40 395.69 9.28 23.80
0.25356 0.00 9.900 0 0.5440 5.7050 77.70 3.9450 4 304.0 18.40 396.42 11.50 16.20
0.31827 0.00 9.900 0 0.5440 5.9140 83.20 3.9986 4 304.0 18.40 390.70 18.33 17.80
0.24522 0.00 9.900 0 0.5440 5.7820 71.70 4.0317 4 304.0 18.40 396.90 15.94 19.80
0.40202 0.00 9.900 0 0.5440 6.3820 67.20 3.5325 4 304.0 18.40 395.21 10.36 23.10
0.47547 0.00 9.900 0 0.5440 6.1130 58.80 4.0019 4 304.0 18.40 396.23 12.73 21.00
0.16760 0.00 7.380 0 0.4930 6.4260 52.30 4.5404 5 287.0 19.60 396.90 7.20 23.80
0.18159 0.00 7.380 0 0.4930 6.3760 54.30 4.5404 5 287.0 19.60 396.90 6.87 23.10
0.35114 0.00 7.380 0 0.4930 6.0410 49.90 4.7211 5 287.0 19.60 396.90 7.70 20.40
0.28392 0.00 7.380 0 0.4930 5.7080 74.30 4.7211 5 287.0 19.60 391.13 11.74 18.50
0.34109 0.00 7.380 0 0.4930 6.4150 40.10 4.7211 5 287.0 19.60 396.90 6.12 25.00
0.19186 0.00 7.380 0 0.4930 6.4310 14.70 5.4159 5 287.0 19.60 393.68 5.08 24.60
0.30347 0.00 7.380 0 0.4930 6.3120 28.90 5.4159 5 287.0 19.60 396.90 6.15 23.00
0.24103 0.00 7.380 0 0.4930 6.0830 43.70 5.4159 5 287.0 19.60 396.90 12.79 22.20
0.06617 0.00 3.240 0 0.4600 5.8680 25.80 5.2146 4 430.0 16.90 382.44 9.97 19.30
0.06724 0.00 3.240 0 0.4600 6.3330 17.20 5.2146 4 430.0 16.90 375.21 7.34 22.60
0.04544 0.00 3.240 0 0.4600 6.1440 32.20 5.8736 4 430.0 16.90 368.57 9.09 19.80
0.05023 35.00 6.060 0 0.4379 5.7060 28.40 6.6407 1 304.0 16.90 394.02 12.43 17.10
0.03466 35.00 6.060 0 0.4379 6.0310 23.30 6.6407 1 304.0 16.90 362.25 7.83 19.40
0.05083 0.00 5.190 0 0.5150 6.3160 38.10 6.4584 5 224.0 20.20 389.71 5.68 22.20
0.03738 0.00 5.190 0 0.5150 6.3100 38.50 6.4584 5 224.0 20.20 389.40 6.75 20.70
0.03961 0.00 5.190 0 0.5150 6.0370 34.50 5.9853 5 224.0 20.20 396.90 8.01 21.10
0.03427 0.00 5.190 0 0.5150 5.8690 46.30 5.2311 5 224.0 20.20 396.90 9.80 19.50
0.03041 0.00 5.190 0 0.5150 5.8950 59.60 5.6150 5 224.0 20.20 394.81 10.56 18.50
0.03306 0.00 5.190 0 0.5150 6.0590 37.30 4.8122 5 224.0 20.20 396.14 8.51 20.60
0.05497 0.00 5.190 0 0.5150 5.9850 45.40 4.8122 5 224.0 20.20 396.90 9.74 19.00
0.06151 0.00 5.190 0 0.5150 5.9680 58.50 4.8122 5 224.0 20.20 396.90 9.29 18.70
0.01301 35.00 1.520 0 0.4420 7.2410 49.30 7.0379 1 284.0 15.50 394.74 5.49 32.70
0.02498 0.00 1.890 0 0.5180 6.5400 59.70 6.2669 1 422.0 15.90 389.96 8.65 16.50
0.02543 55.00 3.780 0 0.4840 6.6960 56.40 5.7321 5 370.0 17.60 396.90 7.18 23.90
0.03049 55.00 3.780 0 0.4840 6.8740 28.10 6.4654 5 370.0 17.60 387.97 4.61 31.20
0.03113 0.00 4.390 0 0.4420 6.0140 48.50 8.0136 3 352.0 18.80 385.64 10.53 17.50
0.06162 0.00 4.390 0 0.4420 5.8980 52.30 8.0136 3 352.0 18.80 364.61 12.67 17.20
0.01870 85.00 4.150 0 0.4290 6.5160 27.70 8.5353 4 351.0 17.90 392.43 6.36 23.10
0.01501 80.00 2.010 0 0.4350 6.6350 29.70 8.3440 4 280.0 17.00 390.94 5.99 24.50
0.02899 40.00 1.250 0 0.4290 6.9390 34.50 8.7921 1 335.0 19.70 389.85 5.89 26.60
0.06211 40.00 1.250 0 0.4290 6.4900 44.40 8.7921 1 335.0 19.70 396.90 5.98 22.90
0.07950 60.00 1.690 0 0.4110 6.5790 35.90 10.7103 4 411.0 18.30 370.78 5.49 24.10
0.07244 60.00 1.690 0 0.4110 5.8840 18.50 10.7103 4 411.0 18.30 392.33 7.79 18.60
0.01709 90.00 2.020 0 0.4100 6.7280 36.10 12.1265 5 187.0 17.00 384.46 4.50 30.10
0.04301 80.00 1.910 0 0.4130 5.6630 21.90 10.5857 4 334.0 22.00 382.80 8.05 18.20
0.10659 80.00 1.910 0 0.4130 5.9360 19.50 10.5857 4 334.0 22.00 376.04 5.57 20.60
8.98296 0.00 18.100 1 0.7700 6.2120 97.40 2.1222 24 666.0 20.20 377.73 17.60 17.80
3.84970 0.00 18.100 1 0.7700 6.3950 91.00 2.5052 24 666.0 20.20 391.34 13.27 21.70
5.20177 0.00 18.100 1 0.7700 6.1270 83.40 2.7227 24 666.0 20.20 395.43 11.48 22.70
4.26131 0.00 18.100 0 0.7700 6.1120 81.30 2.5091 24 666.0 20.20 390.74 12.67 22.60
4.54192 0.00 18.100 0 0.7700 6.3980 88.00 2.5182 24 666.0 20.20 374.56 7.79 25.00
3.83684 0.00 18.100 0 0.7700 6.2510 91.10 2.2955 24 666.0 20.20 350.65 14.19 19.90
3.67822 0.00 18.100 0 0.7700 5.3620 96.20 2.1036 24 666.0 20.20 380.79 10.19 20.80
4.22239 0.00 18.100 1 0.7700 5.8030 89.00 1.9047 24 666.0 20.20 353.04 14.64 16.80
3.47428 0.00 18.100 1 0.7180 8.7800 82.90 1.9047 24 666.0 20.20 354.55 5.29 21.90
4.55587 0.00 18.100 0 0.7180 3.5610 87.90 1.6132 24 666.0 20.20 354.70 7.12 27.50
3.69695 0.00 18.100 0 0.7180 4.9630 91.40 1.7523 24 666.0 20.20 316.03 14.00 21.90
13.52220 0.00 18.100 0 0.6310 3.8630 100.00 1.5106 24 666.0 20.20 131.42 13.33 23.10
4.89822 0.00 18.100 0 0.6310 4.9700 100.00 1.3325 24 666.0 20.20 375.52 3.26 50.00
5.66998 0.00 18.100 1 0.6310 6.6830 96.80 1.3567 24 666.0 20.20 375.33 3.73 50.00
6.53876 0.00 18.100 1 0.6310 7.0160 97.50 1.2024 24 666.0 20.20 392.05 2.96 50.00
9.23230 0.00 18.100 0 0.6310 6.2160 100.00 1.1691 24 666.0 20.20 366.15 9.53 50.00
8.26725 0.00 18.100 1 0.6680 5.8750 89.60 1.1296 24 666.0 20.20 347.88 8.88 50.00
11.10810 0.00 18.100 0 0.6680 4.9060 100.00 1.1742 24 666.0 20.20 396.90 34.77 13.80
18.49820 0.00 18.100 0 0.6680 4.1380 100.00 1.1370 24 666.0 20.20 396.90 37.97 13.80
19.60910 0.00 18.100 0 0.6710 7.3130 97.90 1.3163 24 666.0 20.20 396.90 13.44 15.00
15.28800 0.00 18.100 0 0.6710 6.6490 93.30 1.3449 24 666.0 20.20 363.02 23.24 13.90
9.82349 0.00 18.100 0 0.6710 6.7940 98.80 1.3580 24 666.0 20.20 396.90 21.24 13.30
23.64820 0.00 18.100 0 0.6710 6.3800 96.20 1.3861 24 666.0 20.20 396.90 23.69 13.10
17.86670 0.00 18.100 0 0.6710 6.2230 100.00 1.3861 24 666.0 20.20 393.74 21.78 10.20
88.97620 0.00 18.100 0 0.6710 6.9680 91.90 1.4165 24 666.0 20.20 396.90 17.21 10.40
15.87440 0.00 18.100 0 0.6710 6.5450 99.10 1.5192 24 666.0 20.20 396.90 21.08 10.90
9.18702 0.00 18.100 0 0.7000 5.5360 100.00 1.5804 24 666.0 20.20 396.90 23.60 11.30
7.99248 0.00 18.100 0 0.7000 5.5200 100.00 1.5331 24 666.0 20.20 396.90 24.56 12.30
20.08490 0.00 18.100 0 0.7000 4.3680 91.20 1.4395 24 666.0 20.20 285.83 30.63 8.80
16.81180 0.00 18.100 0 0.7000 5.2770 98.10 1.4261 24 666.0 20.20 396.90 30.81 7.20
24.39380 0.00 18.100 0 0.7000 4.6520 100.00 1.4672 24 666.0 20.20 396.90 28.28 10.50
22.59710 0.00 18.100 0 0.7000 5.0000 89.50 1.5184 24 666.0 20.20 396.90 31.99 7.40
14.33370 0.00 18.100 0 0.7000 4.8800 100.00 1.5895 24 666.0 20.20 372.92 30.62 10.20
8.15174 0.00 18.100 0 0.7000 5.3900 98.90 1.7281 24 666.0 20.20 396.90 20.85 11.50
6.96215 0.00 18.100 0 0.7000 5.7130 97.00 1.9265 24 666.0 20.20 394.43 17.11 15.10
5.29305 0.00 18.100 0 0.7000 6.0510 82.50 2.1678 24 666.0 20.20 378.38 18.76 23.20
11.57790 0.00 18.100 0 0.7000 5.0360 97.00 1.7700 24 666.0 20.20 396.90 25.68 9.70
8.64476 0.00 18.100 0 0.6930 6.1930 92.60 1.7912 24 666.0 20.20 396.90 15.17 13.80
13.35980 0.00 18.100 0 0.6930 5.8870 94.70 1.7821 24 666.0 20.20 396.90 16.35 12.70
8.71675 0.00 18.100 0 0.6930 6.4710 98.80 1.7257 24 666.0 20.20 391.98 17.12 13.10
5.87205 0.00 18.100 0 0.6930 6.4050 96.00 1.6768 24 666.0 20.20 396.90 19.37 12.50
7.67202 0.00 18.100 0 0.6930 5.7470 98.90 1.6334 24 666.0 20.20 393.10 19.92 8.50
38.35180 0.00 18.100 0 0.6930 5.4530 100.00 1.4896 24 666.0 20.20 396.90 30.59 5.00
9.91655 0.00 18.100 0 0.6930 5.8520 77.80 1.5004 24 666.0 20.20 338.16 29.97 6.30
25.04610 0.00 18.100 0 0.6930 5.9870 100.00 1.5888 24 666.0 20.20 396.90 26.77 5.60
14.23620 0.00 18.100 0 0.6930 6.3430 100.00 1.5741 24 666.0 20.20 396.90 20.32 7.20
9.59571 0.00 18.100 0 0.6930 6.4040 100.00 1.6390 24 666.0 20.20 376.11 20.31 12.10
24.80170 0.00 18.100 0 0.6930 5.3490 96.00 1.7028 24 666.0 20.20 396.90 19.77 8.30
41.52920 0.00 18.100 0 0.6930 5.5310 85.40 1.6074 24 666.0 20.20 329.46 27.38 8.50
67.92080 0.00 18.100 0 0.6930 5.6830 100.00 1.4254 24 666.0 20.20 384.97 22.98 5.00
20.71620 0.00 18.100 0 0.6590 4.1380 100.00 1.1781 24 666.0 20.20 370.22 23.34 11.90
11.95110 0.00 18.100 0 0.6590 5.6080 100.00 1.2852 24 666.0 20.20 332.09 12.13 27.90
7.40389 0.00 18.100 0 0.5970 5.6170 97.90 1.4547 24 666.0 20.20 314.64 26.40 17.20
14.43830 0.00 18.100 0 0.5970 6.8520 100.00 1.4655 24 666.0 20.20 179.36 19.78 27.50
51.13580 0.00 18.100 0 0.5970 5.7570 100.00 1.4130 24 666.0 20.20 2.60 10.11 15.00
14.05070 0.00 18.100 0 0.5970 6.6570 100.00 1.5275 24 666.0 20.20 35.05 21.22 17.20
18.81100 0.00 18.100 0 0.5970 4.6280 100.00 1.5539 24 666.0 20.20 28.79 34.37 17.90
28.65580 0.00 18.100 0 0.5970 5.1550 100.00 1.5894 24 666.0 20.20 210.97 20.08 16.30
45.74610 0.00 18.100 0 0.6930 4.5190 100.00 1.6582 24 666.0 20.20 88.27 36.98 7.00
18.08460 0.00 18.100 0 0.6790 6.4340 100.00 1.8347 24 666.0 20.20 27.25 29.05 7.20
10.83420 0.00 18.100 0 0.6790 6.7820 90.80 1.8195 24 666.0 20.20 21.57 25.79 7.50
25.94060 0.00 18.100 0 0.6790 5.3040 89.10 1.6475 24 666.0 20.20 127.36 26.64 10.40
73.53410 0.00 18.100 0 0.6790 5.9570 100.00 1.8026 24 666.0 20.20 16.45 20.62 8.80
11.81230 0.00 18.100 0 0.7180 6.8240 76.50 1.7940 24 666.0 20.20 48.45 22.74 8.40
11.08740 0.00 18.100 0 0.7180 6.4110 100.00 1.8589 24 666.0 20.20 318.75 15.02 16.70
7.02259 0.00 18.100 0 0.7180 6.0060 95.30 1.8746 24 666.0 20.20 319.98 15.70 14.20
12.04820 0.00 18.100 0 0.6140 5.6480 87.60 1.9512 24 666.0 20.20 291.55 14.10 20.80
7.05042 0.00 18.100 0 0.6140 6.1030 85.10 2.0218 24 666.0 20.20 2.52 23.29 13.40
8.79212 0.00 18.100 0 0.5840 5.5650 70.60 2.0635 24 666.0 20.20 3.65 17.16 11.70
15.86030 0.00 18.100 0 0.6790 5.8960 95.40 1.9096 24 666.0 20.20 7.68 24.39 8.30
12.24720 0.00 18.100 0 0.5840 5.8370 59.70 1.9976 24 666.0 20.20 24.65 15.69 10.20
37.66190 0.00 18.100 0 0.6790 6.2020 78.70 1.8629 24 666.0 20.20 18.82 14.52 10.90
7.36711 0.00 18.100 0 0.6790 6.1930 78.10 1.9356 24 666.0 20.20 96.73 21.52 11.00
9.33889 0.00 18.100 0 0.6790 6.3800 95.60 1.9682 24 666.0 20.20 60.72 24.08 9.50
8.49213 0.00 18.100 0 0.5840 6.3480 86.10 2.0527 24 666.0 20.20 83.45 17.64 14.50
10.06230 0.00 18.100 0 0.5840 6.8330 94.30 2.0882 24 666.0 20.20 81.33 19.69 14.10
6.44405 0.00 18.100 0 0.5840 6.4250 74.80 2.2004 24 666.0 20.20 97.95 12.03 16.10
5.58107 0.00 18.100 0 0.7130 6.4360 87.90 2.3158 24 666.0 20.20 100.19 16.22 14.30
13.91340 0.00 18.100 0 0.7130 6.2080 95.00 2.2222 24 666.0 20.20 100.63 15.17 11.70
11.16040 0.00 18.100 0 0.7400 6.6290 94.60 2.1247 24 666.0 20.20 109.85 23.27 13.40
14.42080 0.00 18.100 0 0.7400 6.4610 93.30 2.0026 24 666.0 20.20 27.49 18.05 9.60
15.17720 0.00 18.100 0 0.7400 6.1520 100.00 1.9142 24 666.0 20.20 9.32 26.45 8.70
13.67810 0.00 18.100 0 0.7400 5.9350 87.90 1.8206 24 666.0 20.20 68.95 34.02 8.40
9.39063 0.00 18.100 0 0.7400 5.6270 93.90 1.8172 24 666.0 20.20 396.90 22.88 12.80
22.05110 0.00 18.100 0 0.7400 5.8180 92.40 1.8662 24 666.0 20.20 391.45 22.11 10.50
9.72418 0.00 18.100 0 0.7400 6.4060 97.20 2.0651 24 666.0 20.20 385.96 19.52 17.10
5.66637 0.00 18.100 0 0.7400 6.2190 100.00 2.0048 24 666.0 20.20 395.69 16.59 18.40
9.96654 0.00 18.100 0 0.7400 6.4850 100.00 1.9784 24 666.0 20.20 386.73 18.85 15.40
12.80230 0.00 18.100 0 0.7400 5.8540 96.60 1.8956 24 666.0 20.20 240.52 23.79 10.80
10.67180 0.00 18.100 0 0.7400 6.4590 94.80 1.9879 24 666.0 20.20 43.06 23.98 11.80
6.28807 0.00 18.100 0 0.7400 6.3410 96.40 2.0720 24 666.0 20.20 318.01 17.79 14.90
9.92485 0.00 18.100 0 0.7400 6.2510 96.60 2.1980 24 666.0 20.20 388.52 16.44 12.60
9.32909 0.00 18.100 0 0.7130 6.1850 98.70 2.2616 24 666.0 20.20 396.90 18.13 14.10
7.52601 0.00 18.100 0 0.7130 6.4170 98.30 2.1850 24 666.0 20.20 304.21 19.31 13.00
6.71772 0.00 18.100 0 0.7130 6.7490 92.60 2.3236 24 666.0 20.20 0.32 17.44 13.40
5.44114 0.00 18.100 0 0.7130 6.6550 98.20 2.3552 24 666.0 20.20 355.29 17.73 15.20
5.09017 0.00 18.100 0 0.7130 6.2970 91.80 2.3682 24 666.0 20.20 385.09 17.27 16.10
8.24809 0.00 18.100 0 0.7130 7.3930 99.30 2.4527 24 666.0 20.20 375.87 16.74 17.80
9.51363 0.00 18.100 0 0.7130 6.7280 94.10 2.4961 24 666.0 20.20 6.68 18.71 14.90
4.75237 0.00 18.100 0 0.7130 6.5250 86.50 2.4358 24 666.0 20.20 50.92 18.13 14.10
4.66883 0.00 18.100 0 0.7130 5.9760 87.90 2.5806 24 666.0 20.20 10.48 19.01 12.70
8.20058 0.00 18.100 0 0.7130 5.9360 80.30 2.7792 24 666.0 20.20 3.50 16.94 13.50
7.75223 0.00 18.100 0 0.7130 6.3010 83.70 2.7831 24 666.0 20.20 272.21 16.23 14.90
6.80117 0.00 18.100 0 0.7130 6.0810 84.40 2.7175 24 666.0 20.20 396.90 14.70 20.00
4.81213 0.00 18.100 0 0.7130 6.7010 90.00 2.5975 24 666.0 20.20 255.23 16.42 16.40
3.69311 0.00 18.100 0 0.7130 6.3760 88.40 2.5671 24 666.0 20.20 391.43 14.65 17.70
6.65492 0.00 18.100 0 0.7130 6.3170 83.00 2.7344 24 666.0 20.20 396.90 13.99 19.50
5.82115 0.00 18.100 0 0.7130 6.5130 89.90 2.8016 24 666.0 20.20 393.82 10.29 20.20
7.83932 0.00 18.100 0 0.6550 6.2090 65.40 2.9634 24 666.0 20.20 396.90 13.22 21.40
3.16360 0.00 18.100 0 0.6550 5.7590 48.20 3.0665 24 666.0 20.20 334.40 14.13 19.90
3.77498 0.00 18.100 0 0.6550 5.9520 84.70 2.8715 24 666.0 20.20 22.01 17.15 19.00
4.42228 0.00 18.100 0 0.5840 6.0030 94.50 2.5403 24 666.0 20.20 331.29 21.32 19.10
15.57570 0.00 18.100 0 0.5800 5.9260 71.00 2.9084 24 666.0 20.20 368.74 18.13 19.10
13.07510 0.00 18.100 0 0.5800 5.7130 56.70 2.8237 24 666.0 20.20 396.90 14.76 20.10
4.34879 0.00 18.100 0 0.5800 6.1670 84.00 3.0334 24 666.0 20.20 396.90 16.29 19.90
4.03841 0.00 18.100 0 0.5320 6.2290 90.70 3.0993 24 666.0 20.20 395.33 12.87 19.60
3.56868 0.00 18.100 0 0.5800 6.4370 75.00 2.8965 24 666.0 20.20 393.37 14.36 23.20
4.64689 0.00 18.100 0 0.6140 6.9800 67.60 2.5329 24 666.0 20.20 374.68 11.66 29.80
8.05579 0.00 18.100 0 0.5840 5.4270 95.40 2.4298 24 666.0 20.20 352.58 18.14 13.80
6.39312 0.00 18.100 0 0.5840 6.1620 97.40 2.2060 24 666.0 20.20 302.76 24.10 13.30
4.87141 0.00 18.100 0 0.6140 6.4840 93.60 2.3053 24 666.0 20.20 396.21 18.68 16.70
15.02340 0.00 18.100 0 0.6140 5.3040 97.30 2.1007 24 666.0 20.20 349.48 24.91 12.00
10.23300 0.00 18.100 0 0.6140 6.1850 96.70 2.1705 24 666.0 20.20 379.70 18.03 14.60
14.33370 0.00 18.100 0 0.6140 6.2290 88.00 1.9512 24 666.0 20.20 383.32 13.11 21.40
5.82401 0.00 18.100 0 0.5320 6.2420 64.70 3.4242 24 666.0 20.20 396.90 10.74 23.00
5.70818 0.00 18.100 0 0.5320 6.7500 74.90 3.3317 24 666.0 20.20 393.07 7.74 23.70
5.73116 0.00 18.100 0 0.5320 7.0610 77.00 3.4106 24 666.0 20.20 395.28 7.01 25.00
2.81838 0.00 18.100 0 0.5320 5.7620 40.30 4.0983 24 666.0 20.20 392.92 10.42 21.80
2.37857 0.00 18.100 0 0.5830 5.8710 41.90 3.7240 24 666.0 20.20 370.73 13.34 20.60
3.67367 0.00 18.100 0 0.5830 6.3120 51.90 3.9917 24 666.0 20.20 388.62 10.58 21.20
5.69175 0.00 18.100 0 0.5830 6.1140 79.80 3.5459 24 666.0 20.20 392.68 14.98 19.10
4.83567 0.00 18.100 0 0.5830 5.9050 53.20 3.1523 24 666.0 20.20 388.22 11.45 20.60
0.15086 0.00 27.740 0 0.6090 5.4540 92.70 1.8209 4 711.0 20.10 395.09 18.06 15.20
0.18337 0.00 27.740 0 0.6090 5.4140 98.30 1.7554 4 711.0 20.10 344.05 23.97 7.00
0.20746 0.00 27.740 0 0.6090 5.0930 98.00 1.8226 4 711.0 20.10 318.43 29.68 8.10
0.10574 0.00 27.740 0 0.6090 5.9830 98.80 1.8681 4 711.0 20.10 390.11 18.07 13.60
0.11132 0.00 27.740 0 0.6090 5.9830 83.50 2.1099 4 711.0 20.10 396.90 13.35 20.10
0.17331 0.00 9.690 0 0.5850 5.7070 54.00 2.3817 6 391.0 19.20 396.90 12.01 21.80
0.27957 0.00 9.690 0 0.5850 5.9260 42.60 2.3817 6 391.0 19.20 396.90 13.59 24.50
0.17899 0.00 9.690 0 0.5850 5.6700 28.80 2.7986 6 391.0 19.20 393.29 17.60 23.10
0.28960 0.00 9.690 0 0.5850 5.3900 72.90 2.7986 6 391.0 19.20 396.90 21.14 19.70
0.26838 0.00 9.690 0 0.5850 5.7940 70.60 2.8927 6 391.0 19.20 396.90 14.10 18.30
0.23912 0.00 9.690 0 0.5850 6.0190 65.30 2.4091 6 391.0 19.20 396.90 12.92 21.20
0.17783 0.00 9.690 0 0.5850 5.5690 73.50 2.3999 6 391.0 19.20 395.77 15.10 17.50
0.22438 0.00 9.690 0 0.5850 6.0270 79.70 2.4982 6 391.0 19.20 396.90 14.33 16.80
0.06263 0.00 11.930 0 0.5730 6.5930 69.10 2.4786 1 273.0 21.00 391.99 9.67 22.40
0.04527 0.00 11.930 0 0.5730 6.1200 76.70 2.2875 1 273.0 21.00 396.90 9.08 20.60
0.06076 0.00 11.930 0 0.5730 6.9760 91.00 2.1675 1 273.0 21.00 396.90 5.64 23.90
0.10959 0.00 11.930 0 0.5730 6.7940 89.30 2.3889 1 273.0 21.00 393.45 6.48 22.00
0.04741 0.00 11.930 0 0.5730 6.0300 80.80 2.5050 1 273.0 21.00 396.90 7.88 11.90

768
View File

@@ -0,0 +1,768 @@
6,148,72,35,0,33.6,0.627,50,1
1,85,66,29,0,26.6,0.351,31,0
8,183,64,0,0,23.3,0.672,32,1
1,89,66,23,94,28.1,0.167,21,0
0,137,40,35,168,43.1,2.288,33,1
5,116,74,0,0,25.6,0.201,30,0
3,78,50,32,88,31.0,0.248,26,1
10,115,0,0,0,35.3,0.134,29,0
2,197,70,45,543,30.5,0.158,53,1
8,125,96,0,0,0.0,0.232,54,1
4,110,92,0,0,37.6,0.191,30,0
10,168,74,0,0,38.0,0.537,34,1
10,139,80,0,0,27.1,1.441,57,0
1,189,60,23,846,30.1,0.398,59,1
5,166,72,19,175,25.8,0.587,51,1
7,100,0,0,0,30.0,0.484,32,1
0,118,84,47,230,45.8,0.551,31,1
7,107,74,0,0,29.6,0.254,31,1
1,103,30,38,83,43.3,0.183,33,0
1,115,70,30,96,34.6,0.529,32,1
3,126,88,41,235,39.3,0.704,27,0
8,99,84,0,0,35.4,0.388,50,0
7,196,90,0,0,39.8,0.451,41,1
9,119,80,35,0,29.0,0.263,29,1
11,143,94,33,146,36.6,0.254,51,1
10,125,70,26,115,31.1,0.205,41,1
7,147,76,0,0,39.4,0.257,43,1
1,97,66,15,140,23.2,0.487,22,0
13,145,82,19,110,22.2,0.245,57,0
5,117,92,0,0,34.1,0.337,38,0
5,109,75,26,0,36.0,0.546,60,0
3,158,76,36,245,31.6,0.851,28,1
3,88,58,11,54,24.8,0.267,22,0
6,92,92,0,0,19.9,0.188,28,0
10,122,78,31,0,27.6,0.512,45,0
4,103,60,33,192,24.0,0.966,33,0
11,138,76,0,0,33.2,0.420,35,0
9,102,76,37,0,32.9,0.665,46,1
2,90,68,42,0,38.2,0.503,27,1
4,111,72,47,207,37.1,1.390,56,1
3,180,64,25,70,34.0,0.271,26,0
7,133,84,0,0,40.2,0.696,37,0
7,106,92,18,0,22.7,0.235,48,0
9,171,110,24,240,45.4,0.721,54,1
7,159,64,0,0,27.4,0.294,40,0
0,180,66,39,0,42.0,1.893,25,1
1,146,56,0,0,29.7,0.564,29,0
2,71,70,27,0,28.0,0.586,22,0
7,103,66,32,0,39.1,0.344,31,1
7,105,0,0,0,0.0,0.305,24,0
1,103,80,11,82,19.4,0.491,22,0
1,101,50,15,36,24.2,0.526,26,0
5,88,66,21,23,24.4,0.342,30,0
8,176,90,34,300,33.7,0.467,58,1
7,150,66,42,342,34.7,0.718,42,0
1,73,50,10,0,23.0,0.248,21,0
7,187,68,39,304,37.7,0.254,41,1
0,100,88,60,110,46.8,0.962,31,0
0,146,82,0,0,40.5,1.781,44,0
0,105,64,41,142,41.5,0.173,22,0
2,84,0,0,0,0.0,0.304,21,0
8,133,72,0,0,32.9,0.270,39,1
5,44,62,0,0,25.0,0.587,36,0
2,141,58,34,128,25.4,0.699,24,0
7,114,66,0,0,32.8,0.258,42,1
5,99,74,27,0,29.0,0.203,32,0
0,109,88,30,0,32.5,0.855,38,1
2,109,92,0,0,42.7,0.845,54,0
1,95,66,13,38,19.6,0.334,25,0
4,146,85,27,100,28.9,0.189,27,0
2,100,66,20,90,32.9,0.867,28,1
5,139,64,35,140,28.6,0.411,26,0
13,126,90,0,0,43.4,0.583,42,1
4,129,86,20,270,35.1,0.231,23,0
1,79,75,30,0,32.0,0.396,22,0
1,0,48,20,0,24.7,0.140,22,0
7,62,78,0,0,32.6,0.391,41,0
5,95,72,33,0,37.7,0.370,27,0
0,131,0,0,0,43.2,0.270,26,1
2,112,66,22,0,25.0,0.307,24,0
3,113,44,13,0,22.4,0.140,22,0
2,74,0,0,0,0.0,0.102,22,0
7,83,78,26,71,29.3,0.767,36,0
0,101,65,28,0,24.6,0.237,22,0
5,137,108,0,0,48.8,0.227,37,1
2,110,74,29,125,32.4,0.698,27,0
13,106,72,54,0,36.6,0.178,45,0
2,100,68,25,71,38.5,0.324,26,0
15,136,70,32,110,37.1,0.153,43,1
1,107,68,19,0,26.5,0.165,24,0
1,80,55,0,0,19.1,0.258,21,0
4,123,80,15,176,32.0,0.443,34,0
7,81,78,40,48,46.7,0.261,42,0
4,134,72,0,0,23.8,0.277,60,1
2,142,82,18,64,24.7,0.761,21,0
6,144,72,27,228,33.9,0.255,40,0
2,92,62,28,0,31.6,0.130,24,0
1,71,48,18,76,20.4,0.323,22,0
6,93,50,30,64,28.7,0.356,23,0
1,122,90,51,220,49.7,0.325,31,1
1,163,72,0,0,39.0,1.222,33,1
1,151,60,0,0,26.1,0.179,22,0
0,125,96,0,0,22.5,0.262,21,0
1,81,72,18,40,26.6,0.283,24,0
2,85,65,0,0,39.6,0.930,27,0
1,126,56,29,152,28.7,0.801,21,0
1,96,122,0,0,22.4,0.207,27,0
4,144,58,28,140,29.5,0.287,37,0
3,83,58,31,18,34.3,0.336,25,0
0,95,85,25,36,37.4,0.247,24,1
3,171,72,33,135,33.3,0.199,24,1
8,155,62,26,495,34.0,0.543,46,1
1,89,76,34,37,31.2,0.192,23,0
4,76,62,0,0,34.0,0.391,25,0
7,160,54,32,175,30.5,0.588,39,1
4,146,92,0,0,31.2,0.539,61,1
5,124,74,0,0,34.0,0.220,38,1
5,78,48,0,0,33.7,0.654,25,0
4,97,60,23,0,28.2,0.443,22,0
4,99,76,15,51,23.2,0.223,21,0
0,162,76,56,100,53.2,0.759,25,1
6,111,64,39,0,34.2,0.260,24,0
2,107,74,30,100,33.6,0.404,23,0
5,132,80,0,0,26.8,0.186,69,0
0,113,76,0,0,33.3,0.278,23,1
1,88,30,42,99,55.0,0.496,26,1
3,120,70,30,135,42.9,0.452,30,0
1,118,58,36,94,33.3,0.261,23,0
1,117,88,24,145,34.5,0.403,40,1
0,105,84,0,0,27.9,0.741,62,1
4,173,70,14,168,29.7,0.361,33,1
9,122,56,0,0,33.3,1.114,33,1
3,170,64,37,225,34.5,0.356,30,1
8,84,74,31,0,38.3,0.457,39,0
2,96,68,13,49,21.1,0.647,26,0
2,125,60,20,140,33.8,0.088,31,0
0,100,70,26,50,30.8,0.597,21,0
0,93,60,25,92,28.7,0.532,22,0
0,129,80,0,0,31.2,0.703,29,0
5,105,72,29,325,36.9,0.159,28,0
3,128,78,0,0,21.1,0.268,55,0
5,106,82,30,0,39.5,0.286,38,0
2,108,52,26,63,32.5,0.318,22,0
10,108,66,0,0,32.4,0.272,42,1
4,154,62,31,284,32.8,0.237,23,0
0,102,75,23,0,0.0,0.572,21,0
9,57,80,37,0,32.8,0.096,41,0
2,106,64,35,119,30.5,1.400,34,0
5,147,78,0,0,33.7,0.218,65,0
2,90,70,17,0,27.3,0.085,22,0
1,136,74,50,204,37.4,0.399,24,0
4,114,65,0,0,21.9,0.432,37,0
9,156,86,28,155,34.3,1.189,42,1
1,153,82,42,485,40.6,0.687,23,0
8,188,78,0,0,47.9,0.137,43,1
7,152,88,44,0,50.0,0.337,36,1
2,99,52,15,94,24.6,0.637,21,0
1,109,56,21,135,25.2,0.833,23,0
2,88,74,19,53,29.0,0.229,22,0
17,163,72,41,114,40.9,0.817,47,1
4,151,90,38,0,29.7,0.294,36,0
7,102,74,40,105,37.2,0.204,45,0
0,114,80,34,285,44.2,0.167,27,0
2,100,64,23,0,29.7,0.368,21,0
0,131,88,0,0,31.6,0.743,32,1
6,104,74,18,156,29.9,0.722,41,1
3,148,66,25,0,32.5,0.256,22,0
4,120,68,0,0,29.6,0.709,34,0
4,110,66,0,0,31.9,0.471,29,0
3,111,90,12,78,28.4,0.495,29,0
6,102,82,0,0,30.8,0.180,36,1
6,134,70,23,130,35.4,0.542,29,1
2,87,0,23,0,28.9,0.773,25,0
1,79,60,42,48,43.5,0.678,23,0
2,75,64,24,55,29.7,0.370,33,0
8,179,72,42,130,32.7,0.719,36,1
6,85,78,0,0,31.2,0.382,42,0
0,129,110,46,130,67.1,0.319,26,1
5,143,78,0,0,45.0,0.190,47,0
5,130,82,0,0,39.1,0.956,37,1
6,87,80,0,0,23.2,0.084,32,0
0,119,64,18,92,34.9,0.725,23,0
1,0,74,20,23,27.7,0.299,21,0
5,73,60,0,0,26.8,0.268,27,0
4,141,74,0,0,27.6,0.244,40,0
7,194,68,28,0,35.9,0.745,41,1
8,181,68,36,495,30.1,0.615,60,1
1,128,98,41,58,32.0,1.321,33,1
8,109,76,39,114,27.9,0.640,31,1
5,139,80,35,160,31.6,0.361,25,1
3,111,62,0,0,22.6,0.142,21,0
9,123,70,44,94,33.1,0.374,40,0
7,159,66,0,0,30.4,0.383,36,1
11,135,0,0,0,52.3,0.578,40,1
8,85,55,20,0,24.4,0.136,42,0
5,158,84,41,210,39.4,0.395,29,1
1,105,58,0,0,24.3,0.187,21,0
3,107,62,13,48,22.9,0.678,23,1
4,109,64,44,99,34.8,0.905,26,1
4,148,60,27,318,30.9,0.150,29,1
0,113,80,16,0,31.0,0.874,21,0
1,138,82,0,0,40.1,0.236,28,0
0,108,68,20,0,27.3,0.787,32,0
2,99,70,16,44,20.4,0.235,27,0
6,103,72,32,190,37.7,0.324,55,0
5,111,72,28,0,23.9,0.407,27,0
8,196,76,29,280,37.5,0.605,57,1
5,162,104,0,0,37.7,0.151,52,1
1,96,64,27,87,33.2,0.289,21,0
7,184,84,33,0,35.5,0.355,41,1
2,81,60,22,0,27.7,0.290,25,0
0,147,85,54,0,42.8,0.375,24,0
7,179,95,31,0,34.2,0.164,60,0
0,140,65,26,130,42.6,0.431,24,1
9,112,82,32,175,34.2,0.260,36,1
12,151,70,40,271,41.8,0.742,38,1
5,109,62,41,129,35.8,0.514,25,1
6,125,68,30,120,30.0,0.464,32,0
5,85,74,22,0,29.0,1.224,32,1
5,112,66,0,0,37.8,0.261,41,1
0,177,60,29,478,34.6,1.072,21,1
2,158,90,0,0,31.6,0.805,66,1
7,119,0,0,0,25.2,0.209,37,0
7,142,60,33,190,28.8,0.687,61,0
1,100,66,15,56,23.6,0.666,26,0
1,87,78,27,32,34.6,0.101,22,0
0,101,76,0,0,35.7,0.198,26,0
3,162,52,38,0,37.2,0.652,24,1
4,197,70,39,744,36.7,2.329,31,0
0,117,80,31,53,45.2,0.089,24,0
4,142,86,0,0,44.0,0.645,22,1
6,134,80,37,370,46.2,0.238,46,1
1,79,80,25,37,25.4,0.583,22,0
4,122,68,0,0,35.0,0.394,29,0
3,74,68,28,45,29.7,0.293,23,0
4,171,72,0,0,43.6,0.479,26,1
7,181,84,21,192,35.9,0.586,51,1
0,179,90,27,0,44.1,0.686,23,1
9,164,84,21,0,30.8,0.831,32,1
0,104,76,0,0,18.4,0.582,27,0
1,91,64,24,0,29.2,0.192,21,0
4,91,70,32,88,33.1,0.446,22,0
3,139,54,0,0,25.6,0.402,22,1
6,119,50,22,176,27.1,1.318,33,1
2,146,76,35,194,38.2,0.329,29,0
9,184,85,15,0,30.0,1.213,49,1
10,122,68,0,0,31.2,0.258,41,0
0,165,90,33,680,52.3,0.427,23,0
9,124,70,33,402,35.4,0.282,34,0
1,111,86,19,0,30.1,0.143,23,0
9,106,52,0,0,31.2,0.380,42,0
2,129,84,0,0,28.0,0.284,27,0
2,90,80,14,55,24.4,0.249,24,0
0,86,68,32,0,35.8,0.238,25,0
12,92,62,7,258,27.6,0.926,44,1
1,113,64,35,0,33.6,0.543,21,1
3,111,56,39,0,30.1,0.557,30,0
2,114,68,22,0,28.7,0.092,25,0
1,193,50,16,375,25.9,0.655,24,0
11,155,76,28,150,33.3,1.353,51,1
3,191,68,15,130,30.9,0.299,34,0
3,141,0,0,0,30.0,0.761,27,1
4,95,70,32,0,32.1,0.612,24,0
3,142,80,15,0,32.4,0.200,63,0
4,123,62,0,0,32.0,0.226,35,1
5,96,74,18,67,33.6,0.997,43,0
0,138,0,0,0,36.3,0.933,25,1
2,128,64,42,0,40.0,1.101,24,0
0,102,52,0,0,25.1,0.078,21,0
2,146,0,0,0,27.5,0.240,28,1
10,101,86,37,0,45.6,1.136,38,1
2,108,62,32,56,25.2,0.128,21,0
3,122,78,0,0,23.0,0.254,40,0
1,71,78,50,45,33.2,0.422,21,0
13,106,70,0,0,34.2,0.251,52,0
2,100,70,52,57,40.5,0.677,25,0
7,106,60,24,0,26.5,0.296,29,1
0,104,64,23,116,27.8,0.454,23,0
5,114,74,0,0,24.9,0.744,57,0
2,108,62,10,278,25.3,0.881,22,0
0,146,70,0,0,37.9,0.334,28,1
10,129,76,28,122,35.9,0.280,39,0
7,133,88,15,155,32.4,0.262,37,0
7,161,86,0,0,30.4,0.165,47,1
2,108,80,0,0,27.0,0.259,52,1
7,136,74,26,135,26.0,0.647,51,0
5,155,84,44,545,38.7,0.619,34,0
1,119,86,39,220,45.6,0.808,29,1
4,96,56,17,49,20.8,0.340,26,0
5,108,72,43,75,36.1,0.263,33,0
0,78,88,29,40,36.9,0.434,21,0
0,107,62,30,74,36.6,0.757,25,1
2,128,78,37,182,43.3,1.224,31,1
1,128,48,45,194,40.5,0.613,24,1
0,161,50,0,0,21.9,0.254,65,0
6,151,62,31,120,35.5,0.692,28,0
2,146,70,38,360,28.0,0.337,29,1
0,126,84,29,215,30.7,0.520,24,0
14,100,78,25,184,36.6,0.412,46,1
8,112,72,0,0,23.6,0.840,58,0
0,167,0,0,0,32.3,0.839,30,1
2,144,58,33,135,31.6,0.422,25,1
5,77,82,41,42,35.8,0.156,35,0
5,115,98,0,0,52.9,0.209,28,1
3,150,76,0,0,21.0,0.207,37,0
2,120,76,37,105,39.7,0.215,29,0
10,161,68,23,132,25.5,0.326,47,1
0,137,68,14,148,24.8,0.143,21,0
0,128,68,19,180,30.5,1.391,25,1
2,124,68,28,205,32.9,0.875,30,1
6,80,66,30,0,26.2,0.313,41,0
0,106,70,37,148,39.4,0.605,22,0
2,155,74,17,96,26.6,0.433,27,1
3,113,50,10,85,29.5,0.626,25,0
7,109,80,31,0,35.9,1.127,43,1
2,112,68,22,94,34.1,0.315,26,0
3,99,80,11,64,19.3,0.284,30,0
3,182,74,0,0,30.5,0.345,29,1
3,115,66,39,140,38.1,0.150,28,0
6,194,78,0,0,23.5,0.129,59,1
4,129,60,12,231,27.5,0.527,31,0
3,112,74,30,0,31.6,0.197,25,1
0,124,70,20,0,27.4,0.254,36,1
13,152,90,33,29,26.8,0.731,43,1
2,112,75,32,0,35.7,0.148,21,0
1,157,72,21,168,25.6,0.123,24,0
1,122,64,32,156,35.1,0.692,30,1
10,179,70,0,0,35.1,0.200,37,0
2,102,86,36,120,45.5,0.127,23,1
6,105,70,32,68,30.8,0.122,37,0
8,118,72,19,0,23.1,1.476,46,0
2,87,58,16,52,32.7,0.166,25,0
1,180,0,0,0,43.3,0.282,41,1
12,106,80,0,0,23.6,0.137,44,0
1,95,60,18,58,23.9,0.260,22,0
0,165,76,43,255,47.9,0.259,26,0
0,117,0,0,0,33.8,0.932,44,0
5,115,76,0,0,31.2,0.343,44,1
9,152,78,34,171,34.2,0.893,33,1
7,178,84,0,0,39.9,0.331,41,1
1,130,70,13,105,25.9,0.472,22,0
1,95,74,21,73,25.9,0.673,36,0
1,0,68,35,0,32.0,0.389,22,0
5,122,86,0,0,34.7,0.290,33,0
8,95,72,0,0,36.8,0.485,57,0
8,126,88,36,108,38.5,0.349,49,0
1,139,46,19,83,28.7,0.654,22,0
3,116,0,0,0,23.5,0.187,23,0
3,99,62,19,74,21.8,0.279,26,0
5,0,80,32,0,41.0,0.346,37,1
4,92,80,0,0,42.2,0.237,29,0
4,137,84,0,0,31.2,0.252,30,0
3,61,82,28,0,34.4,0.243,46,0
1,90,62,12,43,27.2,0.580,24,0
3,90,78,0,0,42.7,0.559,21,0
9,165,88,0,0,30.4,0.302,49,1
1,125,50,40,167,33.3,0.962,28,1
13,129,0,30,0,39.9,0.569,44,1
12,88,74,40,54,35.3,0.378,48,0
1,196,76,36,249,36.5,0.875,29,1
5,189,64,33,325,31.2,0.583,29,1
5,158,70,0,0,29.8,0.207,63,0
5,103,108,37,0,39.2,0.305,65,0
4,146,78,0,0,38.5,0.520,67,1
4,147,74,25,293,34.9,0.385,30,0
5,99,54,28,83,34.0,0.499,30,0
6,124,72,0,0,27.6,0.368,29,1
0,101,64,17,0,21.0,0.252,21,0
3,81,86,16,66,27.5,0.306,22,0
1,133,102,28,140,32.8,0.234,45,1
3,173,82,48,465,38.4,2.137,25,1
0,118,64,23,89,0.0,1.731,21,0
0,84,64,22,66,35.8,0.545,21,0
2,105,58,40,94,34.9,0.225,25,0
2,122,52,43,158,36.2,0.816,28,0
12,140,82,43,325,39.2,0.528,58,1
0,98,82,15,84,25.2,0.299,22,0
1,87,60,37,75,37.2,0.509,22,0
4,156,75,0,0,48.3,0.238,32,1
0,93,100,39,72,43.4,1.021,35,0
1,107,72,30,82,30.8,0.821,24,0
0,105,68,22,0,20.0,0.236,22,0
1,109,60,8,182,25.4,0.947,21,0
1,90,62,18,59,25.1,1.268,25,0
1,125,70,24,110,24.3,0.221,25,0
1,119,54,13,50,22.3,0.205,24,0
5,116,74,29,0,32.3,0.660,35,1
8,105,100,36,0,43.3,0.239,45,1
5,144,82,26,285,32.0,0.452,58,1
3,100,68,23,81,31.6,0.949,28,0
1,100,66,29,196,32.0,0.444,42,0
5,166,76,0,0,45.7,0.340,27,1
1,131,64,14,415,23.7,0.389,21,0
4,116,72,12,87,22.1,0.463,37,0
4,158,78,0,0,32.9,0.803,31,1
2,127,58,24,275,27.7,1.600,25,0
3,96,56,34,115,24.7,0.944,39,0
0,131,66,40,0,34.3,0.196,22,1
3,82,70,0,0,21.1,0.389,25,0
3,193,70,31,0,34.9,0.241,25,1
4,95,64,0,0,32.0,0.161,31,1
6,137,61,0,0,24.2,0.151,55,0
5,136,84,41,88,35.0,0.286,35,1
9,72,78,25,0,31.6,0.280,38,0
5,168,64,0,0,32.9,0.135,41,1
2,123,48,32,165,42.1,0.520,26,0
4,115,72,0,0,28.9,0.376,46,1
0,101,62,0,0,21.9,0.336,25,0
8,197,74,0,0,25.9,1.191,39,1
1,172,68,49,579,42.4,0.702,28,1
6,102,90,39,0,35.7,0.674,28,0
1,112,72,30,176,34.4,0.528,25,0
1,143,84,23,310,42.4,1.076,22,0
1,143,74,22,61,26.2,0.256,21,0
0,138,60,35,167,34.6,0.534,21,1
3,173,84,33,474,35.7,0.258,22,1
1,97,68,21,0,27.2,1.095,22,0
4,144,82,32,0,38.5,0.554,37,1
1,83,68,0,0,18.2,0.624,27,0
3,129,64,29,115,26.4,0.219,28,1
1,119,88,41,170,45.3,0.507,26,0
2,94,68,18,76,26.0,0.561,21,0
0,102,64,46,78,40.6,0.496,21,0
2,115,64,22,0,30.8,0.421,21,0
8,151,78,32,210,42.9,0.516,36,1
4,184,78,39,277,37.0,0.264,31,1
0,94,0,0,0,0.0,0.256,25,0
1,181,64,30,180,34.1,0.328,38,1
0,135,94,46,145,40.6,0.284,26,0
1,95,82,25,180,35.0,0.233,43,1
2,99,0,0,0,22.2,0.108,23,0
3,89,74,16,85,30.4,0.551,38,0
1,80,74,11,60,30.0,0.527,22,0
2,139,75,0,0,25.6,0.167,29,0
1,90,68,8,0,24.5,1.138,36,0
0,141,0,0,0,42.4,0.205,29,1
12,140,85,33,0,37.4,0.244,41,0
5,147,75,0,0,29.9,0.434,28,0
1,97,70,15,0,18.2,0.147,21,0
6,107,88,0,0,36.8,0.727,31,0
0,189,104,25,0,34.3,0.435,41,1
2,83,66,23,50,32.2,0.497,22,0
4,117,64,27,120,33.2,0.230,24,0
8,108,70,0,0,30.5,0.955,33,1
4,117,62,12,0,29.7,0.380,30,1
0,180,78,63,14,59.4,2.420,25,1
1,100,72,12,70,25.3,0.658,28,0
0,95,80,45,92,36.5,0.330,26,0
0,104,64,37,64,33.6,0.510,22,1
0,120,74,18,63,30.5,0.285,26,0
1,82,64,13,95,21.2,0.415,23,0
2,134,70,0,0,28.9,0.542,23,1
0,91,68,32,210,39.9,0.381,25,0
2,119,0,0,0,19.6,0.832,72,0
2,100,54,28,105,37.8,0.498,24,0
14,175,62,30,0,33.6,0.212,38,1
1,135,54,0,0,26.7,0.687,62,0
5,86,68,28,71,30.2,0.364,24,0
10,148,84,48,237,37.6,1.001,51,1
9,134,74,33,60,25.9,0.460,81,0
9,120,72,22,56,20.8,0.733,48,0
1,71,62,0,0,21.8,0.416,26,0
8,74,70,40,49,35.3,0.705,39,0
5,88,78,30,0,27.6,0.258,37,0
10,115,98,0,0,24.0,1.022,34,0
0,124,56,13,105,21.8,0.452,21,0
0,74,52,10,36,27.8,0.269,22,0
0,97,64,36,100,36.8,0.600,25,0
8,120,0,0,0,30.0,0.183,38,1
6,154,78,41,140,46.1,0.571,27,0
1,144,82,40,0,41.3,0.607,28,0
0,137,70,38,0,33.2,0.170,22,0
0,119,66,27,0,38.8,0.259,22,0
7,136,90,0,0,29.9,0.210,50,0
4,114,64,0,0,28.9,0.126,24,0
0,137,84,27,0,27.3,0.231,59,0
2,105,80,45,191,33.7,0.711,29,1
7,114,76,17,110,23.8,0.466,31,0
8,126,74,38,75,25.9,0.162,39,0
4,132,86,31,0,28.0,0.419,63,0
3,158,70,30,328,35.5,0.344,35,1
0,123,88,37,0,35.2,0.197,29,0
4,85,58,22,49,27.8,0.306,28,0
0,84,82,31,125,38.2,0.233,23,0
0,145,0,0,0,44.2,0.630,31,1
0,135,68,42,250,42.3,0.365,24,1
1,139,62,41,480,40.7,0.536,21,0
0,173,78,32,265,46.5,1.159,58,0
4,99,72,17,0,25.6,0.294,28,0
8,194,80,0,0,26.1,0.551,67,0
2,83,65,28,66,36.8,0.629,24,0
2,89,90,30,0,33.5,0.292,42,0
4,99,68,38,0,32.8,0.145,33,0
4,125,70,18,122,28.9,1.144,45,1
3,80,0,0,0,0.0,0.174,22,0
6,166,74,0,0,26.6,0.304,66,0
5,110,68,0,0,26.0,0.292,30,0
2,81,72,15,76,30.1,0.547,25,0
7,195,70,33,145,25.1,0.163,55,1
6,154,74,32,193,29.3,0.839,39,0
2,117,90,19,71,25.2,0.313,21,0
3,84,72,32,0,37.2,0.267,28,0
6,0,68,41,0,39.0,0.727,41,1
7,94,64,25,79,33.3,0.738,41,0
3,96,78,39,0,37.3,0.238,40,0
10,75,82,0,0,33.3,0.263,38,0
0,180,90,26,90,36.5,0.314,35,1
1,130,60,23,170,28.6,0.692,21,0
2,84,50,23,76,30.4,0.968,21,0
8,120,78,0,0,25.0,0.409,64,0
12,84,72,31,0,29.7,0.297,46,1
0,139,62,17,210,22.1,0.207,21,0
9,91,68,0,0,24.2,0.200,58,0
2,91,62,0,0,27.3,0.525,22,0
3,99,54,19,86,25.6,0.154,24,0
3,163,70,18,105,31.6,0.268,28,1
9,145,88,34,165,30.3,0.771,53,1
7,125,86,0,0,37.6,0.304,51,0
13,76,60,0,0,32.8,0.180,41,0
6,129,90,7,326,19.6,0.582,60,0
2,68,70,32,66,25.0,0.187,25,0
3,124,80,33,130,33.2,0.305,26,0
6,114,0,0,0,0.0,0.189,26,0
9,130,70,0,0,34.2,0.652,45,1
3,125,58,0,0,31.6,0.151,24,0
3,87,60,18,0,21.8,0.444,21,0
1,97,64,19,82,18.2,0.299,21,0
3,116,74,15,105,26.3,0.107,24,0
0,117,66,31,188,30.8,0.493,22,0
0,111,65,0,0,24.6,0.660,31,0
2,122,60,18,106,29.8,0.717,22,0
0,107,76,0,0,45.3,0.686,24,0
1,86,66,52,65,41.3,0.917,29,0
6,91,0,0,0,29.8,0.501,31,0
1,77,56,30,56,33.3,1.251,24,0
4,132,0,0,0,32.9,0.302,23,1
0,105,90,0,0,29.6,0.197,46,0
0,57,60,0,0,21.7,0.735,67,0
0,127,80,37,210,36.3,0.804,23,0
3,129,92,49,155,36.4,0.968,32,1
8,100,74,40,215,39.4,0.661,43,1
3,128,72,25,190,32.4,0.549,27,1
10,90,85,32,0,34.9,0.825,56,1
4,84,90,23,56,39.5,0.159,25,0
1,88,78,29,76,32.0,0.365,29,0
8,186,90,35,225,34.5,0.423,37,1
5,187,76,27,207,43.6,1.034,53,1
4,131,68,21,166,33.1,0.160,28,0
1,164,82,43,67,32.8,0.341,50,0
4,189,110,31,0,28.5,0.680,37,0
1,116,70,28,0,27.4,0.204,21,0
3,84,68,30,106,31.9,0.591,25,0
6,114,88,0,0,27.8,0.247,66,0
1,88,62,24,44,29.9,0.422,23,0
1,84,64,23,115,36.9,0.471,28,0
7,124,70,33,215,25.5,0.161,37,0
1,97,70,40,0,38.1,0.218,30,0
8,110,76,0,0,27.8,0.237,58,0
11,103,68,40,0,46.2,0.126,42,0
11,85,74,0,0,30.1,0.300,35,0
6,125,76,0,0,33.8,0.121,54,1
0,198,66,32,274,41.3,0.502,28,1
1,87,68,34,77,37.6,0.401,24,0
6,99,60,19,54,26.9,0.497,32,0
0,91,80,0,0,32.4,0.601,27,0
2,95,54,14,88,26.1,0.748,22,0
1,99,72,30,18,38.6,0.412,21,0
6,92,62,32,126,32.0,0.085,46,0
4,154,72,29,126,31.3,0.338,37,0
0,121,66,30,165,34.3,0.203,33,1
3,78,70,0,0,32.5,0.270,39,0
2,130,96,0,0,22.6,0.268,21,0
3,111,58,31,44,29.5,0.430,22,0
2,98,60,17,120,34.7,0.198,22,0
1,143,86,30,330,30.1,0.892,23,0
1,119,44,47,63,35.5,0.280,25,0
6,108,44,20,130,24.0,0.813,35,0
2,118,80,0,0,42.9,0.693,21,1
10,133,68,0,0,27.0,0.245,36,0
2,197,70,99,0,34.7,0.575,62,1
0,151,90,46,0,42.1,0.371,21,1
6,109,60,27,0,25.0,0.206,27,0
12,121,78,17,0,26.5,0.259,62,0
8,100,76,0,0,38.7,0.190,42,0
8,124,76,24,600,28.7,0.687,52,1
1,93,56,11,0,22.5,0.417,22,0
8,143,66,0,0,34.9,0.129,41,1
6,103,66,0,0,24.3,0.249,29,0
3,176,86,27,156,33.3,1.154,52,1
0,73,0,0,0,21.1,0.342,25,0
11,111,84,40,0,46.8,0.925,45,1
2,112,78,50,140,39.4,0.175,24,0
3,132,80,0,0,34.4,0.402,44,1
2,82,52,22,115,28.5,1.699,25,0
6,123,72,45,230,33.6,0.733,34,0
0,188,82,14,185,32.0,0.682,22,1
0,67,76,0,0,45.3,0.194,46,0
1,89,24,19,25,27.8,0.559,21,0
1,173,74,0,0,36.8,0.088,38,1
1,109,38,18,120,23.1,0.407,26,0
1,108,88,19,0,27.1,0.400,24,0
6,96,0,0,0,23.7,0.190,28,0
1,124,74,36,0,27.8,0.100,30,0
7,150,78,29,126,35.2,0.692,54,1
4,183,0,0,0,28.4,0.212,36,1
1,124,60,32,0,35.8,0.514,21,0
1,181,78,42,293,40.0,1.258,22,1
1,92,62,25,41,19.5,0.482,25,0
0,152,82,39,272,41.5,0.270,27,0
1,111,62,13,182,24.0,0.138,23,0
3,106,54,21,158,30.9,0.292,24,0
3,174,58,22,194,32.9,0.593,36,1
7,168,88,42,321,38.2,0.787,40,1
6,105,80,28,0,32.5,0.878,26,0
11,138,74,26,144,36.1,0.557,50,1
3,106,72,0,0,25.8,0.207,27,0
6,117,96,0,0,28.7,0.157,30,0
2,68,62,13,15,20.1,0.257,23,0
9,112,82,24,0,28.2,1.282,50,1
0,119,0,0,0,32.4,0.141,24,1
2,112,86,42,160,38.4,0.246,28,0
2,92,76,20,0,24.2,1.698,28,0
6,183,94,0,0,40.8,1.461,45,0
0,94,70,27,115,43.5,0.347,21,0
2,108,64,0,0,30.8,0.158,21,0
4,90,88,47,54,37.7,0.362,29,0
0,125,68,0,0,24.7,0.206,21,0
0,132,78,0,0,32.4,0.393,21,0
5,128,80,0,0,34.6,0.144,45,0
4,94,65,22,0,24.7,0.148,21,0
7,114,64,0,0,27.4,0.732,34,1
0,102,78,40,90,34.5,0.238,24,0
2,111,60,0,0,26.2,0.343,23,0
1,128,82,17,183,27.5,0.115,22,0
10,92,62,0,0,25.9,0.167,31,0
13,104,72,0,0,31.2,0.465,38,1
5,104,74,0,0,28.8,0.153,48,0
2,94,76,18,66,31.6,0.649,23,0
7,97,76,32,91,40.9,0.871,32,1
1,100,74,12,46,19.5,0.149,28,0
0,102,86,17,105,29.3,0.695,27,0
4,128,70,0,0,34.3,0.303,24,0
6,147,80,0,0,29.5,0.178,50,1
4,90,0,0,0,28.0,0.610,31,0
3,103,72,30,152,27.6,0.730,27,0
2,157,74,35,440,39.4,0.134,30,0
1,167,74,17,144,23.4,0.447,33,1
0,179,50,36,159,37.8,0.455,22,1
11,136,84,35,130,28.3,0.260,42,1
0,107,60,25,0,26.4,0.133,23,0
1,91,54,25,100,25.2,0.234,23,0
1,117,60,23,106,33.8,0.466,27,0
5,123,74,40,77,34.1,0.269,28,0
2,120,54,0,0,26.8,0.455,27,0
1,106,70,28,135,34.2,0.142,22,0
2,155,52,27,540,38.7,0.240,25,1
2,101,58,35,90,21.8,0.155,22,0
1,120,80,48,200,38.9,1.162,41,0
11,127,106,0,0,39.0,0.190,51,0
3,80,82,31,70,34.2,1.292,27,1
10,162,84,0,0,27.7,0.182,54,0
1,199,76,43,0,42.9,1.394,22,1
8,167,106,46,231,37.6,0.165,43,1
9,145,80,46,130,37.9,0.637,40,1
6,115,60,39,0,33.7,0.245,40,1
1,112,80,45,132,34.8,0.217,24,0
4,145,82,18,0,32.5,0.235,70,1
10,111,70,27,0,27.5,0.141,40,1
6,98,58,33,190,34.0,0.430,43,0
9,154,78,30,100,30.9,0.164,45,0
6,165,68,26,168,33.6,0.631,49,0
1,99,58,10,0,25.4,0.551,21,0
10,68,106,23,49,35.5,0.285,47,0
3,123,100,35,240,57.3,0.880,22,0
8,91,82,0,0,35.6,0.587,68,0
6,195,70,0,0,30.9,0.328,31,1
9,156,86,0,0,24.8,0.230,53,1
0,93,60,0,0,35.3,0.263,25,0
3,121,52,0,0,36.0,0.127,25,1
2,101,58,17,265,24.2,0.614,23,0
2,56,56,28,45,24.2,0.332,22,0
0,162,76,36,0,49.6,0.364,26,1
0,95,64,39,105,44.6,0.366,22,0
4,125,80,0,0,32.3,0.536,27,1
5,136,82,0,0,0.0,0.640,69,0
2,129,74,26,205,33.2,0.591,25,0
3,130,64,0,0,23.1,0.314,22,0
1,107,50,19,0,28.3,0.181,29,0
1,140,74,26,180,24.1,0.828,23,0
1,144,82,46,180,46.1,0.335,46,1
8,107,80,0,0,24.6,0.856,34,0
13,158,114,0,0,42.3,0.257,44,1
2,121,70,32,95,39.1,0.886,23,0
7,129,68,49,125,38.5,0.439,43,1
2,90,60,0,0,23.5,0.191,25,0
7,142,90,24,480,30.4,0.128,43,1
3,169,74,19,125,29.9,0.268,31,1
0,99,0,0,0,25.0,0.253,22,0
4,127,88,11,155,34.5,0.598,28,0
4,118,70,0,0,44.5,0.904,26,0
2,122,76,27,200,35.9,0.483,26,0
6,125,78,31,0,27.6,0.565,49,1
1,168,88,29,0,35.0,0.905,52,1
2,129,0,0,0,38.5,0.304,41,0
4,110,76,20,100,28.4,0.118,27,0
6,80,80,36,0,39.8,0.177,28,0
10,115,0,0,0,0.0,0.261,30,1
2,127,46,21,335,34.4,0.176,22,0
9,164,78,0,0,32.8,0.148,45,1
2,93,64,32,160,38.0,0.674,23,1
3,158,64,13,387,31.2,0.295,24,0
5,126,78,27,22,29.6,0.439,40,0
10,129,62,36,0,41.2,0.441,38,1
0,134,58,20,291,26.4,0.352,21,0
3,102,74,0,0,29.5,0.121,32,0
7,187,50,33,392,33.9,0.826,34,1
3,173,78,39,185,33.8,0.970,31,1
10,94,72,18,0,23.1,0.595,56,0
1,108,60,46,178,35.5,0.415,24,0
5,97,76,27,0,35.6,0.378,52,1
4,83,86,19,0,29.3,0.317,34,0
1,114,66,36,200,38.1,0.289,21,0
1,149,68,29,127,29.3,0.349,42,1
5,117,86,30,105,39.1,0.251,42,0
1,111,94,0,0,32.8,0.265,45,0
4,112,78,40,0,39.4,0.236,38,0
1,116,78,29,180,36.1,0.496,25,0
0,141,84,26,0,32.4,0.433,22,0
2,175,88,0,0,22.9,0.326,22,0
2,92,52,0,0,30.1,0.141,22,0
3,130,78,23,79,28.4,0.323,34,1
8,120,86,0,0,28.4,0.259,22,1
2,174,88,37,120,44.5,0.646,24,1
2,106,56,27,165,29.0,0.426,22,0
2,105,75,0,0,23.3,0.560,53,0
4,95,60,32,0,35.4,0.284,28,0
0,126,86,27,120,27.4,0.515,21,0
8,65,72,23,0,32.0,0.600,42,0
2,99,60,17,160,36.6,0.453,21,0
1,102,74,0,0,39.5,0.293,42,1
11,120,80,37,150,42.3,0.785,48,1
3,102,44,20,94,30.8,0.400,26,0
1,109,58,18,116,28.5,0.219,22,0
9,140,94,0,0,32.7,0.734,45,1
13,153,88,37,140,40.6,1.174,39,0
12,100,84,33,105,30.0,0.488,46,0
1,147,94,41,0,49.3,0.358,27,1
1,81,74,41,57,46.3,1.096,32,0
3,187,70,22,200,36.4,0.408,36,1
6,162,62,0,0,24.3,0.178,50,1
4,136,70,0,0,31.2,1.182,22,1
1,121,78,39,74,39.0,0.261,28,0
3,108,62,24,0,26.0,0.223,25,0
0,181,88,44,510,43.3,0.222,26,1
8,154,78,32,0,32.4,0.443,45,1
1,128,88,39,110,36.5,1.057,37,1
7,137,90,41,0,32.0,0.391,39,0
0,123,72,0,0,36.3,0.258,52,1
1,106,76,0,0,37.5,0.197,26,0
6,190,92,0,0,35.5,0.278,66,1
2,88,58,26,16,28.4,0.766,22,0
9,170,74,31,0,44.0,0.403,43,1
9,89,62,0,0,22.5,0.142,33,0
10,101,76,48,180,32.9,0.171,63,0
2,122,70,27,0,36.8,0.340,27,0
5,121,72,23,112,26.2,0.245,30,0
1,126,60,0,0,30.1,0.349,47,1
1,93,70,31,0,30.4,0.315,23,0

892
data/titanic.csv Normal file
View File

@@ -0,0 +1,892 @@
PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,,S
6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
7,0,1,"McCarthy, Mr. Timothy J",male,54,0,0,17463,51.8625,E46,S
8,0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.075,,S
9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,,S
10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14,1,0,237736,30.0708,,C
11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4,1,1,PP 9549,16.7,G6,S
12,1,1,"Bonnell, Miss. Elizabeth",female,58,0,0,113783,26.55,C103,S
13,0,3,"Saundercock, Mr. William Henry",male,20,0,0,A/5. 2151,8.05,,S
14,0,3,"Andersson, Mr. Anders Johan",male,39,1,5,347082,31.275,,S
15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14,0,0,350406,7.8542,,S
16,1,2,"Hewlett, Mrs. (Mary D Kingcome) ",female,55,0,0,248706,16,,S
17,0,3,"Rice, Master. Eugene",male,2,4,1,382652,29.125,,Q
18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13,,S
19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,31,1,0,345763,18,,S
20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,,C
21,0,2,"Fynney, Mr. Joseph J",male,35,0,0,239865,26,,S
22,1,2,"Beesley, Mr. Lawrence",male,34,0,0,248698,13,D56,S
23,1,3,"McGowan, Miss. Anna ""Annie""",female,15,0,0,330923,8.0292,,Q
24,1,1,"Sloper, Mr. William Thompson",male,28,0,0,113788,35.5,A6,S
25,0,3,"Palsson, Miss. Torborg Danira",female,8,3,1,349909,21.075,,S
26,1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,38,1,5,347077,31.3875,,S
27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.225,,C
28,0,1,"Fortune, Mr. Charles Alexander",male,19,3,2,19950,263,C23 C25 C27,S
29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q
30,0,3,"Todoroff, Mr. Lalio",male,,0,0,349216,7.8958,,S
31,0,1,"Uruchurtu, Don. Manuel E",male,40,0,0,PC 17601,27.7208,,C
32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,B78,C
33,1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,,Q
34,0,2,"Wheadon, Mr. Edward H",male,66,0,0,C.A. 24579,10.5,,S
35,0,1,"Meyer, Mr. Edgar Joseph",male,28,1,0,PC 17604,82.1708,,C
36,0,1,"Holverson, Mr. Alexander Oskar",male,42,1,0,113789,52,,S
37,1,3,"Mamee, Mr. Hanna",male,,0,0,2677,7.2292,,C
38,0,3,"Cann, Mr. Ernest Charles",male,21,0,0,A./5. 2152,8.05,,S
39,0,3,"Vander Planke, Miss. Augusta Maria",female,18,2,0,345764,18,,S
40,1,3,"Nicola-Yarred, Miss. Jamila",female,14,1,0,2651,11.2417,,C
41,0,3,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40,1,0,7546,9.475,,S
42,0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,27,1,0,11668,21,,S
43,0,3,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,,C
44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",female,3,1,2,SC/Paris 2123,41.5792,,C
45,1,3,"Devaney, Miss. Margaret Delia",female,19,0,0,330958,7.8792,,Q
46,0,3,"Rogers, Mr. William John",male,,0,0,S.C./A.4. 23567,8.05,,S
47,0,3,"Lennon, Mr. Denis",male,,1,0,370371,15.5,,Q
48,1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,,Q
49,0,3,"Samaan, Mr. Youssef",male,,2,0,2662,21.6792,,C
50,0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,18,1,0,349237,17.8,,S
51,0,3,"Panula, Master. Juha Niilo",male,7,4,1,3101295,39.6875,,S
52,0,3,"Nosworthy, Mr. Richard Cater",male,21,0,0,A/4. 39886,7.8,,S
53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49,1,0,PC 17572,76.7292,D33,C
54,1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,29,1,0,2926,26,,S
55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65,0,1,113509,61.9792,B30,C
56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,C52,S
57,1,2,"Rugg, Miss. Emily",female,21,0,0,C.A. 31026,10.5,,S
58,0,3,"Novel, Mr. Mansouer",male,28.5,0,0,2697,7.2292,,C
59,1,2,"West, Miss. Constance Mirium",female,5,1,2,C.A. 34651,27.75,,S
60,0,3,"Goodwin, Master. William Frederick",male,11,5,2,CA 2144,46.9,,S
61,0,3,"Sirayanian, Mr. Orsen",male,22,0,0,2669,7.2292,,C
62,1,1,"Icard, Miss. Amelie",female,38,0,0,113572,80,B28,
63,0,1,"Harris, Mr. Henry Birkhardt",male,45,1,0,36973,83.475,C83,S
64,0,3,"Skoog, Master. Harald",male,4,3,2,347088,27.9,,S
65,0,1,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,,C
66,1,3,"Moubarek, Master. Gerios",male,,1,1,2661,15.2458,,C
67,1,2,"Nye, Mrs. (Elizabeth Ramell)",female,29,0,0,C.A. 29395,10.5,F33,S
68,0,3,"Crease, Mr. Ernest James",male,19,0,0,S.P. 3464,8.1583,,S
69,1,3,"Andersson, Miss. Erna Alexandra",female,17,4,2,3101281,7.925,,S
70,0,3,"Kink, Mr. Vincenz",male,26,2,0,315151,8.6625,,S
71,0,2,"Jenkin, Mr. Stephen Curnow",male,32,0,0,C.A. 33111,10.5,,S
72,0,3,"Goodwin, Miss. Lillian Amy",female,16,5,2,CA 2144,46.9,,S
73,0,2,"Hood, Mr. Ambrose Jr",male,21,0,0,S.O.C. 14879,73.5,,S
74,0,3,"Chronopoulos, Mr. Apostolos",male,26,1,0,2680,14.4542,,C
75,1,3,"Bing, Mr. Lee",male,32,0,0,1601,56.4958,,S
76,0,3,"Moen, Mr. Sigurd Hansen",male,25,0,0,348123,7.65,F G73,S
77,0,3,"Staneff, Mr. Ivan",male,,0,0,349208,7.8958,,S
78,0,3,"Moutal, Mr. Rahamin Haim",male,,0,0,374746,8.05,,S
79,1,2,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29,,S
80,1,3,"Dowdell, Miss. Elizabeth",female,30,0,0,364516,12.475,,S
81,0,3,"Waelens, Mr. Achille",male,22,0,0,345767,9,,S
82,1,3,"Sheerlinck, Mr. Jan Baptist",male,29,0,0,345779,9.5,,S
83,1,3,"McDermott, Miss. Brigdet Delia",female,,0,0,330932,7.7875,,Q
84,0,1,"Carrau, Mr. Francisco M",male,28,0,0,113059,47.1,,S
85,1,2,"Ilett, Miss. Bertha",female,17,0,0,SO/C 14885,10.5,,S
86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,33,3,0,3101278,15.85,,S
87,0,3,"Ford, Mr. William Neal",male,16,1,3,W./C. 6608,34.375,,S
88,0,3,"Slocovski, Mr. Selman Francis",male,,0,0,SOTON/OQ 392086,8.05,,S
89,1,1,"Fortune, Miss. Mabel Helen",female,23,3,2,19950,263,C23 C25 C27,S
90,0,3,"Celotti, Mr. Francesco",male,24,0,0,343275,8.05,,S
91,0,3,"Christmann, Mr. Emil",male,29,0,0,343276,8.05,,S
92,0,3,"Andreasson, Mr. Paul Edvin",male,20,0,0,347466,7.8542,,S
93,0,1,"Chaffee, Mr. Herbert Fuller",male,46,1,0,W.E.P. 5734,61.175,E31,S
94,0,3,"Dean, Mr. Bertram Frank",male,26,1,2,C.A. 2315,20.575,,S
95,0,3,"Coxon, Mr. Daniel",male,59,0,0,364500,7.25,,S
96,0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,,S
97,0,1,"Goldschmidt, Mr. George B",male,71,0,0,PC 17754,34.6542,A5,C
98,1,1,"Greenfield, Mr. William Bertram",male,23,0,1,PC 17759,63.3583,D10 D12,C
99,1,2,"Doling, Mrs. John T (Ada Julia Bone)",female,34,0,1,231919,23,,S
100,0,2,"Kantor, Mr. Sinai",male,34,1,0,244367,26,,S
101,0,3,"Petranec, Miss. Matilda",female,28,0,0,349245,7.8958,,S
102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S
103,0,1,"White, Mr. Richard Frasar",male,21,0,1,35281,77.2875,D26,S
104,0,3,"Johansson, Mr. Gustaf Joel",male,33,0,0,7540,8.6542,,S
105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37,2,0,3101276,7.925,,S
106,0,3,"Mionoff, Mr. Stoytcho",male,28,0,0,349207,7.8958,,S
107,1,3,"Salkjelsvik, Miss. Anna Kristine",female,21,0,0,343120,7.65,,S
108,1,3,"Moss, Mr. Albert Johan",male,,0,0,312991,7.775,,S
109,0,3,"Rekic, Mr. Tido",male,38,0,0,349249,7.8958,,S
110,1,3,"Moran, Miss. Bertha",female,,1,0,371110,24.15,,Q
111,0,1,"Porter, Mr. Walter Chamberlain",male,47,0,0,110465,52,C110,S
112,0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C
113,0,3,"Barton, Mr. David John",male,22,0,0,324669,8.05,,S
114,0,3,"Jussila, Miss. Katriina",female,20,1,0,4136,9.825,,S
115,0,3,"Attalah, Miss. Malake",female,17,0,0,2627,14.4583,,C
116,0,3,"Pekoniemi, Mr. Edvard",male,21,0,0,STON/O 2. 3101294,7.925,,S
117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q
118,0,2,"Turpin, Mr. William John Robert",male,29,1,0,11668,21,,S
119,0,1,"Baxter, Mr. Quigg Edmond",male,24,0,1,PC 17558,247.5208,B58 B60,C
120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2,4,2,347082,31.275,,S
121,0,2,"Hickman, Mr. Stanley George",male,21,2,0,S.O.C. 14879,73.5,,S
122,0,3,"Moore, Mr. Leonard Charles",male,,0,0,A4. 54510,8.05,,S
123,0,2,"Nasser, Mr. Nicholas",male,32.5,1,0,237736,30.0708,,C
124,1,2,"Webber, Miss. Susan",female,32.5,0,0,27267,13,E101,S
125,0,1,"White, Mr. Percival Wayland",male,54,0,1,35281,77.2875,D26,S
126,1,3,"Nicola-Yarred, Master. Elias",male,12,1,0,2651,11.2417,,C
127,0,3,"McMahon, Mr. Martin",male,,0,0,370372,7.75,,Q
128,1,3,"Madsen, Mr. Fridtjof Arne",male,24,0,0,C 17369,7.1417,,S
129,1,3,"Peter, Miss. Anna",female,,1,1,2668,22.3583,F E69,C
130,0,3,"Ekstrom, Mr. Johan",male,45,0,0,347061,6.975,,S
131,0,3,"Drazenoic, Mr. Jozef",male,33,0,0,349241,7.8958,,C
132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20,0,0,SOTON/O.Q. 3101307,7.05,,S
133,0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47,1,0,A/5. 3337,14.5,,S
134,1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29,1,0,228414,26,,S
135,0,2,"Sobey, Mr. Samuel James Hayden",male,25,0,0,C.A. 29178,13,,S
136,0,2,"Richard, Mr. Emile",male,23,0,0,SC/PARIS 2133,15.0458,,C
137,1,1,"Newsom, Miss. Helen Monypeny",female,19,0,2,11752,26.2833,D47,S
138,0,1,"Futrelle, Mr. Jacques Heath",male,37,1,0,113803,53.1,C123,S
139,0,3,"Osen, Mr. Olaf Elon",male,16,0,0,7534,9.2167,,S
140,0,1,"Giglio, Mr. Victor",male,24,0,0,PC 17593,79.2,B86,C
141,0,3,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,,C
142,1,3,"Nysten, Miss. Anna Sofia",female,22,0,0,347081,7.75,,S
143,1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",female,24,1,0,STON/O2. 3101279,15.85,,S
144,0,3,"Burke, Mr. Jeremiah",male,19,0,0,365222,6.75,,Q
145,0,2,"Andrew, Mr. Edgardo Samuel",male,18,0,0,231945,11.5,,S
146,0,2,"Nicholls, Mr. Joseph Charles",male,19,1,1,C.A. 33112,36.75,,S
147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",male,27,0,0,350043,7.7958,,S
148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9,2,2,W./C. 6608,34.375,,S
149,0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",male,36.5,0,2,230080,26,F2,S
150,0,2,"Byles, Rev. Thomas Roussel Davids",male,42,0,0,244310,13,,S
151,0,2,"Bateman, Rev. Robert James",male,51,0,0,S.O.P. 1166,12.525,,S
152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22,1,0,113776,66.6,C2,S
153,0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.05,,S
154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5,,S
155,0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,,S
156,0,1,"Williams, Mr. Charles Duane",male,51,0,1,PC 17597,61.3792,,C
157,1,3,"Gilnagh, Miss. Katherine ""Katie""",female,16,0,0,35851,7.7333,,Q
158,0,3,"Corn, Mr. Harry",male,30,0,0,SOTON/OQ 392090,8.05,,S
159,0,3,"Smiljanic, Mr. Mile",male,,0,0,315037,8.6625,,S
160,0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,,S
161,0,3,"Cribb, Mr. John Hatfield",male,44,0,1,371362,16.1,,S
162,1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",female,40,0,0,C.A. 33595,15.75,,S
163,0,3,"Bengtsson, Mr. John Viktor",male,26,0,0,347068,7.775,,S
164,0,3,"Calic, Mr. Jovo",male,17,0,0,315093,8.6625,,S
165,0,3,"Panula, Master. Eino Viljami",male,1,4,1,3101295,39.6875,,S
166,1,3,"Goldsmith, Master. Frank John William ""Frankie""",male,9,0,2,363291,20.525,,S
167,1,1,"Chibnall, Mrs. (Edith Martha Bowerman)",female,,0,1,113505,55,E33,S
168,0,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)",female,45,1,4,347088,27.9,,S
169,0,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.925,,S
170,0,3,"Ling, Mr. Lee",male,28,0,0,1601,56.4958,,S
171,0,1,"Van der hoef, Mr. Wyckoff",male,61,0,0,111240,33.5,B19,S
172,0,3,"Rice, Master. Arthur",male,4,4,1,382652,29.125,,Q
173,1,3,"Johnson, Miss. Eleanor Ileen",female,1,1,1,347742,11.1333,,S
174,0,3,"Sivola, Mr. Antti Wilhelm",male,21,0,0,STON/O 2. 3101280,7.925,,S
175,0,1,"Smith, Mr. James Clinch",male,56,0,0,17764,30.6958,A7,C
176,0,3,"Klasen, Mr. Klas Albin",male,18,1,1,350404,7.8542,,S
177,0,3,"Lefebre, Master. Henry Forbes",male,,3,1,4133,25.4667,,S
178,0,1,"Isham, Miss. Ann Elizabeth",female,50,0,0,PC 17595,28.7125,C49,C
179,0,2,"Hale, Mr. Reginald",male,30,0,0,250653,13,,S
180,0,3,"Leonard, Mr. Lionel",male,36,0,0,LINE,0,,S
181,0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,,S
182,0,2,"Pernot, Mr. Rene",male,,0,0,SC/PARIS 2131,15.05,,C
183,0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9,4,2,347077,31.3875,,S
184,1,2,"Becker, Master. Richard F",male,1,2,1,230136,39,F4,S
185,1,3,"Kink-Heilmann, Miss. Luise Gretchen",female,4,0,2,315153,22.025,,S
186,0,1,"Rood, Mr. Hugh Roscoe",male,,0,0,113767,50,A32,S
187,1,3,"O'Brien, Mrs. Thomas (Johanna ""Hannah"" Godfrey)",female,,1,0,370365,15.5,,Q
188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45,0,0,111428,26.55,,S
189,0,3,"Bourke, Mr. John",male,40,1,1,364849,15.5,,Q
190,0,3,"Turcin, Mr. Stjepan",male,36,0,0,349247,7.8958,,S
191,1,2,"Pinsky, Mrs. (Rosa)",female,32,0,0,234604,13,,S
192,0,2,"Carbines, Mr. William",male,19,0,0,28424,13,,S
193,1,3,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19,1,0,350046,7.8542,,S
194,1,2,"Navratil, Master. Michel M",male,3,1,1,230080,26,F2,S
195,1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",female,44,0,0,PC 17610,27.7208,B4,C
196,1,1,"Lurette, Miss. Elise",female,58,0,0,PC 17569,146.5208,B80,C
197,0,3,"Mernagh, Mr. Robert",male,,0,0,368703,7.75,,Q
198,0,3,"Olsen, Mr. Karl Siegwart Andreas",male,42,0,1,4579,8.4042,,S
199,1,3,"Madigan, Miss. Margaret ""Maggie""",female,,0,0,370370,7.75,,Q
200,0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",female,24,0,0,248747,13,,S
201,0,3,"Vande Walle, Mr. Nestor Cyriel",male,28,0,0,345770,9.5,,S
202,0,3,"Sage, Mr. Frederick",male,,8,2,CA. 2343,69.55,,S
203,0,3,"Johanson, Mr. Jakob Alfred",male,34,0,0,3101264,6.4958,,S
204,0,3,"Youseff, Mr. Gerious",male,45.5,0,0,2628,7.225,,C
205,1,3,"Cohen, Mr. Gurshon ""Gus""",male,18,0,0,A/5 3540,8.05,,S
206,0,3,"Strom, Miss. Telma Matilda",female,2,0,1,347054,10.4625,G6,S
207,0,3,"Backstrom, Mr. Karl Alfred",male,32,1,0,3101278,15.85,,S
208,1,3,"Albimona, Mr. Nassef Cassem",male,26,0,0,2699,18.7875,,C
209,1,3,"Carr, Miss. Helen ""Ellen""",female,16,0,0,367231,7.75,,Q
210,1,1,"Blank, Mr. Henry",male,40,0,0,112277,31,A31,C
211,0,3,"Ali, Mr. Ahmed",male,24,0,0,SOTON/O.Q. 3101311,7.05,,S
212,1,2,"Cameron, Miss. Clear Annie",female,35,0,0,F.C.C. 13528,21,,S
213,0,3,"Perkin, Mr. John Henry",male,22,0,0,A/5 21174,7.25,,S
214,0,2,"Givard, Mr. Hans Kristensen",male,30,0,0,250646,13,,S
215,0,3,"Kiernan, Mr. Philip",male,,1,0,367229,7.75,,Q
216,1,1,"Newell, Miss. Madeleine",female,31,1,0,35273,113.275,D36,C
217,1,3,"Honkanen, Miss. Eliina",female,27,0,0,STON/O2. 3101283,7.925,,S
218,0,2,"Jacobsohn, Mr. Sidney Samuel",male,42,1,0,243847,27,,S
219,1,1,"Bazzani, Miss. Albina",female,32,0,0,11813,76.2917,D15,C
220,0,2,"Harris, Mr. Walter",male,30,0,0,W/C 14208,10.5,,S
221,1,3,"Sunderland, Mr. Victor Francis",male,16,0,0,SOTON/OQ 392089,8.05,,S
222,0,2,"Bracken, Mr. James H",male,27,0,0,220367,13,,S
223,0,3,"Green, Mr. George Henry",male,51,0,0,21440,8.05,,S
224,0,3,"Nenkoff, Mr. Christo",male,,0,0,349234,7.8958,,S
225,1,1,"Hoyt, Mr. Frederick Maxfield",male,38,1,0,19943,90,C93,S
226,0,3,"Berglund, Mr. Karl Ivar Sven",male,22,0,0,PP 4348,9.35,,S
227,1,2,"Mellors, Mr. William John",male,19,0,0,SW/PP 751,10.5,,S
228,0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,,S
229,0,2,"Fahlstrom, Mr. Arne Jonas",male,18,0,0,236171,13,,S
230,0,3,"Lefebre, Miss. Mathilde",female,,3,1,4133,25.4667,,S
231,1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",female,35,1,0,36973,83.475,C83,S
232,0,3,"Larsson, Mr. Bengt Edvin",male,29,0,0,347067,7.775,,S
233,0,2,"Sjostedt, Mr. Ernst Adolf",male,59,0,0,237442,13.5,,S
234,1,3,"Asplund, Miss. Lillian Gertrud",female,5,4,2,347077,31.3875,,S
235,0,2,"Leyson, Mr. Robert William Norman",male,24,0,0,C.A. 29566,10.5,,S
236,0,3,"Harknett, Miss. Alice Phoebe",female,,0,0,W./C. 6609,7.55,,S
237,0,2,"Hold, Mr. Stephen",male,44,1,0,26707,26,,S
238,1,2,"Collyer, Miss. Marjorie ""Lottie""",female,8,0,2,C.A. 31921,26.25,,S
239,0,2,"Pengelly, Mr. Frederick William",male,19,0,0,28665,10.5,,S
240,0,2,"Hunt, Mr. George Henry",male,33,0,0,SCO/W 1585,12.275,,S
241,0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C
242,1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,,Q
243,0,2,"Coleridge, Mr. Reginald Charles",male,29,0,0,W./C. 14263,10.5,,S
244,0,3,"Maenpaa, Mr. Matti Alexanteri",male,22,0,0,STON/O 2. 3101275,7.125,,S
245,0,3,"Attalah, Mr. Sleiman",male,30,0,0,2694,7.225,,C
246,0,1,"Minahan, Dr. William Edward",male,44,2,0,19928,90,C78,Q
247,0,3,"Lindahl, Miss. Agda Thorilda Viktoria",female,25,0,0,347071,7.775,,S
248,1,2,"Hamalainen, Mrs. William (Anna)",female,24,0,2,250649,14.5,,S
249,1,1,"Beckwith, Mr. Richard Leonard",male,37,1,1,11751,52.5542,D35,S
250,0,2,"Carter, Rev. Ernest Courtenay",male,54,1,0,244252,26,,S
251,0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,,S
252,0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29,1,1,347054,10.4625,G6,S
253,0,1,"Stead, Mr. William Thomas",male,62,0,0,113514,26.55,C87,S
254,0,3,"Lobb, Mr. William Arthur",male,30,1,0,A/5. 3336,16.1,,S
255,0,3,"Rosblom, Mrs. Viktor (Helena Wilhelmina)",female,41,0,2,370129,20.2125,,S
256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,29,0,2,2650,15.2458,,C
257,1,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,,C
258,1,1,"Cherry, Miss. Gladys",female,30,0,0,110152,86.5,B77,S
259,1,1,"Ward, Miss. Anna",female,35,0,0,PC 17755,512.3292,,C
260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50,0,1,230433,26,,S
261,0,3,"Smith, Mr. Thomas",male,,0,0,384461,7.75,,Q
262,1,3,"Asplund, Master. Edvin Rojj Felix",male,3,4,2,347077,31.3875,,S
263,0,1,"Taussig, Mr. Emil",male,52,1,1,110413,79.65,E67,S
264,0,1,"Harrison, Mr. William",male,40,0,0,112059,0,B94,S
265,0,3,"Henry, Miss. Delia",female,,0,0,382649,7.75,,Q
266,0,2,"Reeves, Mr. David",male,36,0,0,C.A. 17248,10.5,,S
267,0,3,"Panula, Mr. Ernesti Arvid",male,16,4,1,3101295,39.6875,,S
268,1,3,"Persson, Mr. Ernst Ulrik",male,25,1,0,347083,7.775,,S
269,1,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,58,0,1,PC 17582,153.4625,C125,S
270,1,1,"Bissette, Miss. Amelia",female,35,0,0,PC 17760,135.6333,C99,S
271,0,1,"Cairns, Mr. Alexander",male,,0,0,113798,31,,S
272,1,3,"Tornquist, Mr. William Henry",male,25,0,0,LINE,0,,S
273,1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",female,41,0,1,250644,19.5,,S
274,0,1,"Natsch, Mr. Charles H",male,37,0,1,PC 17596,29.7,C118,C
275,1,3,"Healy, Miss. Hanora ""Nora""",female,,0,0,370375,7.75,,Q
276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63,1,0,13502,77.9583,D7,S
277,0,3,"Lindblom, Miss. Augusta Charlotta",female,45,0,0,347073,7.75,,S
278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0,,S
279,0,3,"Rice, Master. Eric",male,7,4,1,382652,29.125,,Q
280,1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35,1,1,C.A. 2673,20.25,,S
281,0,3,"Duane, Mr. Frank",male,65,0,0,336439,7.75,,Q
282,0,3,"Olsson, Mr. Nils Johan Goransson",male,28,0,0,347464,7.8542,,S
283,0,3,"de Pelsmaeker, Mr. Alfons",male,16,0,0,345778,9.5,,S
284,1,3,"Dorking, Mr. Edward Arthur",male,19,0,0,A/5. 10482,8.05,,S
285,0,1,"Smith, Mr. Richard William",male,,0,0,113056,26,A19,S
286,0,3,"Stankovic, Mr. Ivan",male,33,0,0,349239,8.6625,,C
287,1,3,"de Mulder, Mr. Theodore",male,30,0,0,345774,9.5,,S
288,0,3,"Naidenoff, Mr. Penko",male,22,0,0,349206,7.8958,,S
289,1,2,"Hosono, Mr. Masabumi",male,42,0,0,237798,13,,S
290,1,3,"Connolly, Miss. Kate",female,22,0,0,370373,7.75,,Q
291,1,1,"Barber, Miss. Ellen ""Nellie""",female,26,0,0,19877,78.85,,S
292,1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",female,19,1,0,11967,91.0792,B49,C
293,0,2,"Levy, Mr. Rene Jacques",male,36,0,0,SC/Paris 2163,12.875,D,C
294,0,3,"Haas, Miss. Aloisia",female,24,0,0,349236,8.85,,S
295,0,3,"Mineff, Mr. Ivan",male,24,0,0,349233,7.8958,,S
296,0,1,"Lewy, Mr. Ervin G",male,,0,0,PC 17612,27.7208,,C
297,0,3,"Hanna, Mr. Mansour",male,23.5,0,0,2693,7.2292,,C
298,0,1,"Allison, Miss. Helen Loraine",female,2,1,2,113781,151.55,C22 C26,S
299,1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,C106,S
300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50,0,1,PC 17558,247.5208,B58 B60,C
301,1,3,"Kelly, Miss. Anna Katherine ""Annie Kate""",female,,0,0,9234,7.75,,Q
302,1,3,"McCoy, Mr. Bernard",male,,2,0,367226,23.25,,Q
303,0,3,"Johnson, Mr. William Cahoone Jr",male,19,0,0,LINE,0,,S
304,1,2,"Keane, Miss. Nora A",female,,0,0,226593,12.35,E101,Q
305,0,3,"Williams, Mr. Howard Hugh ""Harry""",male,,0,0,A/5 2466,8.05,,S
306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S
307,1,1,"Fleming, Miss. Margaret",female,,0,0,17421,110.8833,,C
308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",female,17,1,0,PC 17758,108.9,C65,C
309,0,2,"Abelson, Mr. Samuel",male,30,1,0,P/PP 3381,24,,C
310,1,1,"Francatelli, Miss. Laura Mabel",female,30,0,0,PC 17485,56.9292,E36,C
311,1,1,"Hays, Miss. Margaret Bechstein",female,24,0,0,11767,83.1583,C54,C
312,1,1,"Ryerson, Miss. Emily Borie",female,18,2,2,PC 17608,262.375,B57 B59 B63 B66,C
313,0,2,"Lahtinen, Mrs. William (Anna Sylfven)",female,26,1,1,250651,26,,S
314,0,3,"Hendekovic, Mr. Ignjac",male,28,0,0,349243,7.8958,,S
315,0,2,"Hart, Mr. Benjamin",male,43,1,1,F.C.C. 13529,26.25,,S
316,1,3,"Nilsson, Miss. Helmina Josefina",female,26,0,0,347470,7.8542,,S
317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24,1,0,244367,26,,S
318,0,2,"Moraweck, Dr. Ernest",male,54,0,0,29011,14,,S
319,1,1,"Wick, Miss. Mary Natalie",female,31,0,2,36928,164.8667,C7,S
320,1,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)",female,40,1,1,16966,134.5,E34,C
321,0,3,"Dennis, Mr. Samuel",male,22,0,0,A/5 21172,7.25,,S
322,0,3,"Danoff, Mr. Yoto",male,27,0,0,349219,7.8958,,S
323,1,2,"Slayter, Miss. Hilda Mary",female,30,0,0,234818,12.35,,Q
324,1,2,"Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)",female,22,1,1,248738,29,,S
325,0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,,S
326,1,1,"Young, Miss. Marie Grice",female,36,0,0,PC 17760,135.6333,C32,C
327,0,3,"Nysveen, Mr. Johan Hansen",male,61,0,0,345364,6.2375,,S
328,1,2,"Ball, Mrs. (Ada E Hall)",female,36,0,0,28551,13,D,S
329,1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31,1,1,363291,20.525,,S
330,1,1,"Hippach, Miss. Jean Gertrude",female,16,0,1,111361,57.9792,B18,C
331,1,3,"McCoy, Miss. Agnes",female,,2,0,367226,23.25,,Q
332,0,1,"Partner, Mr. Austen",male,45.5,0,0,113043,28.5,C124,S
333,0,1,"Graham, Mr. George Edward",male,38,0,1,PC 17582,153.4625,C91,S
334,0,3,"Vander Planke, Mr. Leo Edmondus",male,16,2,0,345764,18,,S
335,1,1,"Frauenthal, Mrs. Henry William (Clara Heinsheimer)",female,,1,0,PC 17611,133.65,,S
336,0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,,S
337,0,1,"Pears, Mr. Thomas Clinton",male,29,1,0,113776,66.6,C2,S
338,1,1,"Burns, Miss. Elizabeth Margaret",female,41,0,0,16966,134.5,E40,C
339,1,3,"Dahl, Mr. Karl Edwart",male,45,0,0,7598,8.05,,S
340,0,1,"Blackwell, Mr. Stephen Weart",male,45,0,0,113784,35.5,T,S
341,1,2,"Navratil, Master. Edmond Roger",male,2,1,1,230080,26,F2,S
342,1,1,"Fortune, Miss. Alice Elizabeth",female,24,3,2,19950,263,C23 C25 C27,S
343,0,2,"Collander, Mr. Erik Gustaf",male,28,0,0,248740,13,,S
344,0,2,"Sedgwick, Mr. Charles Frederick Waddington",male,25,0,0,244361,13,,S
345,0,2,"Fox, Mr. Stanley Hubert",male,36,0,0,229236,13,,S
346,1,2,"Brown, Miss. Amelia ""Mildred""",female,24,0,0,248733,13,F33,S
347,1,2,"Smith, Miss. Marion Elsie",female,40,0,0,31418,13,,S
348,1,3,"Davison, Mrs. Thomas Henry (Mary E Finck)",female,,1,0,386525,16.1,,S
349,1,3,"Coutts, Master. William Loch ""William""",male,3,1,1,C.A. 37671,15.9,,S
350,0,3,"Dimic, Mr. Jovan",male,42,0,0,315088,8.6625,,S
351,0,3,"Odahl, Mr. Nils Martin",male,23,0,0,7267,9.225,,S
352,0,1,"Williams-Lambert, Mr. Fletcher Fellows",male,,0,0,113510,35,C128,S
353,0,3,"Elias, Mr. Tannous",male,15,1,1,2695,7.2292,,C
354,0,3,"Arnold-Franchi, Mr. Josef",male,25,1,0,349237,17.8,,S
355,0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,,C
356,0,3,"Vanden Steen, Mr. Leo Peter",male,28,0,0,345783,9.5,,S
357,1,1,"Bowerman, Miss. Elsie Edith",female,22,0,1,113505,55,E33,S
358,0,2,"Funk, Miss. Annie Clemmer",female,38,0,0,237671,13,,S
359,1,3,"McGovern, Miss. Mary",female,,0,0,330931,7.8792,,Q
360,1,3,"Mockler, Miss. Helen Mary ""Ellie""",female,,0,0,330980,7.8792,,Q
361,0,3,"Skoog, Mr. Wilhelm",male,40,1,4,347088,27.9,,S
362,0,2,"del Carlo, Mr. Sebastiano",male,29,1,0,SC/PARIS 2167,27.7208,,C
363,0,3,"Barbara, Mrs. (Catherine David)",female,45,0,1,2691,14.4542,,C
364,0,3,"Asim, Mr. Adola",male,35,0,0,SOTON/O.Q. 3101310,7.05,,S
365,0,3,"O'Brien, Mr. Thomas",male,,1,0,370365,15.5,,Q
366,0,3,"Adahl, Mr. Mauritz Nils Martin",male,30,0,0,C 7076,7.25,,S
367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",female,60,1,0,110813,75.25,D37,C
368,1,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,,C
369,1,3,"Jermyn, Miss. Annie",female,,0,0,14313,7.75,,Q
370,1,1,"Aubart, Mme. Leontine Pauline",female,24,0,0,PC 17477,69.3,B35,C
371,1,1,"Harder, Mr. George Achilles",male,25,1,0,11765,55.4417,E50,C
372,0,3,"Wiklund, Mr. Jakob Alfred",male,18,1,0,3101267,6.4958,,S
373,0,3,"Beavan, Mr. William Thomas",male,19,0,0,323951,8.05,,S
374,0,1,"Ringhini, Mr. Sante",male,22,0,0,PC 17760,135.6333,,C
375,0,3,"Palsson, Miss. Stina Viola",female,3,3,1,349909,21.075,,S
376,1,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",female,,1,0,PC 17604,82.1708,,C
377,1,3,"Landergren, Miss. Aurora Adelia",female,22,0,0,C 7077,7.25,,S
378,0,1,"Widener, Mr. Harry Elkins",male,27,0,2,113503,211.5,C82,C
379,0,3,"Betros, Mr. Tannous",male,20,0,0,2648,4.0125,,C
380,0,3,"Gustafsson, Mr. Karl Gideon",male,19,0,0,347069,7.775,,S
381,1,1,"Bidois, Miss. Rosalie",female,42,0,0,PC 17757,227.525,,C
382,1,3,"Nakid, Miss. Maria (""Mary"")",female,1,0,2,2653,15.7417,,C
383,0,3,"Tikkanen, Mr. Juho",male,32,0,0,STON/O 2. 3101293,7.925,,S
384,1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",female,35,1,0,113789,52,,S
385,0,3,"Plotcharsky, Mr. Vasil",male,,0,0,349227,7.8958,,S
386,0,2,"Davies, Mr. Charles Henry",male,18,0,0,S.O.C. 14879,73.5,,S
387,0,3,"Goodwin, Master. Sidney Leonard",male,1,5,2,CA 2144,46.9,,S
388,1,2,"Buss, Miss. Kate",female,36,0,0,27849,13,,S
389,0,3,"Sadlier, Mr. Matthew",male,,0,0,367655,7.7292,,Q
390,1,2,"Lehmann, Miss. Bertha",female,17,0,0,SC 1748,12,,C
391,1,1,"Carter, Mr. William Ernest",male,36,1,2,113760,120,B96 B98,S
392,1,3,"Jansson, Mr. Carl Olof",male,21,0,0,350034,7.7958,,S
393,0,3,"Gustafsson, Mr. Johan Birger",male,28,2,0,3101277,7.925,,S
394,1,1,"Newell, Miss. Marjorie",female,23,1,0,35273,113.275,D36,C
395,1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",female,24,0,2,PP 9549,16.7,G6,S
396,0,3,"Johansson, Mr. Erik",male,22,0,0,350052,7.7958,,S
397,0,3,"Olsson, Miss. Elina",female,31,0,0,350407,7.8542,,S
398,0,2,"McKane, Mr. Peter David",male,46,0,0,28403,26,,S
399,0,2,"Pain, Dr. Alfred",male,23,0,0,244278,10.5,,S
400,1,2,"Trout, Mrs. William H (Jessie L)",female,28,0,0,240929,12.65,,S
401,1,3,"Niskanen, Mr. Juha",male,39,0,0,STON/O 2. 3101289,7.925,,S
402,0,3,"Adams, Mr. John",male,26,0,0,341826,8.05,,S
403,0,3,"Jussila, Miss. Mari Aina",female,21,1,0,4137,9.825,,S
404,0,3,"Hakkarainen, Mr. Pekka Pietari",male,28,1,0,STON/O2. 3101279,15.85,,S
405,0,3,"Oreskovic, Miss. Marija",female,20,0,0,315096,8.6625,,S
406,0,2,"Gale, Mr. Shadrach",male,34,1,0,28664,21,,S
407,0,3,"Widegren, Mr. Carl/Charles Peter",male,51,0,0,347064,7.75,,S
408,1,2,"Richards, Master. William Rowe",male,3,1,1,29106,18.75,,S
409,0,3,"Birkeland, Mr. Hans Martin Monsen",male,21,0,0,312992,7.775,,S
410,0,3,"Lefebre, Miss. Ida",female,,3,1,4133,25.4667,,S
411,0,3,"Sdycoff, Mr. Todor",male,,0,0,349222,7.8958,,S
412,0,3,"Hart, Mr. Henry",male,,0,0,394140,6.8583,,Q
413,1,1,"Minahan, Miss. Daisy E",female,33,1,0,19928,90,C78,Q
414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0,,S
415,1,3,"Sundman, Mr. Johan Julian",male,44,0,0,STON/O 2. 3101269,7.925,,S
416,0,3,"Meek, Mrs. Thomas (Annie Louise Rowley)",female,,0,0,343095,8.05,,S
417,1,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)",female,34,1,1,28220,32.5,,S
418,1,2,"Silven, Miss. Lyyli Karoliina",female,18,0,2,250652,13,,S
419,0,2,"Matthews, Mr. William John",male,30,0,0,28228,13,,S
420,0,3,"Van Impe, Miss. Catharina",female,10,0,2,345773,24.15,,S
421,0,3,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,,C
422,0,3,"Charters, Mr. David",male,21,0,0,A/5. 13032,7.7333,,Q
423,0,3,"Zimmerman, Mr. Leo",male,29,0,0,315082,7.875,,S
424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",female,28,1,1,347080,14.4,,S
425,0,3,"Rosblom, Mr. Viktor Richard",male,18,1,1,370129,20.2125,,S
426,0,3,"Wiseman, Mr. Phillippe",male,,0,0,A/4. 34244,7.25,,S
427,1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",female,28,1,0,2003,26,,S
428,1,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")",female,19,0,0,250655,26,,S
429,0,3,"Flynn, Mr. James",male,,0,0,364851,7.75,,Q
430,1,3,"Pickard, Mr. Berk (Berk Trembisky)",male,32,0,0,SOTON/O.Q. 392078,8.05,E10,S
431,1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",male,28,0,0,110564,26.55,C52,S
432,1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",female,,1,0,376564,16.1,,S
433,1,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)",female,42,1,0,SC/AH 3085,26,,S
434,0,3,"Kallio, Mr. Nikolai Erland",male,17,0,0,STON/O 2. 3101274,7.125,,S
435,0,1,"Silvey, Mr. William Baird",male,50,1,0,13507,55.9,E44,S
436,1,1,"Carter, Miss. Lucile Polk",female,14,1,2,113760,120,B96 B98,S
437,0,3,"Ford, Miss. Doolina Margaret ""Daisy""",female,21,2,2,W./C. 6608,34.375,,S
438,1,2,"Richards, Mrs. Sidney (Emily Hocking)",female,24,2,3,29106,18.75,,S
439,0,1,"Fortune, Mr. Mark",male,64,1,4,19950,263,C23 C25 C27,S
440,0,2,"Kvillner, Mr. Johan Henrik Johannesson",male,31,0,0,C.A. 18723,10.5,,S
441,1,2,"Hart, Mrs. Benjamin (Esther Ada Bloomfield)",female,45,1,1,F.C.C. 13529,26.25,,S
442,0,3,"Hampe, Mr. Leon",male,20,0,0,345769,9.5,,S
443,0,3,"Petterson, Mr. Johan Emil",male,25,1,0,347076,7.775,,S
444,1,2,"Reynaldo, Ms. Encarnacion",female,28,0,0,230434,13,,S
445,1,3,"Johannesen-Bratthammer, Mr. Bernt",male,,0,0,65306,8.1125,,S
446,1,1,"Dodge, Master. Washington",male,4,0,2,33638,81.8583,A34,S
447,1,2,"Mellinger, Miss. Madeleine Violet",female,13,0,1,250644,19.5,,S
448,1,1,"Seward, Mr. Frederic Kimber",male,34,0,0,113794,26.55,,S
449,1,3,"Baclini, Miss. Marie Catherine",female,5,2,1,2666,19.2583,,C
450,1,1,"Peuchen, Major. Arthur Godfrey",male,52,0,0,113786,30.5,C104,S
451,0,2,"West, Mr. Edwy Arthur",male,36,1,2,C.A. 34651,27.75,,S
452,0,3,"Hagland, Mr. Ingvald Olai Olsen",male,,1,0,65303,19.9667,,S
453,0,1,"Foreman, Mr. Benjamin Laventall",male,30,0,0,113051,27.75,C111,C
454,1,1,"Goldenberg, Mr. Samuel L",male,49,1,0,17453,89.1042,C92,C
455,0,3,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,,S
456,1,3,"Jalsevac, Mr. Ivan",male,29,0,0,349240,7.8958,,C
457,0,1,"Millet, Mr. Francis Davis",male,65,0,0,13509,26.55,E38,S
458,1,1,"Kenyon, Mrs. Frederick R (Marion)",female,,1,0,17464,51.8625,D21,S
459,1,2,"Toomey, Miss. Ellen",female,50,0,0,F.C.C. 13531,10.5,,S
460,0,3,"O'Connor, Mr. Maurice",male,,0,0,371060,7.75,,Q
461,1,1,"Anderson, Mr. Harry",male,48,0,0,19952,26.55,E12,S
462,0,3,"Morley, Mr. William",male,34,0,0,364506,8.05,,S
463,0,1,"Gee, Mr. Arthur H",male,47,0,0,111320,38.5,E63,S
464,0,2,"Milling, Mr. Jacob Christian",male,48,0,0,234360,13,,S
465,0,3,"Maisner, Mr. Simon",male,,0,0,A/S 2816,8.05,,S
466,0,3,"Goncalves, Mr. Manuel Estanslas",male,38,0,0,SOTON/O.Q. 3101306,7.05,,S
467,0,2,"Campbell, Mr. William",male,,0,0,239853,0,,S
468,0,1,"Smart, Mr. John Montgomery",male,56,0,0,113792,26.55,,S
469,0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,,Q
470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C
471,0,3,"Keefe, Mr. Arthur",male,,0,0,323592,7.25,,S
472,0,3,"Cacic, Mr. Luka",male,38,0,0,315089,8.6625,,S
473,1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",female,33,1,2,C.A. 34651,27.75,,S
474,1,2,"Jerwan, Mrs. Amin S (Marie Marthe Thuillard)",female,23,0,0,SC/AH Basle 541,13.7917,D,C
475,0,3,"Strandberg, Miss. Ida Sofia",female,22,0,0,7553,9.8375,,S
476,0,1,"Clifford, Mr. George Quincy",male,,0,0,110465,52,A14,S
477,0,2,"Renouf, Mr. Peter Henry",male,34,1,0,31027,21,,S
478,0,3,"Braund, Mr. Lewis Richard",male,29,1,0,3460,7.0458,,S
479,0,3,"Karlsson, Mr. Nils August",male,22,0,0,350060,7.5208,,S
480,1,3,"Hirvonen, Miss. Hildur E",female,2,0,1,3101298,12.2875,,S
481,0,3,"Goodwin, Master. Harold Victor",male,9,5,2,CA 2144,46.9,,S
482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0,,S
483,0,3,"Rouse, Mr. Richard Henry",male,50,0,0,A/5 3594,8.05,,S
484,1,3,"Turkula, Mrs. (Hedwig)",female,63,0,0,4134,9.5875,,S
485,1,1,"Bishop, Mr. Dickinson H",male,25,1,0,11967,91.0792,B49,C
486,0,3,"Lefebre, Miss. Jeannie",female,,3,1,4133,25.4667,,S
487,1,1,"Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)",female,35,1,0,19943,90,C93,S
488,0,1,"Kent, Mr. Edward Austin",male,58,0,0,11771,29.7,B37,C
489,0,3,"Somerton, Mr. Francis William",male,30,0,0,A.5. 18509,8.05,,S
490,1,3,"Coutts, Master. Eden Leslie ""Neville""",male,9,1,1,C.A. 37671,15.9,,S
491,0,3,"Hagland, Mr. Konrad Mathias Reiersen",male,,1,0,65304,19.9667,,S
492,0,3,"Windelov, Mr. Einar",male,21,0,0,SOTON/OQ 3101317,7.25,,S
493,0,1,"Molson, Mr. Harry Markland",male,55,0,0,113787,30.5,C30,S
494,0,1,"Artagaveytia, Mr. Ramon",male,71,0,0,PC 17609,49.5042,,C
495,0,3,"Stanley, Mr. Edward Roland",male,21,0,0,A/4 45380,8.05,,S
496,0,3,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,,C
497,1,1,"Eustis, Miss. Elizabeth Mussey",female,54,1,0,36947,78.2667,D20,C
498,0,3,"Shellard, Mr. Frederick William",male,,0,0,C.A. 6212,15.1,,S
499,0,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1,2,113781,151.55,C22 C26,S
500,0,3,"Svensson, Mr. Olof",male,24,0,0,350035,7.7958,,S
501,0,3,"Calic, Mr. Petar",male,17,0,0,315086,8.6625,,S
502,0,3,"Canavan, Miss. Mary",female,21,0,0,364846,7.75,,Q
503,0,3,"O'Sullivan, Miss. Bridget Mary",female,,0,0,330909,7.6292,,Q
504,0,3,"Laitinen, Miss. Kristina Sofia",female,37,0,0,4135,9.5875,,S
505,1,1,"Maioni, Miss. Roberta",female,16,0,0,110152,86.5,B79,S
506,0,1,"Penasco y Castellana, Mr. Victor de Satode",male,18,1,0,PC 17758,108.9,C65,C
507,1,2,"Quick, Mrs. Frederick Charles (Jane Richards)",female,33,0,2,26360,26,,S
508,1,1,"Bradley, Mr. George (""George Arthur Brayton"")",male,,0,0,111427,26.55,,S
509,0,3,"Olsen, Mr. Henry Margido",male,28,0,0,C 4001,22.525,,S
510,1,3,"Lang, Mr. Fang",male,26,0,0,1601,56.4958,,S
511,1,3,"Daly, Mr. Eugene Patrick",male,29,0,0,382651,7.75,,Q
512,0,3,"Webber, Mr. James",male,,0,0,SOTON/OQ 3101316,8.05,,S
513,1,1,"McGough, Mr. James Robert",male,36,0,0,PC 17473,26.2875,E25,S
514,1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",female,54,1,0,PC 17603,59.4,,C
515,0,3,"Coleff, Mr. Satio",male,24,0,0,349209,7.4958,,S
516,0,1,"Walker, Mr. William Anderson",male,47,0,0,36967,34.0208,D46,S
517,1,2,"Lemore, Mrs. (Amelia Milley)",female,34,0,0,C.A. 34260,10.5,F33,S
518,0,3,"Ryan, Mr. Patrick",male,,0,0,371110,24.15,,Q
519,1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",female,36,1,0,226875,26,,S
520,0,3,"Pavlovic, Mr. Stefo",male,32,0,0,349242,7.8958,,S
521,1,1,"Perreault, Miss. Anne",female,30,0,0,12749,93.5,B73,S
522,0,3,"Vovk, Mr. Janko",male,22,0,0,349252,7.8958,,S
523,0,3,"Lahoud, Mr. Sarkis",male,,0,0,2624,7.225,,C
524,1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,44,0,1,111361,57.9792,B18,C
525,0,3,"Kassem, Mr. Fared",male,,0,0,2700,7.2292,,C
526,0,3,"Farrell, Mr. James",male,40.5,0,0,367232,7.75,,Q
527,1,2,"Ridsdale, Miss. Lucy",female,50,0,0,W./C. 14258,10.5,,S
528,0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,C95,S
529,0,3,"Salonen, Mr. Johan Werner",male,39,0,0,3101296,7.925,,S
530,0,2,"Hocking, Mr. Richard George",male,23,2,1,29104,11.5,,S
531,1,2,"Quick, Miss. Phyllis May",female,2,1,1,26360,26,,S
532,0,3,"Toufik, Mr. Nakli",male,,0,0,2641,7.2292,,C
533,0,3,"Elias, Mr. Joseph Jr",male,17,1,1,2690,7.2292,,C
534,1,3,"Peter, Mrs. Catherine (Catherine Rizk)",female,,0,2,2668,22.3583,,C
535,0,3,"Cacic, Miss. Marija",female,30,0,0,315084,8.6625,,S
536,1,2,"Hart, Miss. Eva Miriam",female,7,0,2,F.C.C. 13529,26.25,,S
537,0,1,"Butt, Major. Archibald Willingham",male,45,0,0,113050,26.55,B38,S
538,1,1,"LeRoy, Miss. Bertha",female,30,0,0,PC 17761,106.425,,C
539,0,3,"Risien, Mr. Samuel Beard",male,,0,0,364498,14.5,,S
540,1,1,"Frolicher, Miss. Hedwig Margaritha",female,22,0,2,13568,49.5,B39,C
541,1,1,"Crosby, Miss. Harriet R",female,36,0,2,WE/P 5735,71,B22,S
542,0,3,"Andersson, Miss. Ingeborg Constanzia",female,9,4,2,347082,31.275,,S
543,0,3,"Andersson, Miss. Sigrid Elisabeth",female,11,4,2,347082,31.275,,S
544,1,2,"Beane, Mr. Edward",male,32,1,0,2908,26,,S
545,0,1,"Douglas, Mr. Walter Donald",male,50,1,0,PC 17761,106.425,C86,C
546,0,1,"Nicholson, Mr. Arthur Ernest",male,64,0,0,693,26,,S
547,1,2,"Beane, Mrs. Edward (Ethel Clarke)",female,19,1,0,2908,26,,S
548,1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,,C
549,0,3,"Goldsmith, Mr. Frank John",male,33,1,1,363291,20.525,,S
550,1,2,"Davies, Master. John Morgan Jr",male,8,1,1,C.A. 33112,36.75,,S
551,1,1,"Thayer, Mr. John Borland Jr",male,17,0,2,17421,110.8833,C70,C
552,0,2,"Sharp, Mr. Percival James R",male,27,0,0,244358,26,,S
553,0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,,Q
554,1,3,"Leeni, Mr. Fahim (""Philip Zenni"")",male,22,0,0,2620,7.225,,C
555,1,3,"Ohman, Miss. Velin",female,22,0,0,347085,7.775,,S
556,0,1,"Wright, Mr. George",male,62,0,0,113807,26.55,,S
557,1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",female,48,1,0,11755,39.6,A16,C
558,0,1,"Robbins, Mr. Victor",male,,0,0,PC 17757,227.525,,C
559,1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",female,39,1,1,110413,79.65,E67,S
560,1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36,1,0,345572,17.4,,S
561,0,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.75,,Q
562,0,3,"Sivic, Mr. Husein",male,40,0,0,349251,7.8958,,S
563,0,2,"Norman, Mr. Robert Douglas",male,28,0,0,218629,13.5,,S
564,0,3,"Simmons, Mr. John",male,,0,0,SOTON/OQ 392082,8.05,,S
565,0,3,"Meanwell, Miss. (Marion Ogden)",female,,0,0,SOTON/O.Q. 392087,8.05,,S
566,0,3,"Davies, Mr. Alfred J",male,24,2,0,A/4 48871,24.15,,S
567,0,3,"Stoytcheff, Mr. Ilia",male,19,0,0,349205,7.8958,,S
568,0,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",female,29,0,4,349909,21.075,,S
569,0,3,"Doharr, Mr. Tannous",male,,0,0,2686,7.2292,,C
570,1,3,"Jonsson, Mr. Carl",male,32,0,0,350417,7.8542,,S
571,1,2,"Harris, Mr. George",male,62,0,0,S.W./PP 752,10.5,,S
572,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53,2,0,11769,51.4792,C101,S
573,1,1,"Flynn, Mr. John Irwin (""Irving"")",male,36,0,0,PC 17474,26.3875,E25,S
574,1,3,"Kelly, Miss. Mary",female,,0,0,14312,7.75,,Q
575,0,3,"Rush, Mr. Alfred George John",male,16,0,0,A/4. 20589,8.05,,S
576,0,3,"Patchett, Mr. George",male,19,0,0,358585,14.5,,S
577,1,2,"Garside, Miss. Ethel",female,34,0,0,243880,13,,S
578,1,1,"Silvey, Mrs. William Baird (Alice Munger)",female,39,1,0,13507,55.9,E44,S
579,0,3,"Caram, Mrs. Joseph (Maria Elias)",female,,1,0,2689,14.4583,,C
580,1,3,"Jussila, Mr. Eiriik",male,32,0,0,STON/O 2. 3101286,7.925,,S
581,1,2,"Christy, Miss. Julie Rachel",female,25,1,1,237789,30,,S
582,1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",female,39,1,1,17421,110.8833,C68,C
583,0,2,"Downton, Mr. William James",male,54,0,0,28403,26,,S
584,0,1,"Ross, Mr. John Hugo",male,36,0,0,13049,40.125,A10,C
585,0,3,"Paulner, Mr. Uscher",male,,0,0,3411,8.7125,,C
586,1,1,"Taussig, Miss. Ruth",female,18,0,2,110413,79.65,E68,S
587,0,2,"Jarvis, Mr. John Denzil",male,47,0,0,237565,15,,S
588,1,1,"Frolicher-Stehli, Mr. Maxmillian",male,60,1,1,13567,79.2,B41,C
589,0,3,"Gilinski, Mr. Eliezer",male,22,0,0,14973,8.05,,S
590,0,3,"Murdlin, Mr. Joseph",male,,0,0,A./5. 3235,8.05,,S
591,0,3,"Rintamaki, Mr. Matti",male,35,0,0,STON/O 2. 3101273,7.125,,S
592,1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",female,52,1,0,36947,78.2667,D20,C
593,0,3,"Elsbury, Mr. William James",male,47,0,0,A/5 3902,7.25,,S
594,0,3,"Bourke, Miss. Mary",female,,0,2,364848,7.75,,Q
595,0,2,"Chapman, Mr. John Henry",male,37,1,0,SC/AH 29037,26,,S
596,0,3,"Van Impe, Mr. Jean Baptiste",male,36,1,1,345773,24.15,,S
597,1,2,"Leitch, Miss. Jessie Wills",female,,0,0,248727,33,,S
598,0,3,"Johnson, Mr. Alfred",male,49,0,0,LINE,0,,S
599,0,3,"Boulos, Mr. Hanna",male,,0,0,2664,7.225,,C
600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49,1,0,PC 17485,56.9292,A20,C
601,1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",female,24,2,1,243847,27,,S
602,0,3,"Slabenoff, Mr. Petco",male,,0,0,349214,7.8958,,S
603,0,1,"Harrington, Mr. Charles H",male,,0,0,113796,42.4,,S
604,0,3,"Torber, Mr. Ernst William",male,44,0,0,364511,8.05,,S
605,1,1,"Homer, Mr. Harry (""Mr E Haven"")",male,35,0,0,111426,26.55,,C
606,0,3,"Lindell, Mr. Edvard Bengtsson",male,36,1,0,349910,15.55,,S
607,0,3,"Karaic, Mr. Milan",male,30,0,0,349246,7.8958,,S
608,1,1,"Daniel, Mr. Robert Williams",male,27,0,0,113804,30.5,,S
609,1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",female,22,1,2,SC/Paris 2123,41.5792,,C
610,1,1,"Shutes, Miss. Elizabeth W",female,40,0,0,PC 17582,153.4625,C125,S
611,0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",female,39,1,5,347082,31.275,,S
612,0,3,"Jardin, Mr. Jose Neto",male,,0,0,SOTON/O.Q. 3101305,7.05,,S
613,1,3,"Murphy, Miss. Margaret Jane",female,,1,0,367230,15.5,,Q
614,0,3,"Horgan, Mr. John",male,,0,0,370377,7.75,,Q
615,0,3,"Brocklebank, Mr. William Alfred",male,35,0,0,364512,8.05,,S
616,1,2,"Herman, Miss. Alice",female,24,1,2,220845,65,,S
617,0,3,"Danbom, Mr. Ernst Gilbert",male,34,1,1,347080,14.4,,S
618,0,3,"Lobb, Mrs. William Arthur (Cordelia K Stanlick)",female,26,1,0,A/5. 3336,16.1,,S
619,1,2,"Becker, Miss. Marion Louise",female,4,2,1,230136,39,F4,S
620,0,2,"Gavey, Mr. Lawrence",male,26,0,0,31028,10.5,,S
621,0,3,"Yasbeck, Mr. Antoni",male,27,1,0,2659,14.4542,,C
622,1,1,"Kimball, Mr. Edwin Nelson Jr",male,42,1,0,11753,52.5542,D19,S
623,1,3,"Nakid, Mr. Sahid",male,20,1,1,2653,15.7417,,C
624,0,3,"Hansen, Mr. Henry Damsgaard",male,21,0,0,350029,7.8542,,S
625,0,3,"Bowen, Mr. David John ""Dai""",male,21,0,0,54636,16.1,,S
626,0,1,"Sutton, Mr. Frederick",male,61,0,0,36963,32.3208,D50,S
627,0,2,"Kirkland, Rev. Charles Leonard",male,57,0,0,219533,12.35,,Q
628,1,1,"Longley, Miss. Gretchen Fiske",female,21,0,0,13502,77.9583,D9,S
629,0,3,"Bostandyeff, Mr. Guentcho",male,26,0,0,349224,7.8958,,S
630,0,3,"O'Connell, Mr. Patrick D",male,,0,0,334912,7.7333,,Q
631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80,0,0,27042,30,A23,S
632,0,3,"Lundahl, Mr. Johan Svensson",male,51,0,0,347743,7.0542,,S
633,1,1,"Stahelin-Maeglin, Dr. Max",male,32,0,0,13214,30.5,B50,C
634,0,1,"Parr, Mr. William Henry Marsh",male,,0,0,112052,0,,S
635,0,3,"Skoog, Miss. Mabel",female,9,3,2,347088,27.9,,S
636,1,2,"Davis, Miss. Mary",female,28,0,0,237668,13,,S
637,0,3,"Leinonen, Mr. Antti Gustaf",male,32,0,0,STON/O 2. 3101292,7.925,,S
638,0,2,"Collyer, Mr. Harvey",male,31,1,1,C.A. 31921,26.25,,S
639,0,3,"Panula, Mrs. Juha (Maria Emilia Ojala)",female,41,0,5,3101295,39.6875,,S
640,0,3,"Thorneycroft, Mr. Percival",male,,1,0,376564,16.1,,S
641,0,3,"Jensen, Mr. Hans Peder",male,20,0,0,350050,7.8542,,S
642,1,1,"Sagesser, Mlle. Emma",female,24,0,0,PC 17477,69.3,B35,C
643,0,3,"Skoog, Miss. Margit Elizabeth",female,2,3,2,347088,27.9,,S
644,1,3,"Foo, Mr. Choong",male,,0,0,1601,56.4958,,S
645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C
646,1,1,"Harper, Mr. Henry Sleeper",male,48,1,0,PC 17572,76.7292,D33,C
647,0,3,"Cor, Mr. Liudevit",male,19,0,0,349231,7.8958,,S
648,1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56,0,0,13213,35.5,A26,C
649,0,3,"Willey, Mr. Edward",male,,0,0,S.O./P.P. 751,7.55,,S
650,1,3,"Stanley, Miss. Amy Zillah Elsie",female,23,0,0,CA. 2314,7.55,,S
651,0,3,"Mitkoff, Mr. Mito",male,,0,0,349221,7.8958,,S
652,1,2,"Doling, Miss. Elsie",female,18,0,1,231919,23,,S
653,0,3,"Kalvik, Mr. Johannes Halvorsen",male,21,0,0,8475,8.4333,,S
654,1,3,"O'Leary, Miss. Hanora ""Norah""",female,,0,0,330919,7.8292,,Q
655,0,3,"Hegarty, Miss. Hanora ""Nora""",female,18,0,0,365226,6.75,,Q
656,0,2,"Hickman, Mr. Leonard Mark",male,24,2,0,S.O.C. 14879,73.5,,S
657,0,3,"Radeff, Mr. Alexander",male,,0,0,349223,7.8958,,S
658,0,3,"Bourke, Mrs. John (Catherine)",female,32,1,1,364849,15.5,,Q
659,0,2,"Eitemiller, Mr. George Floyd",male,23,0,0,29751,13,,S
660,0,1,"Newell, Mr. Arthur Webster",male,58,0,2,35273,113.275,D48,C
661,1,1,"Frauenthal, Dr. Henry William",male,50,2,0,PC 17611,133.65,,S
662,0,3,"Badt, Mr. Mohamed",male,40,0,0,2623,7.225,,C
663,0,1,"Colley, Mr. Edward Pomeroy",male,47,0,0,5727,25.5875,E58,S
664,0,3,"Coleff, Mr. Peju",male,36,0,0,349210,7.4958,,S
665,1,3,"Lindqvist, Mr. Eino William",male,20,1,0,STON/O 2. 3101285,7.925,,S
666,0,2,"Hickman, Mr. Lewis",male,32,2,0,S.O.C. 14879,73.5,,S
667,0,2,"Butler, Mr. Reginald Fenton",male,25,0,0,234686,13,,S
668,0,3,"Rommetvedt, Mr. Knud Paust",male,,0,0,312993,7.775,,S
669,0,3,"Cook, Mr. Jacob",male,43,0,0,A/5 3536,8.05,,S
670,1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",female,,1,0,19996,52,C126,S
671,1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",female,40,1,1,29750,39,,S
672,0,1,"Davidson, Mr. Thornton",male,31,1,0,F.C. 12750,52,B71,S
673,0,2,"Mitchell, Mr. Henry Michael",male,70,0,0,C.A. 24580,10.5,,S
674,1,2,"Wilhelms, Mr. Charles",male,31,0,0,244270,13,,S
675,0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0,,S
676,0,3,"Edvardsson, Mr. Gustaf Hjalmar",male,18,0,0,349912,7.775,,S
677,0,3,"Sawyer, Mr. Frederick Charles",male,24.5,0,0,342826,8.05,,S
678,1,3,"Turja, Miss. Anna Sofia",female,18,0,0,4138,9.8417,,S
679,0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",female,43,1,6,CA 2144,46.9,,S
680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36,0,1,PC 17755,512.3292,B51 B53 B55,C
681,0,3,"Peters, Miss. Katie",female,,0,0,330935,8.1375,,Q
682,1,1,"Hassab, Mr. Hammad",male,27,0,0,PC 17572,76.7292,D49,C
683,0,3,"Olsvigen, Mr. Thor Anderson",male,20,0,0,6563,9.225,,S
684,0,3,"Goodwin, Mr. Charles Edward",male,14,5,2,CA 2144,46.9,,S
685,0,2,"Brown, Mr. Thomas William Solomon",male,60,1,1,29750,39,,S
686,0,2,"Laroche, Mr. Joseph Philippe Lemercier",male,25,1,2,SC/Paris 2123,41.5792,,C
687,0,3,"Panula, Mr. Jaako Arnold",male,14,4,1,3101295,39.6875,,S
688,0,3,"Dakic, Mr. Branko",male,19,0,0,349228,10.1708,,S
689,0,3,"Fischer, Mr. Eberhard Thelander",male,18,0,0,350036,7.7958,,S
690,1,1,"Madill, Miss. Georgette Alexandra",female,15,0,1,24160,211.3375,B5,S
691,1,1,"Dick, Mr. Albert Adrian",male,31,1,0,17474,57,B20,S
692,1,3,"Karun, Miss. Manca",female,4,0,1,349256,13.4167,,C
693,1,3,"Lam, Mr. Ali",male,,0,0,1601,56.4958,,S
694,0,3,"Saad, Mr. Khalil",male,25,0,0,2672,7.225,,C
695,0,1,"Weir, Col. John",male,60,0,0,113800,26.55,,S
696,0,2,"Chapman, Mr. Charles Henry",male,52,0,0,248731,13.5,,S
697,0,3,"Kelly, Mr. James",male,44,0,0,363592,8.05,,S
698,1,3,"Mullens, Miss. Katherine ""Katie""",female,,0,0,35852,7.7333,,Q
699,0,1,"Thayer, Mr. John Borland",male,49,1,1,17421,110.8833,C68,C
700,0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,42,0,0,348121,7.65,F G63,S
701,1,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",female,18,1,0,PC 17757,227.525,C62 C64,C
702,1,1,"Silverthorne, Mr. Spencer Victor",male,35,0,0,PC 17475,26.2875,E24,S
703,0,3,"Barbara, Miss. Saiide",female,18,0,1,2691,14.4542,,C
704,0,3,"Gallagher, Mr. Martin",male,25,0,0,36864,7.7417,,Q
705,0,3,"Hansen, Mr. Henrik Juul",male,26,1,0,350025,7.8542,,S
706,0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39,0,0,250655,26,,S
707,1,2,"Kelly, Mrs. Florence ""Fannie""",female,45,0,0,223596,13.5,,S
708,1,1,"Calderhead, Mr. Edward Pennington",male,42,0,0,PC 17476,26.2875,E24,S
709,1,1,"Cleaver, Miss. Alice",female,22,0,0,113781,151.55,,S
710,1,3,"Moubarek, Master. Halim Gonios (""William George"")",male,,1,1,2661,15.2458,,C
711,1,1,"Mayne, Mlle. Berthe Antonine (""Mrs de Villiers"")",female,24,0,0,PC 17482,49.5042,C90,C
712,0,1,"Klaber, Mr. Herman",male,,0,0,113028,26.55,C124,S
713,1,1,"Taylor, Mr. Elmer Zebley",male,48,1,0,19996,52,C126,S
714,0,3,"Larsson, Mr. August Viktor",male,29,0,0,7545,9.4833,,S
715,0,2,"Greenberg, Mr. Samuel",male,52,0,0,250647,13,,S
716,0,3,"Soholt, Mr. Peter Andreas Lauritz Andersen",male,19,0,0,348124,7.65,F G73,S
717,1,1,"Endres, Miss. Caroline Louise",female,38,0,0,PC 17757,227.525,C45,C
718,1,2,"Troutt, Miss. Edwina Celia ""Winnie""",female,27,0,0,34218,10.5,E101,S
719,0,3,"McEvoy, Mr. Michael",male,,0,0,36568,15.5,,Q
720,0,3,"Johnson, Mr. Malkolm Joackim",male,33,0,0,347062,7.775,,S
721,1,2,"Harper, Miss. Annie Jessie ""Nina""",female,6,0,1,248727,33,,S
722,0,3,"Jensen, Mr. Svend Lauritz",male,17,1,0,350048,7.0542,,S
723,0,2,"Gillespie, Mr. William Henry",male,34,0,0,12233,13,,S
724,0,2,"Hodges, Mr. Henry Price",male,50,0,0,250643,13,,S
725,1,1,"Chambers, Mr. Norman Campbell",male,27,1,0,113806,53.1,E8,S
726,0,3,"Oreskovic, Mr. Luka",male,20,0,0,315094,8.6625,,S
727,1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",female,30,3,0,31027,21,,S
728,1,3,"Mannion, Miss. Margareth",female,,0,0,36866,7.7375,,Q
729,0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",male,25,1,0,236853,26,,S
730,0,3,"Ilmakangas, Miss. Pieta Sofia",female,25,1,0,STON/O2. 3101271,7.925,,S
731,1,1,"Allen, Miss. Elisabeth Walton",female,29,0,0,24160,211.3375,B5,S
732,0,3,"Hassan, Mr. Houssein G N",male,11,0,0,2699,18.7875,,C
733,0,2,"Knight, Mr. Robert J",male,,0,0,239855,0,,S
734,0,2,"Berriman, Mr. William John",male,23,0,0,28425,13,,S
735,0,2,"Troupiansky, Mr. Moses Aaron",male,23,0,0,233639,13,,S
736,0,3,"Williams, Mr. Leslie",male,28.5,0,0,54636,16.1,,S
737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48,1,3,W./C. 6608,34.375,,S
738,1,1,"Lesurer, Mr. Gustave J",male,35,0,0,PC 17755,512.3292,B101,C
739,0,3,"Ivanoff, Mr. Kanio",male,,0,0,349201,7.8958,,S
740,0,3,"Nankoff, Mr. Minko",male,,0,0,349218,7.8958,,S
741,1,1,"Hawksford, Mr. Walter James",male,,0,0,16988,30,D45,S
742,0,1,"Cavendish, Mr. Tyrell William",male,36,1,0,19877,78.85,C46,S
743,1,1,"Ryerson, Miss. Susan Parker ""Suzette""",female,21,2,2,PC 17608,262.375,B57 B59 B63 B66,C
744,0,3,"McNamee, Mr. Neal",male,24,1,0,376566,16.1,,S
745,1,3,"Stranden, Mr. Juho",male,31,0,0,STON/O 2. 3101288,7.925,,S
746,0,1,"Crosby, Capt. Edward Gifford",male,70,1,1,WE/P 5735,71,B22,S
747,0,3,"Abbott, Mr. Rossmore Edward",male,16,1,1,C.A. 2673,20.25,,S
748,1,2,"Sinkkonen, Miss. Anna",female,30,0,0,250648,13,,S
749,0,1,"Marvin, Mr. Daniel Warner",male,19,1,0,113773,53.1,D30,S
750,0,3,"Connaghton, Mr. Michael",male,31,0,0,335097,7.75,,Q
751,1,2,"Wells, Miss. Joan",female,4,1,1,29103,23,,S
752,1,3,"Moor, Master. Meier",male,6,0,1,392096,12.475,E121,S
753,0,3,"Vande Velde, Mr. Johannes Joseph",male,33,0,0,345780,9.5,,S
754,0,3,"Jonkoff, Mr. Lalio",male,23,0,0,349204,7.8958,,S
755,1,2,"Herman, Mrs. Samuel (Jane Laver)",female,48,1,2,220845,65,,S
756,1,2,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5,,S
757,0,3,"Carlsson, Mr. August Sigfrid",male,28,0,0,350042,7.7958,,S
758,0,2,"Bailey, Mr. Percy Andrew",male,18,0,0,29108,11.5,,S
759,0,3,"Theobald, Mr. Thomas Leonard",male,34,0,0,363294,8.05,,S
760,1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",female,33,0,0,110152,86.5,B77,S
761,0,3,"Garfirth, Mr. John",male,,0,0,358585,14.5,,S
762,0,3,"Nirva, Mr. Iisakki Antino Aijo",male,41,0,0,SOTON/O2 3101272,7.125,,S
763,1,3,"Barah, Mr. Hanna Assi",male,20,0,0,2663,7.2292,,C
764,1,1,"Carter, Mrs. William Ernest (Lucile Polk)",female,36,1,2,113760,120,B96 B98,S
765,0,3,"Eklund, Mr. Hans Linus",male,16,0,0,347074,7.775,,S
766,1,1,"Hogeboom, Mrs. John C (Anna Andrews)",female,51,1,0,13502,77.9583,D11,S
767,0,1,"Brewe, Dr. Arthur Jackson",male,,0,0,112379,39.6,,C
768,0,3,"Mangan, Miss. Mary",female,30.5,0,0,364850,7.75,,Q
769,0,3,"Moran, Mr. Daniel J",male,,1,0,371110,24.15,,Q
770,0,3,"Gronnestad, Mr. Daniel Danielsen",male,32,0,0,8471,8.3625,,S
771,0,3,"Lievens, Mr. Rene Aime",male,24,0,0,345781,9.5,,S
772,0,3,"Jensen, Mr. Niels Peder",male,48,0,0,350047,7.8542,,S
773,0,2,"Mack, Mrs. (Mary)",female,57,0,0,S.O./P.P. 3,10.5,E77,S
774,0,3,"Elias, Mr. Dibo",male,,0,0,2674,7.225,,C
775,1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",female,54,1,3,29105,23,,S
776,0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",male,18,0,0,347078,7.75,,S
777,0,3,"Tobin, Mr. Roger",male,,0,0,383121,7.75,F38,Q
778,1,3,"Emanuel, Miss. Virginia Ethel",female,5,0,0,364516,12.475,,S
779,0,3,"Kilgannon, Mr. Thomas J",male,,0,0,36865,7.7375,,Q
780,1,1,"Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)",female,43,0,1,24160,211.3375,B3,S
781,1,3,"Ayoub, Miss. Banoura",female,13,0,0,2687,7.2292,,C
782,1,1,"Dick, Mrs. Albert Adrian (Vera Gillespie)",female,17,1,0,17474,57,B20,S
783,0,1,"Long, Mr. Milton Clyde",male,29,0,0,113501,30,D6,S
784,0,3,"Johnston, Mr. Andrew G",male,,1,2,W./C. 6607,23.45,,S
785,0,3,"Ali, Mr. William",male,25,0,0,SOTON/O.Q. 3101312,7.05,,S
786,0,3,"Harmer, Mr. Abraham (David Lishin)",male,25,0,0,374887,7.25,,S
787,1,3,"Sjoblom, Miss. Anna Sofia",female,18,0,0,3101265,7.4958,,S
788,0,3,"Rice, Master. George Hugh",male,8,4,1,382652,29.125,,Q
789,1,3,"Dean, Master. Bertram Vere",male,1,1,2,C.A. 2315,20.575,,S
790,0,1,"Guggenheim, Mr. Benjamin",male,46,0,0,PC 17593,79.2,B82 B84,C
791,0,3,"Keane, Mr. Andrew ""Andy""",male,,0,0,12460,7.75,,Q
792,0,2,"Gaskell, Mr. Alfred",male,16,0,0,239865,26,,S
793,0,3,"Sage, Miss. Stella Anna",female,,8,2,CA. 2343,69.55,,S
794,0,1,"Hoyt, Mr. William Fisher",male,,0,0,PC 17600,30.6958,,C
795,0,3,"Dantcheff, Mr. Ristiu",male,25,0,0,349203,7.8958,,S
796,0,2,"Otter, Mr. Richard",male,39,0,0,28213,13,,S
797,1,1,"Leader, Dr. Alice (Farnham)",female,49,0,0,17465,25.9292,D17,S
798,1,3,"Osman, Mrs. Mara",female,31,0,0,349244,8.6833,,S
799,0,3,"Ibrahim Shawah, Mr. Yousseff",male,30,0,0,2685,7.2292,,C
800,0,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)",female,30,1,1,345773,24.15,,S
801,0,2,"Ponesell, Mr. Martin",male,34,0,0,250647,13,,S
802,1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31,1,1,C.A. 31921,26.25,,S
803,1,1,"Carter, Master. William Thornton II",male,11,1,2,113760,120,B96 B98,S
804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C
805,1,3,"Hedman, Mr. Oskar Arvid",male,27,0,0,347089,6.975,,S
806,0,3,"Johansson, Mr. Karl Johan",male,31,0,0,347063,7.775,,S
807,0,1,"Andrews, Mr. Thomas Jr",male,39,0,0,112050,0,A36,S
808,0,3,"Pettersson, Miss. Ellen Natalia",female,18,0,0,347087,7.775,,S
809,0,2,"Meyer, Mr. August",male,39,0,0,248723,13,,S
810,1,1,"Chambers, Mrs. Norman Campbell (Bertha Griggs)",female,33,1,0,113806,53.1,E8,S
811,0,3,"Alexander, Mr. William",male,26,0,0,3474,7.8875,,S
812,0,3,"Lester, Mr. James",male,39,0,0,A/4 48871,24.15,,S
813,0,2,"Slemen, Mr. Richard James",male,35,0,0,28206,10.5,,S
814,0,3,"Andersson, Miss. Ebba Iris Alfrida",female,6,4,2,347082,31.275,,S
815,0,3,"Tomlin, Mr. Ernest Portage",male,30.5,0,0,364499,8.05,,S
816,0,1,"Fry, Mr. Richard",male,,0,0,112058,0,B102,S
817,0,3,"Heininen, Miss. Wendla Maria",female,23,0,0,STON/O2. 3101290,7.925,,S
818,0,2,"Mallet, Mr. Albert",male,31,1,1,S.C./PARIS 2079,37.0042,,C
819,0,3,"Holm, Mr. John Fredrik Alexander",male,43,0,0,C 7075,6.45,,S
820,0,3,"Skoog, Master. Karl Thorsten",male,10,3,2,347088,27.9,,S
821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",female,52,1,1,12749,93.5,B69,S
822,1,3,"Lulic, Mr. Nikola",male,27,0,0,315098,8.6625,,S
823,0,1,"Reuchlin, Jonkheer. John George",male,38,0,0,19972,0,,S
824,1,3,"Moor, Mrs. (Beila)",female,27,0,1,392096,12.475,E121,S
825,0,3,"Panula, Master. Urho Abraham",male,2,4,1,3101295,39.6875,,S
826,0,3,"Flynn, Mr. John",male,,0,0,368323,6.95,,Q
827,0,3,"Lam, Mr. Len",male,,0,0,1601,56.4958,,S
828,1,2,"Mallet, Master. Andre",male,1,0,2,S.C./PARIS 2079,37.0042,,C
829,1,3,"McCormack, Mr. Thomas Joseph",male,,0,0,367228,7.75,,Q
830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62,0,0,113572,80,B28,
831,1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15,1,0,2659,14.4542,,C
832,1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,,S
833,0,3,"Saad, Mr. Amin",male,,0,0,2671,7.2292,,C
834,0,3,"Augustsson, Mr. Albert",male,23,0,0,347468,7.8542,,S
835,0,3,"Allum, Mr. Owen George",male,18,0,0,2223,8.3,,S
836,1,1,"Compton, Miss. Sara Rebecca",female,39,1,1,PC 17756,83.1583,E49,C
837,0,3,"Pasic, Mr. Jakob",male,21,0,0,315097,8.6625,,S
838,0,3,"Sirota, Mr. Maurice",male,,0,0,392092,8.05,,S
839,1,3,"Chip, Mr. Chang",male,32,0,0,1601,56.4958,,S
840,1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7,C47,C
841,0,3,"Alhomaki, Mr. Ilmari Rudolf",male,20,0,0,SOTON/O2 3101287,7.925,,S
842,0,2,"Mudd, Mr. Thomas Charles",male,16,0,0,S.O./P.P. 3,10.5,,S
843,1,1,"Serepeca, Miss. Augusta",female,30,0,0,113798,31,,C
844,0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,,C
845,0,3,"Culumovic, Mr. Jeso",male,17,0,0,315090,8.6625,,S
846,0,3,"Abbing, Mr. Anthony",male,42,0,0,C.A. 5547,7.55,,S
847,0,3,"Sage, Mr. Douglas Bullen",male,,8,2,CA. 2343,69.55,,S
848,0,3,"Markoff, Mr. Marin",male,35,0,0,349213,7.8958,,C
849,0,2,"Harper, Rev. John",male,28,0,1,248727,33,,S
850,1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",female,,1,0,17453,89.1042,C92,C
851,0,3,"Andersson, Master. Sigvard Harald Elias",male,4,4,2,347082,31.275,,S
852,0,3,"Svensson, Mr. Johan",male,74,0,0,347060,7.775,,S
853,0,3,"Boulos, Miss. Nourelain",female,9,1,1,2678,15.2458,,C
854,1,1,"Lines, Miss. Mary Conover",female,16,0,1,PC 17592,39.4,D28,S
855,0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",female,44,1,0,244252,26,,S
856,1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18,0,1,392091,9.35,,S
857,1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",female,45,1,1,36928,164.8667,,S
858,1,1,"Daly, Mr. Peter Denis ",male,51,0,0,113055,26.55,E17,S
859,1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",female,24,0,3,2666,19.2583,,C
860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C
861,0,3,"Hansen, Mr. Claus Peter",male,41,2,0,350026,14.1083,,S
862,0,2,"Giles, Mr. Frederick Edward",male,21,1,0,28134,11.5,,S
863,1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Barron)",female,48,0,0,17466,25.9292,D17,S
864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,,S
865,0,2,"Gill, Mr. John William",male,24,0,0,233866,13,,S
866,1,2,"Bystrom, Mrs. (Karolina)",female,42,0,0,236852,13,,S
867,1,2,"Duran y More, Miss. Asuncion",female,27,1,0,SC/PARIS 2149,13.8583,,C
868,0,1,"Roebling, Mr. Washington Augustus II",male,31,0,0,PC 17590,50.4958,A24,S
869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5,,S
870,1,3,"Johnson, Master. Harold Theodor",male,4,1,1,347742,11.1333,,S
871,0,3,"Balkic, Mr. Cerin",male,26,0,0,349248,7.8958,,S
872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47,1,1,11751,52.5542,D35,S
873,0,1,"Carlsson, Mr. Frans Olof",male,33,0,0,695,5,B51 B53 B55,S
874,0,3,"Vander Cruyssen, Mr. Victor",male,47,0,0,345765,9,,S
875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28,1,0,P/PP 3381,24,,C
876,1,3,"Najib, Miss. Adele Kiamie ""Jane""",female,15,0,0,2667,7.225,,C
877,0,3,"Gustafsson, Mr. Alfred Ossian",male,20,0,0,7534,9.8458,,S
878,0,3,"Petroff, Mr. Nedelio",male,19,0,0,349212,7.8958,,S
879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S
880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56,0,1,11767,83.1583,C50,C
881,1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25,0,1,230433,26,,S
882,0,3,"Markun, Mr. Johann",male,33,0,0,349257,7.8958,,S
883,0,3,"Dahlberg, Miss. Gerda Ulrika",female,22,0,0,7552,10.5167,,S
884,0,2,"Banfield, Mr. Frederick James",male,28,0,0,C.A./SOTON 34068,10.5,,S
885,0,3,"Sutehall, Mr. Henry Jr",male,25,0,0,SOTON/OQ 392076,7.05,,S
886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39,0,5,382652,29.125,,Q
887,0,2,"Montvila, Rev. Juozas",male,27,0,0,211536,13,,S
888,1,1,"Graham, Miss. Margaret Edith",female,19,0,0,112053,30,B42,S
889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
890,1,1,"Behr, Mr. Karl Howell",male,26,0,0,111369,30,C148,C
891,0,3,"Dooley, Mr. Patrick",male,32,0,0,370376,7.75,,Q

207
data_exploration/explore.py Normal file
View File

@@ -0,0 +1,207 @@
#import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
plt.style.use('seaborn-colorblind')
# 2018.11.07 Created by Eamon.Zhang
def get_dtypes(data,drop_col=[]):
"""Return the dtypes for each column of a pandas Dataframe
Parameters
----------
data : pandas Dataframe
drop_col : columns to omit in a list
Returns
-------
str_var_list, num_var_list, all_var_list
"""
name_of_col = list(data.columns)
num_var_list = []
str_var_list = []
all_var_list = []
str_var_list = name_of_col.copy()
for var in name_of_col:
# check if column belongs to numeric type
if (data[var].dtypes in (np.int, np.int64, np.uint, np.int32, np.float,
np.float64, np.float32, np.double)):
str_var_list.remove(var)
num_var_list.append(var)
# drop the omit column from list
for var in drop_col:
if var in str_var_list:
str_var_list.remove(var)
if var in num_var_list:
num_var_list.remove(var)
all_var_list.extend(str_var_list)
all_var_list.extend(num_var_list)
return str_var_list, num_var_list, all_var_list
def describe(data,output_path=None):
"""output the general description of a pandas Dataframe
into a csv file
"""
result = data.describe(include='all')
if output_path is not None:
output = os.path.join(output_path,'describe.csv')
result.to_csv(output)
print('result saved at:', str(output))
return result
def discrete_var_barplot(x,y,data,output_path=None):
"""draw the barplot of a discrete variable x against y(target variable).
By default the bar shows the mean value of y.
Parameters
----------
x, y : column names in `data`
data : pandas Dataframe
output_path : folder to save the figure in (optional)
Returns
-------
figure saved as PNG
"""
plt.figure(figsize=(15,10))
sns.barplot(x=x,y=y,data=data)
if output_path is not None:
output = os.path.join(output_path,'Barplot_'+str(x)+'_'+str(y)+'.png')
plt.savefig(output)
print('Image saved at', str(output))
def discrete_var_countplot(x,data,output_path=None):
"""draw the countplot of a discrete variable x.
Parameters
----------
x : column name in `data`
data : pandas Dataframe
output_path : folder to save the figure in (optional)
Returns
-------
figure saved as PNG
"""
plt.figure(figsize=(15,10))
sns.countplot(x=x,data=data)
if output_path is not None:
output = os.path.join(output_path,'Countplot_'+str(x)+'.png')
plt.savefig(output)
print('Image saved at',str(output))
def discrete_var_boxplot(x,y,data,output_path=None):
"""draw the boxplot of a discrete variable x against y.
Parameters
----------
x, y : column names in `data`
data : pandas Dataframe
output_path : folder to save the figure in (optional)
Returns
-------
figure saved as PNG
"""
plt.figure(figsize=(15,10))
sns.boxplot(x=x,y=y,data=data)
if output_path is not None:
output = os.path.join(output_path,'Boxplot_'+str(x)+'_'+str(y)+'.png')
plt.savefig(output)
print('Image saved at',str(output))
def continuous_var_distplot(x,output_path=None,bins=None):
"""draw the distplot of a continuous variable x.
Parameters
----------
x : pandas Series
output_path : folder to save the figure in (optional)
bins : number of bins for the histogram (optional)
Returns
-------
figure saved as PNG
"""
plt.figure(figsize=(15,10))
sns.distplot(a=x,kde=False,bins=bins)
if output_path is not None:
output=os.path.join(output_path,'Distplot_'+str(x.name)+'.png')
plt.savefig(output)
print('Image saved at',str(output))
# 2018.11.28 Created by Eamon.Zhang
def scatter_plot(x,y,data,output_path=None):
"""draw the scatter-plot of two variables.
Parameters
----------
x, y : pandas Series (their .name is used in the output filename)
data : pandas Dataframe
output_path : folder to save the figure in (optional)
Returns
-------
figure saved as PNG
"""
plt.figure(figsize=(15,10))
sns.scatterplot(x=x,y=y,data=data)
if output_path is not None:
output = os.path.join(output_path,'Scatter_plot_'+str(x.name)+'_'+str(y.name)+'.png')
plt.savefig(output)
print('Image saved at',str(output))
def correlation_plot(data,output_path=None):
"""draw the correlation plot between variables.
Parameters
----------
data : pandas Dataframe
output_path : folder to save the figure in (optional)
Returns
-------
figure saved as PNG
"""
corrmat = data.corr()
fig, ax = plt.subplots()
fig.set_size_inches(11,11)
sns.heatmap(corrmat,cmap="YlGnBu",linewidths=.5,annot=True)
if output_path is not None:
output = os.path.join(output_path,'Corr_plot'+'.png')
plt.savefig(output)
print('Image saved at',str(output))
def heatmap(data,output_path=None,fmt='d'):
"""draw the heatmap between 2 variables.
Parameters
----------
Returns
-------
figure save as PNG
"""
fig, ax = plt.subplots()
fig.set_size_inches(11,11)
sns.heatmap(data,cmap="YlGnBu",linewidths=.5,annot=True,fmt=fmt)
if output_path is not None:
output = os.path.join(output_path,'Heatmap'+'.png')
plt.savefig(output)
print('Image saved at',str(output))
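# --- Usage sketch (illustrative, not authoritative) ---
# A minimal example of how the helpers above might be driven, assuming the
# Titanic csv shipped with this repo and an existing './output/' folder;
# the 'PassengerId' column name is an assumption of this example.
if __name__ == '__main__':
    import pandas as pd
    df = pd.read_csv('./data/titanic.csv')
    str_vars, num_vars, all_vars = get_dtypes(df, drop_col=['PassengerId'])
    print('categorical:', str_vars)
    print('numerical:', num_vars)
    describe(df, output_path='./output/')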

122
feature_cleaning/missing_data.py Normal file
View File

@@ -0,0 +1,122 @@
import pandas as pd
import numpy as np
from warnings import warn
# 2018.11.07 Created by Eamon.Zhang
def check_missing(data,output_path=None):
"""
check the total number & percentage of missing values
per variable of a pandas Dataframe
"""
result = pd.concat([data.isnull().sum(),data.isnull().mean()],axis=1)
result = result.rename(index=str,columns={0:'total missing',1:'proportion'})
if output_path is not None:
result.to_csv(output_path+'missing.csv')
print('result saved at', output_path, 'missing.csv')
return result
def drop_missing(data,axis=0):
"""
Listwise deletion:
excluding all cases (listwise) that have missing values
Parameters
----------
axis : drop cases (0) or columns (1), default 0
Returns
-------
Pandas dataframe with missing cases/columns dropped
"""
data_copy = data.copy(deep=True)
data_copy = data_copy.dropna(axis=axis,inplace=False)
return data_copy
def add_var_denote_NA(data,NA_col=[]):
"""
creating an additional variable indicating whether the data
was missing for that observation (1) or not (0).
"""
data_copy = data.copy(deep=True)
for i in NA_col:
if data_copy[i].isnull().sum()>0:
data_copy[i+'_is_NA'] = np.where(data_copy[i].isnull(),1,0)
else:
warn("Column %s has no missing cases" % i)
return data_copy
def impute_NA_with_arbitrary(data,impute_value,NA_col=[]):
"""
replacing NA with arbitrary values.
"""
data_copy = data.copy(deep=True)
for i in NA_col:
if data_copy[i].isnull().sum()>0:
data_copy[i+'_'+str(impute_value)] = data_copy[i].fillna(impute_value)
else:
warn("Column %s has no missing cases" % i)
return data_copy
def impute_NA_with_avg(data,strategy='mean',NA_col=[]):
"""
replacing the NA with the mean/median/mode of that variable.
Note: the statistic should be computed on the training set only
and then propagated to the test set.
"""
data_copy = data.copy(deep=True)
for i in NA_col:
if data_copy[i].isnull().sum()>0:
            if strategy == 'mean':
                data_copy[i+'_impute_mean'] = data_copy[i].fillna(data[i].mean())
            elif strategy == 'median':
                data_copy[i+'_impute_median'] = data_copy[i].fillna(data[i].median())
            elif strategy == 'mode':
                data_copy[i+'_impute_mode'] = data_copy[i].fillna(data[i].mode()[0])
            else:
                raise ValueError("strategy must be 'mean', 'median' or 'mode'")
        else:
            warn("Column %s has no missing cases" % i)
return data_copy
def impute_NA_with_end_of_distribution(data,NA_col=[]):
"""
replacing the NA with a value at the far end of the distribution of
that variable, calculated as mean + 3*std
"""
data_copy = data.copy(deep=True)
for i in NA_col:
if data_copy[i].isnull().sum()>0:
data_copy[i+'_impute_end_of_distri'] = data_copy[i].fillna(data[i].mean()+3*data[i].std())
else:
warn("Column %s has no missing" % i)
return data_copy
def impute_NA_with_random(data,NA_col=[],random_state=0):
"""
replacing the NA with random sampling from the pool of available observations of the variable
"""
data_copy = data.copy(deep=True)
for i in NA_col:
if data_copy[i].isnull().sum()>0:
data_copy[i+'_random'] = data_copy[i]
# extract the random sample to fill the na
random_sample = data_copy[i].dropna().sample(data_copy[i].isnull().sum(), random_state=random_state)
random_sample.index = data_copy[data_copy[i].isnull()].index
data_copy.loc[data_copy[i].isnull(), str(i)+'_random'] = random_sample
else:
warn("Column %s has no missing" % i)
return data_copy
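# --- Usage sketch (illustrative, not authoritative) ---
# Assuming the Titanic csv used by the demo notebooks, inspect missingness in
# 'Age' and impute it twice, by the median and by random sampling; the new
# columns 'Age_impute_median' / 'Age_random' follow the naming scheme above.
if __name__ == '__main__':
    data = pd.read_csv('./data/titanic.csv', usecols=['Age', 'Fare', 'Survived'])
    print(check_missing(data))
    data = impute_NA_with_avg(data, strategy='median', NA_col=['Age'])
    data = impute_NA_with_random(data, NA_col=['Age'], random_state=0)
    print(data[['Age', 'Age_impute_median', 'Age_random']].head())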

138
feature_cleaning/outlier.py Normal file
View File

@@ -0,0 +1,138 @@
import pandas as pd
import numpy as np
# from warnings import warn
# 2018.11.07 Created by Eamon.Zhang
def outlier_detect_arbitrary(data,col,upper_fence,lower_fence):
'''
identify outliers based on arbitrary boundaries passed to the function.
'''
para = (upper_fence, lower_fence)
tmp = pd.concat([data[col]>upper_fence,data[col]<lower_fence],axis=1)
outlier_index = tmp.any(axis=1)
# .sum() on the boolean index is safe even when no outlier is found
print('Num of outliers detected:', outlier_index.sum())
print('Proportion of outliers detected:', outlier_index.sum()/len(outlier_index))
return outlier_index, para
def outlier_detect_IQR(data,col,threshold=3):
'''
outlier detection by the Interquartile Range Rule, also known as Tukey's test.
Calculate the IQR (75th quantile - 25th quantile). Any value beyond:
    upper bound = 75th quantile + IQR * threshold
    lower bound = 25th quantile - IQR * threshold
is regarded as an outlier. Default threshold is 3.
'''
IQR = data[col].quantile(0.75) - data[col].quantile(0.25)
Lower_fence = data[col].quantile(0.25) - (IQR * threshold)
Upper_fence = data[col].quantile(0.75) + (IQR * threshold)
para = (Upper_fence, Lower_fence)
tmp = pd.concat([data[col]>Upper_fence,data[col]<Lower_fence],axis=1)
outlier_index = tmp.any(axis=1)
# .sum() on the boolean index is safe even when no outlier is found
print('Num of outliers detected:', outlier_index.sum())
print('Proportion of outliers detected:', outlier_index.sum()/len(outlier_index))
return outlier_index, para
def outlier_detect_mean_std(data,col,threshold=3):
'''
outlier detection by Mean and Standard Deviation Method.
If a value is a certain number (the threshold) of standard deviations away
from the mean, that data point is identified as an outlier.
Default threshold is 3.
This method can fail to detect outliers because the outliers increase the standard deviation.
The more extreme the outlier, the more the standard deviation is affected.
'''
Upper_fence = data[col].mean() + threshold * data[col].std()
Lower_fence = data[col].mean() - threshold * data[col].std()
para = (Upper_fence, Lower_fence)
tmp = pd.concat([data[col]>Upper_fence,data[col]<Lower_fence],axis=1)
outlier_index = tmp.any(axis=1)
# .sum() on the boolean index is safe even when no outlier is found
print('Num of outliers detected:', outlier_index.sum())
print('Proportion of outliers detected:', outlier_index.sum()/len(outlier_index))
return outlier_index, para
def outlier_detect_MAD(data,col,threshold=3.5):
"""
outlier detection by Median and Median Absolute Deviation Method (MAD)
The median of the residuals is calculated. Then, the difference is calculated between each historical value and this median.
These differences are expressed as their absolute values, and a new median is calculated and multiplied by
an empirically derived constant to yield the median absolute deviation (MAD).
If a value is a certain number of MAD away from the median of the residuals,
that value is classified as an outlier. The default threshold is 3.5 MAD,
matching the function signature.
This method is generally more effective than the mean and standard deviation method for detecting outliers,
but it can be too aggressive in classifying values that are not really extremely different.
Also, if more than 50% of the data points have the same value, MAD is computed to be 0,
so any value different from the residual median is classified as an outlier.
"""
median = data[col].median()
median_absolute_deviation = np.median([np.abs(y - median) for y in data[col]])
modified_z_scores = pd.Series([0.6745 * (y - median) / median_absolute_deviation for y in data[col]])
outlier_index = np.abs(modified_z_scores) > threshold
# .sum() on the boolean index is safe even when no outlier is found
print('Num of outliers detected:', outlier_index.sum())
print('Proportion of outliers detected:', outlier_index.sum()/len(outlier_index))
return outlier_index
# 2018.11.10 outlier treatment
def impute_outlier_with_arbitrary(data,outlier_index,value,col=[]):
"""
impute outliers with arbitrary value
"""
data_copy = data.copy(deep=True)
for i in col:
data_copy.loc[outlier_index,i] = value
return data_copy
def windsorization(data,col,para,strategy='both'):
"""
top-coding & bottom-coding: capping the maximum and/or minimum of a
distribution at an arbitrary value (commonly spelled 'winsorization')
"""
data_copy = data.copy(deep=True)
if strategy == 'both':
data_copy.loc[data_copy[col]>para[0],col] = para[0]
data_copy.loc[data_copy[col]<para[1],col] = para[1]
elif strategy == 'top':
data_copy.loc[data_copy[col]>para[0],col] = para[0]
elif strategy == 'bottom':
data_copy.loc[data_copy[col]<para[1],col] = para[1]
return data_copy
def drop_outlier(data,outlier_index):
"""
drop the cases that are outliers
"""
data_copy = data[~outlier_index]
return data_copy
def impute_outlier_with_avg(data,col,outlier_index,strategy='mean'):
"""
impute outlier with mean/median/most frequent values of that variable.
"""
data_copy = data.copy(deep=True)
if strategy=='mean':
data_copy.loc[outlier_index,col] = data_copy[col].mean()
elif strategy=='median':
data_copy.loc[outlier_index,col] = data_copy[col].median()
    elif strategy == 'mode':
        data_copy.loc[outlier_index, col] = data_copy[col].mode()[0]
    else:
        raise ValueError("strategy must be 'mean', 'median' or 'mode'")
return data_copy
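# --- Usage sketch (illustrative, not authoritative) ---
# Assuming the Titanic csv, flag 'Fare' outliers with the IQR rule and cap
# them with windsorization; `para` carries the fitted (upper, lower) fences.
if __name__ == '__main__':
    data = pd.read_csv('./data/titanic.csv', usecols=['Fare'])
    index, para = outlier_detect_IQR(data, col='Fare', threshold=3)
    data_capped = windsorization(data, col='Fare', para=para, strategy='both')
    print('max Fare after capping:', data_capped['Fare'].max())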

247
feature_cleaning/rare_values.py Normal file
View File

@@ -0,0 +1,247 @@
import pandas as pd
# import numpy as np
# from warnings import warn
# 2018.11.07 Created by Eamon.Zhang
# 2018.11.12 change into fit() transform() format
class GroupingRareValues():
"""
Grouping the observations that show rare labels into a unique category ('rare')
Parameters
----------
mapping : list of dicts, the fitted mapping per column (built by fit)
cols : list of columns to group
threshold : label proportion below which a label is treated as rare, default 0.01
"""
def __init__(self, mapping=None, cols=None, threshold=0.01):
self.cols = cols
self.mapping = mapping
self._dim = None
self.threshold = threshold
def fit(self, X, y=None, **kwargs):
"""Fit encoder according to X and y.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples
and n_features is the number of features.
y : array-like, shape = [n_samples]
Target values.
Returns
-------
self : encoder
Returns self.
"""
self._dim = X.shape[1]
_, categories = self.grouping(
X,
mapping=self.mapping,
cols=self.cols,
threshold=self.threshold
)
self.mapping = categories
return self
def transform(self, X):
"""Perform the transformation to new categorical data.
Will use the mapping (if available) and the column list to encode the
data.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Returns
-------
X : Transformed values with encoding applied.
"""
if self._dim is None:
raise ValueError('Must train encoder before it can be used to transform data.')
# make sure that it is the right size
if X.shape[1] != self._dim:
raise ValueError('Unexpected input dimension %d, expected %d' % (X.shape[1], self._dim,))
X, _ = self.grouping(
X,
mapping=self.mapping,
cols=self.cols,
threshold=self.threshold
)
return X
def grouping(self, X_in, threshold, mapping=None, cols=None):
"""
Grouping the observations that show rare labels into a unique category ('rare')
"""
X = X_in.copy(deep=True)
# if cols is None:
# cols = X.columns.values
if mapping is not None: # transform
mapping_out = mapping
for i in mapping:
column = i.get('col') # get the column name
X[column] = X[column].map(i['mapping'])
# try:
# X[column] = X[column].astype(int)
# except ValueError as e:
# X[column] = X[column].astype(float)
else: # fit
mapping_out = []
for col in cols:
# if util.is_category(X[col].dtype):
# categories = X[col].cat.categories
# else:
temp_df = pd.Series(X[col].value_counts()/len(X))
mapping = { k: ('rare' if k not in temp_df[temp_df >= threshold].index else k)
for k in temp_df.index}
mapping = pd.Series(mapping)
mapping_out.append({'col': col, 'mapping': mapping, 'data_type': X[col].dtype}, )
return X, mapping_out
#==============================================================================
# def rare_imputation(X_train, X_test, variable):
#
# # find the most frequent category
# frequent_cat = X_train.groupby(variable)[variable].count().sort_values().tail(1).index.values[0]
#
# # find rare labels
# temp = X_train.groupby([variable])[variable].count()/np.float(len(X_train))
# rare_cat = [x for x in temp.loc[temp<0.05].index.values]
#
# # create new variables, with Rare labels imputed
#
# # by the most frequent category
# X_train[variable+'_freq_imp'] = np.where(X_train[variable].isin(rare_cat), frequent_cat, X_train[variable])
# X_test[variable+'_freq_imp'] = np.where(X_test[variable].isin(rare_cat), frequent_cat, X_test[variable])
#
# # by adding a new label 'Rare'
# X_train[variable+'_rare_imp'] = np.where(X_train[variable].isin(rare_cat), 'Rare', X_train[variable])
# X_test[variable+'_rare_imp'] = np.where(X_test[variable].isin(rare_cat), 'Rare', X_test[variable])
#==============================================================================
# 2018.11.26 created by Eamon.Zhang
class ModeImputation():
"""
Replacing rare labels with the most frequent label (the mode)
Parameters
----------
mapping : list of dicts, the fitted mapping per column (built by fit)
cols : list of columns to impute
threshold : label proportion below which a label is treated as rare, default 0.01
"""
def __init__(self, mapping=None, cols=None, threshold=0.01):
self.cols = cols
self.mapping = mapping
self._dim = None
self.threshold = threshold
def fit(self, X, y=None, **kwargs):
"""Fit encoder according to X and y.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples
and n_features is the number of features.
y : array-like, shape = [n_samples]
Target values.
Returns
-------
self : encoder
Returns self.
"""
self._dim = X.shape[1]
_, categories = self.impute_with_mode(
X,
mapping=self.mapping,
cols=self.cols,
threshold=self.threshold
)
self.mapping = categories
return self
def transform(self, X):
"""Perform the transformation to new categorical data.
Will use the mapping (if available) and the column list to encode the
data.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Returns
-------
X : Transformed values with encoding applied.
"""
if self._dim is None:
raise ValueError('Must train encoder before it can be used to transform data.')
# make sure that it is the right size
if X.shape[1] != self._dim:
raise ValueError('Unexpected input dimension %d, expected %d' % (X.shape[1], self._dim,))
X, _ = self.impute_with_mode(
X,
mapping=self.mapping,
cols=self.cols,
threshold=self.threshold
)
return X
def impute_with_mode(self, X_in, threshold, mapping=None, cols=None):
"""
Replacing labels whose proportion is below the threshold with the most frequent label
"""
X = X_in.copy(deep=True)
# if cols is None:
# cols = X.columns.values
if mapping is not None: # transform
mapping_out = mapping
for i in mapping:
column = i.get('col') # get the column name
X[column] = X[column].map(i['mapping'])
# try:
# X[column] = X[column].astype(int)
# except ValueError as e:
# X[column] = X[column].astype(float)
else: # fit
mapping_out = []
for col in cols:
# if util.is_category(X[col].dtype):
# categories = X[col].cat.categories
# else:
temp_df = pd.Series(X[col].value_counts()/len(X))
most_frequent = X[col].mode()[0]
mapping = {k: (most_frequent if k not in temp_df[temp_df >= threshold].index else k)
           for k in temp_df.index}
mapping = pd.Series(mapping)
mapping_out.append({'col': col, 'mapping': mapping, 'data_type': X[col].dtype}, )
return X, mapping_out
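# --- Usage sketch (illustrative, not authoritative) ---
# Assuming the Titanic csv, replace the rare SibSp labels by the mode instead
# of grouping them; the fit/transform pattern mirrors GroupingRareValues.
if __name__ == '__main__':
    data = pd.read_csv('./data/titanic.csv', usecols=['Pclass', 'SibSp'])
    imp = ModeImputation(cols=['SibSp'], threshold=0.01).fit(data)
    print(imp.mapping)  # rare labels now map to the most frequent label
    print(imp.transform(data)['SibSp'].value_counts())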

329
feature_engineering/discretization.py Normal file
View File

@@ -0,0 +1,329 @@
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
import numpy as np
# from warnings import warn
# 2018.11.17 Created by Eamon.Zhang
# ChiMerge method modified from https://github.com/tatsumiw/ChiMerge/blob/master/ChiMerge.py
# TODO: add more constraints to the discretized result.
class ChiMerge():
"""
supervised discretization using the ChiMerge method.
Parameters
----------
confidenceVal: number
default=3.841, corresponds to p=0.05 with dof=1
num_of_bins: int
number of bins after discretization
col: str
the column to be discretized
"""
def __init__(self, col=None, bins=None, confidenceVal=3.841, num_of_bins=10):
self.col = col
self._dim = None
self.confidenceVal = confidenceVal
self.bins = bins
self.num_of_bins = num_of_bins
def fit(self, X, y, **kwargs):
"""Fit encoder according to X and y.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples
and n_features is the number of features.
y : str
name of the binary target column inside X; the groupby in
chimerge looks the target up by this name.
Returns
-------
self : encoder
Returns self.
"""
self._dim = X.shape[1]
_, bins = self.chimerge(
X_in=X,
y=y,
confidenceVal=self.confidenceVal,
col=self.col,
num_of_bins=self.num_of_bins
)
self.bins = bins
return self
def transform(self, X):
"""Perform the transformation to new data.
Will use the tree model and the column list to discretize the
column.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Returns
-------
X : new dataframe with discretized new column.
"""
if self._dim is None:
raise ValueError('Must train encoder before it can be used to transform data.')
# make sure that it is the right size
if X.shape[1] != self._dim:
raise ValueError('Unexpected input dimension %d, expected %d' % (X.shape[1], self._dim,))
X, _ = self.chimerge(
X_in=X,
col=self.col,
bins=self.bins
)
return X
def chimerge(self, X_in, y=None, confidenceVal=None, num_of_bins=None, col=None, bins=None):
"""
discretize a variable using ChiMerge
"""
X = X_in.copy(deep=True)
if bins is not None: # transform
try:
X[col+'_chimerge'] = pd.cut(X[col],bins=bins,include_lowest=True)
except Exception as e:
print(e)
else: # fit
try:
# build counts of positive/negative samples per value of the column to be merged
total_num = X.groupby([col])[y].count()
total_num = pd.DataFrame({'total_num': total_num})
positive_class = X.groupby([col])[y].sum()
positive_class = pd.DataFrame({'positive_class': positive_class})
regroup = pd.merge(total_num, positive_class, left_index=True, right_index=True,how='inner')
regroup.reset_index(inplace=True)
regroup['negative_class'] = regroup['total_num'] - regroup['positive_class']
regroup = regroup.drop('total_num', axis=1)
np_regroup = np.array(regroup)
# merge interval that have 0 pos/neg samples
i = 0
while (i <= np_regroup.shape[0] - 2):
if ((np_regroup[i, 1] == 0 and np_regroup[i + 1, 1] == 0) or ( np_regroup[i, 2] == 0 and np_regroup[i + 1, 2] == 0)):
np_regroup[i, 1] = np_regroup[i, 1] + np_regroup[i + 1, 1] # pos
np_regroup[i, 2] = np_regroup[i, 2] + np_regroup[i + 1, 2] # neg
np_regroup[i, 0] = np_regroup[i + 1, 0]
np_regroup = np.delete(np_regroup, i + 1, 0)
i = i - 1
i = i + 1
# calculate the chi-square statistic for each pair of neighboring intervals:
# chi2 = N*(ad-bc)^2 / ((a+b)(c+d)(a+c)(b+d)) for the 2x2 contingency table
chi_table = np.array([])
for i in np.arange(np_regroup.shape[0] - 1):
chi = (np_regroup[i, 1] * np_regroup[i + 1, 2] - np_regroup[i, 2] * np_regroup[i + 1, 1]) ** 2 \
* (np_regroup[i, 1] + np_regroup[i, 2] + np_regroup[i + 1, 1] + np_regroup[i + 1, 2]) / \
((np_regroup[i, 1] + np_regroup[i, 2]) * (np_regroup[i + 1, 1] + np_regroup[i + 1, 2]) * (
np_regroup[i, 1] + np_regroup[i + 1, 1]) * (np_regroup[i, 2] + np_regroup[i + 1, 2]))
chi_table = np.append(chi_table, chi)
# iteratively merge the pair of neighboring intervals with the smallest chi-square
while (1):
if (len(chi_table) <= (num_of_bins - 1) and min(chi_table) >= confidenceVal):
break
chi_min_index = np.argwhere(chi_table == min(chi_table))[0]
np_regroup[chi_min_index, 1] = np_regroup[chi_min_index, 1] + np_regroup[chi_min_index + 1, 1]
np_regroup[chi_min_index, 2] = np_regroup[chi_min_index, 2] + np_regroup[chi_min_index + 1, 2]
np_regroup[chi_min_index, 0] = np_regroup[chi_min_index + 1, 0]
np_regroup = np.delete(np_regroup, chi_min_index + 1, 0)
if (chi_min_index == np_regroup.shape[0] - 1):
chi_table[chi_min_index - 1] = (np_regroup[chi_min_index - 1, 1] * np_regroup[chi_min_index, 2] - np_regroup[chi_min_index - 1, 2] * np_regroup[chi_min_index, 1]) ** 2 \
* (np_regroup[chi_min_index - 1, 1] + np_regroup[chi_min_index - 1, 2] + np_regroup[chi_min_index, 1] + np_regroup[chi_min_index, 2]) / \
((np_regroup[chi_min_index - 1, 1] + np_regroup[chi_min_index - 1, 2]) * (np_regroup[chi_min_index, 1] + np_regroup[chi_min_index, 2]) * (np_regroup[chi_min_index - 1, 1] + np_regroup[chi_min_index, 1]) * (np_regroup[chi_min_index - 1, 2] + np_regroup[chi_min_index, 2]))
chi_table = np.delete(chi_table, chi_min_index, axis=0)
else:
chi_table[chi_min_index - 1] = (np_regroup[chi_min_index - 1, 1] * np_regroup[chi_min_index, 2] - np_regroup[chi_min_index - 1, 2] * np_regroup[chi_min_index, 1]) ** 2 \
* (np_regroup[chi_min_index - 1, 1] + np_regroup[chi_min_index - 1, 2] + np_regroup[chi_min_index, 1] + np_regroup[chi_min_index, 2]) / \
((np_regroup[chi_min_index - 1, 1] + np_regroup[chi_min_index - 1, 2]) * (np_regroup[chi_min_index, 1] + np_regroup[chi_min_index, 2]) * (np_regroup[chi_min_index - 1, 1] + np_regroup[chi_min_index, 1]) * (np_regroup[chi_min_index - 1, 2] + np_regroup[chi_min_index, 2]))
chi_table[chi_min_index] = (np_regroup[chi_min_index, 1] * np_regroup[chi_min_index + 1, 2] - np_regroup[chi_min_index, 2] * np_regroup[chi_min_index + 1, 1]) ** 2 \
* (np_regroup[chi_min_index, 1] + np_regroup[chi_min_index, 2] + np_regroup[chi_min_index + 1, 1] + np_regroup[chi_min_index + 1, 2]) / \
((np_regroup[chi_min_index, 1] + np_regroup[chi_min_index, 2]) * (np_regroup[chi_min_index + 1, 1] + np_regroup[chi_min_index + 1, 2]) * (np_regroup[chi_min_index, 1] + np_regroup[chi_min_index + 1, 1]) * (np_regroup[chi_min_index, 2] + np_regroup[chi_min_index + 1, 2]))
chi_table = np.delete(chi_table, chi_min_index + 1, axis=0)
result_data = pd.DataFrame()
result_data['variable'] = [col] * np_regroup.shape[0]
bins = []
tmp = []
for i in np.arange(np_regroup.shape[0]):
if i == 0:
y = '-inf' + ',' + str(np_regroup[i, 0])
#x = np_regroup[i, 0]
#list_temp.append(x)
elif i == np_regroup.shape[0] - 1:
y = str(np_regroup[i - 1, 0]) + '+'
#x = 100000000.
#list_temp.append(x)
else:
y = str(np_regroup[i - 1, 0]) + ',' + str(np_regroup[i, 0])
#x = np_regroup[i, 0]
#list_temp.append(x)
bins.append(np_regroup[i - 1, 0])
tmp.append(y)
#list_temp.append(df[variable].max()+0.1)
bins.append(X[col].min()-0.1)
result_data['interval'] = tmp
result_data['flag_0'] = np_regroup[:, 2]
result_data['flag_1'] = np_regroup[:, 1]
bins.sort(reverse=False)
print('Interval for variable %s' % col)
print(result_data)
except Exception as e:
print(e)
return X, bins
# 2018.11.15 Created by Eamon.Zhang
class DiscretizeByDecisionTree():
"""
Discretisation with Decision Trees consists of using a decision tree
to identify the optimal splitting points that would determine the bins
or contiguous intervals:
1. Train a decision tree of limited depth (2, 3 or 4) using the variable
   we want to discretise to predict the target.
2. The original variable values are then replaced by the
   probability returned by the tree.
Parameters
----------
col: str
column to discretise
max_depth: int or list of ints
max depth of the tree; a list of ints is searched for the optimal depth
"""
def __init__(self, col=None, max_depth=None, tree_model=None):
self.col = col
self._dim = None
self.max_depth = max_depth
self.tree_model = tree_model
def fit(self, X, y, **kwargs):
"""Fit encoder according to X and y.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples
and n_features is the number of features.
y : array-like, shape = [n_samples]
Target values.
Returns
-------
self : encoder
Returns self.
"""
self._dim = X.shape[1]
_, tree = self.discretize(
X_in=X,
y=y,
max_depth=self.max_depth,
col=self.col,
tree_model=self.tree_model
)
self.tree_model = tree
return self
def transform(self, X):
"""Perform the transformation to new categorical data.
Will use the tree model and the column list to discretize the
column.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Returns
-------
X : new dataframe with discretized new column.
"""
if self._dim is None:
raise ValueError('Must train encoder before it can be used to transform data.')
# make sure that it is the right size
if X.shape[1] != self._dim:
raise ValueError('Unexpected input dimension %d, expected %d' % (X.shape[1], self._dim,))
X, _ = self.discretize(
X_in=X,
col=self.col,
tree_model=self.tree_model
)
return X
def discretize(self, X_in, y=None, max_depth=None, tree_model=None, col=None):
"""
discretize a variable using DecisionTreeClassifier
"""
X = X_in.copy(deep=True)
if tree_model is not None: # transform
X[col+'_tree_discret'] = tree_model.predict_proba(X[col].to_frame())[:,1]
else: # fit
if isinstance(max_depth,int):
tree_model = DecisionTreeClassifier(max_depth=max_depth)
tree_model.fit(X[col].to_frame(), y)
# X[col+'_tree_discret'] = tree_model.predict_proba(X[col].to_frame())[:,1]
#print(x.tree_discret.unique())
# bins = pd.concat( [X.groupby([col+'_tree_discret'])[col].min(),
# X.groupby([col+'_tree_discret'])[col].max()], axis=1)
# print('bins:')
# print(bins)
elif len(max_depth)>1:
score_ls = [] # here I will store the roc auc
score_std_ls = [] # here I will store the standard deviation of the roc_auc
for tree_depth in max_depth:
tree_model = DecisionTreeClassifier(max_depth=tree_depth)
scores = cross_val_score(tree_model, X[col].to_frame(), y, cv=3, scoring='roc_auc')
score_ls.append(np.mean(scores))
score_std_ls.append(np.std(scores))
temp = pd.concat([pd.Series(max_depth), pd.Series(score_ls), pd.Series(score_std_ls)], axis=1)
temp.columns = ['depth', 'roc_auc_mean', 'roc_auc_std']
print('result ROC-AUC for each depth')
print(temp)
max_roc = temp.roc_auc_mean.max()
# take a scalar (first row on ties) so DecisionTreeClassifier accepts it
optimal_depth = temp.loc[temp.roc_auc_mean == max_roc, 'depth'].values[0]
print('optimal_depth:',optimal_depth)
tree_model = DecisionTreeClassifier(max_depth=optimal_depth)
tree_model.fit(X[col].to_frame(), y)
# bins = pd.concat( [X.groupby([col+'_tree_discret'])[col].min(),
# X.groupby([col+'_tree_discret'])[col].max()], axis=1)
# print('bins:')
# print(bins)
else:
raise ValueError('max_depth of a tree must be an integer or a list')
return X, tree_model
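# --- Usage sketch (illustrative, not authoritative) ---
# Assuming the Titanic csv, discretize 'Age' with a depth-searched decision
# tree; note ChiMerge instead expects y as the *name* of the target column.
if __name__ == '__main__':
    data = pd.read_csv('./data/titanic.csv', usecols=['Age', 'Survived']).dropna()
    enc = DiscretizeByDecisionTree(col='Age', max_depth=[2, 3, 4])
    enc.fit(data[['Age']], data['Survived'])
    print(enc.transform(data[['Age']]).head())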

109
feature_engineering/encoding.py Normal file
View File

@@ -0,0 +1,109 @@
import pandas as pd
# 2018.11.28 Created by Eamon.Zhang
class MeanEncoding():
"""
Replacing each label with the mean of the target for that label.
Parameters
----------
mapping : list of dicts, the fitted mapping per column (built by fit)
cols : list of columns to encode
"""
def __init__(self, mapping=None, cols=None):
self.cols = cols
self.mapping = mapping
self._dim = None
# self.threshold = threshold
def fit(self, X, y=None, **kwargs):
"""Fit encoder according to X and y.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples
and n_features is the number of features.
y : array-like, shape = [n_samples]
Target values.
Returns
-------
self : encoder
Returns self.
"""
self._dim = X.shape[1]
_, categories = self.mean_encoding(
X,
y,
mapping=self.mapping,
cols=self.cols
# threshold=self.threshold
)
self.mapping = categories
return self
def transform(self, X):
"""Perform the transformation to new categorical data.
Will use the mapping (if available) and the column list to encode the
data.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Returns
-------
X : Transformed values with encoding applied.
"""
if self._dim is None:
raise ValueError('Must train encoder before it can be used to transform data.')
# make sure that it is the right size
if X.shape[1] != self._dim:
raise ValueError('Unexpected input dimension %d, expected %d' % (X.shape[1], self._dim,))
X, _ = self.mean_encoding(
X,
mapping=self.mapping,
cols=self.cols
# threshold=self.threshold
)
return X
def mean_encoding(self, X_in, y=None, mapping=None, cols=None):
"""
Replacing each label of the given columns with the mean of the target for that label.
Note: during fit, X must contain the target column (it is looked up as y.name).
"""
X = X_in.copy(deep=True)
# if cols is None:
# cols = X.columns.values
if mapping is not None: # transform
mapping_out = mapping
for i in mapping:
column = i.get('col') # get the column name
X[column] = X[column].map(i['mapping'])
# try:
# X[column] = X[column].astype(int)
# except ValueError as e:
# X[column] = X[column].astype(float)
else: # fit
mapping_out = []
for col in cols:
# if util.is_category(X[col].dtype):
# categories = X[col].cat.categories
# else:
mapping = X[y.name].groupby(X[col]).mean().to_dict()
mapping = pd.Series(mapping)
mapping_out.append({'col': col, 'mapping': mapping, 'data_type': X[col].dtype}, )
return X, mapping_out
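# --- Usage sketch (illustrative, not authoritative) ---
# Assuming the Titanic csv, mean-encode 'Sex' against 'Survived'. fit() looks
# the target up inside X by y.name, so X must contain the target column.
if __name__ == '__main__':
    data = pd.read_csv('./data/titanic.csv', usecols=['Sex', 'Survived'])
    enc = MeanEncoding(cols=['Sex']).fit(data, data['Survived'])
    print(enc.mapping)  # e.g. female/male -> their survival rates
    print(enc.transform(data)['Sex'].head())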

73
feature_engineering/transformation.py Normal file
View File

@@ -0,0 +1,73 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import pylab
# from warnings import warn
# 2018.11.26 Created by Eamon.Zhang
def diagnostic_plots(df, variable):
# function to plot a histogram and a Q-Q plot
# side by side, for a certain variable
plt.figure(figsize=(15,6))
plt.subplot(1, 2, 1)
df[variable].hist()
plt.subplot(1, 2, 2)
stats.probplot(df[variable], dist="norm", plot=pylab)
plt.show()
def log_transform(data,cols=[]):
"""
Logarithmic transformation: log(x+1), so zero values are handled
"""
data_copy = data.copy(deep=True)
for i in cols:
data_copy[i+'_log'] = np.log(data_copy[i]+1)
print('Variable ' + i +' Q-Q plot')
diagnostic_plots(data_copy,str(i+'_log'))
return data_copy
def reciprocal_transform(data,cols=[]):
"""
Reciprocal transformation: 1/x (undefined for zero values)
"""
data_copy = data.copy(deep=True)
for i in cols:
data_copy[i+'_reciprocal'] = 1/(data_copy[i])
print('Variable ' + i +' Q-Q plot')
diagnostic_plots(data_copy,str(i+'_reciprocal'))
return data_copy
def square_root_transform(data,cols=[]):
"""
square root transformation
"""
data_copy = data.copy(deep=True)
for i in cols:
data_copy[i+'_square_root'] = (data_copy[i])**(0.5)
print('Variable ' + i +' Q-Q plot')
diagnostic_plots(data_copy,str(i+'_square_root'))
return data_copy
def exp_transform(data,coef,cols=[]):
"""
Power transformation: x**coef (despite the '_exp' suffix in the new column name)
"""
data_copy = data.copy(deep=True)
for i in cols:
data_copy[i+'_exp'] = (data_copy[i])**coef
print('Variable ' + i +' Q-Q plot')
diagnostic_plots(data_copy,str(i+'_exp'))
return data_copy
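# --- Usage sketch (illustrative, not authoritative) ---
# Assuming the Titanic csv, log-transform the skewed 'Fare'; each call also
# shows the histogram / Q-Q diagnostic plots of the new column.
if __name__ == '__main__':
    data = pd.read_csv('./data/titanic.csv', usecols=['Fare'])
    data = log_transform(data, cols=['Fare'])  # adds 'Fare_log'
    print(data['Fare_log'].describe())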

76
feature_selection/embedded_method.py Normal file
View File

@@ -0,0 +1,76 @@
#import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#import seaborn as sns
#from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier #RandomForestRegressor
#from sklearn.feature_selection import SelectFromModel
# 2018.11.27 Created by Eamon.Zhang
def rf_importance(X_train,y_train,max_depth=10,class_weight=None,top_n=15,n_estimators=50,random_state=0):
model = RandomForestClassifier(n_estimators=n_estimators,max_depth=max_depth,
random_state=random_state,class_weight=class_weight,
n_jobs=-1)
model.fit(X_train, y_train)
importances = model.feature_importances_
indices = np.argsort(importances)[::-1]
feat_labels = X_train.columns
std = np.std([tree.feature_importances_ for tree in model.estimators_],
axis=0) # inter-trees variability.
print("Feature ranking:")
# l1,l2,l3,l4 = [],[],[],[]
for f in range(X_train.shape[1]):
print("%d. feature no:%d feature name:%s (%f)" % (f + 1, indices[f], feat_labels[indices[f]], importances[indices[f]]))
# l1.append(f+1)
# l2.append(indices[f])
# l3.append(feat_labels[indices[f]])
# l4.append(importances[indices[f]])
#feature_rank = pd.Dataframe(zip(l1,l2,l3,l4),columns=['id','indice','feature','importances'])
# plotting
indices = indices[0:top_n]
plt.figure()
plt.title("Feature importances top %d" % top_n)
plt.bar(range(top_n), importances[indices],
color="r", yerr=std[indices], align="center")
plt.xticks(range(top_n), indices)
plt.xlim([-1,top_n])
plt.show()
return model
def gbt_importance(X_train,y_train,max_depth=10,top_n=15,n_estimators=50,random_state=0):
model = GradientBoostingClassifier(n_estimators=n_estimators,max_depth=max_depth,
random_state=random_state)
model.fit(X_train, y_train)
importances = model.feature_importances_
indices = np.argsort(importances)[::-1]
feat_labels = X_train.columns
std = np.std([tree[0].feature_importances_ for tree in model.estimators_],
axis=0) # inter-trees variability.
print("Feature ranking:")
# l1,l2,l3,l4 = [],[],[],[]
for f in range(X_train.shape[1]):
print("%d. feature no:%d feature name:%s (%f)" % (f + 1, indices[f], feat_labels[indices[f]], importances[indices[f]]))
# l1.append(f+1)
# l2.append(indices[f])
# l3.append(feat_labels[indices[f]])
# l4.append(importances[indices[f]])
# feature_rank = pd.Dataframe(zip(l1,l2,l3,l4),columns=['id','indice','feature','importances'])
# plotting
indices = indices[0:top_n]
plt.figure()
plt.title("Feature importances top %d" % top_n)
plt.bar(range(top_n), importances[indices],
color="r", yerr=std[indices], align="center")
plt.xticks(range(top_n), indices)
plt.xlim([-1,top_n])
plt.show()
return model
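# --- Usage sketch (illustrative, not authoritative) ---
# Assuming the Titanic csv and a purely numeric feature subset, rank features
# by random forest importance; pandas is imported locally because the module
# itself does not need it.
if __name__ == '__main__':
    import pandas as pd
    data = pd.read_csv('./data/titanic.csv',
                       usecols=['Pclass', 'Age', 'SibSp', 'Fare', 'Survived']).dropna()
    rf_importance(data.drop(columns='Survived'), data['Survived'],
                  max_depth=5, top_n=4)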

43
feature_selection/feature_shuffle.py Normal file
View File

@@ -0,0 +1,43 @@
import pandas as pd
#import numpy as np
from sklearn.ensemble import RandomForestClassifier #, RandomForestRegressor
from sklearn.metrics import roc_auc_score #, mean_squared_error
# 2018.11.28 Created by Eamon.Zhang
def feature_shuffle_rf(X_train,y_train,max_depth=None,class_weight=None,top_n=15,n_estimators=50,random_state=0):
model = RandomForestClassifier(n_estimators=n_estimators,max_depth=max_depth,
random_state=random_state,class_weight=class_weight,
n_jobs=-1)
model.fit(X_train, y_train)
train_auc = roc_auc_score(y_train, (model.predict_proba(X_train))[:, 1])
feature_dict = {}
# selection logic
for feature in X_train.columns:
X_train_c = X_train.copy().reset_index(drop=True)
y_train_c = y_train.copy().reset_index(drop=True)
# shuffle individual feature
X_train_c[feature] = X_train_c[feature].sample(frac=1,random_state=random_state).reset_index(
drop=True)
#print(X_train_c.isnull().sum())
# make prediction with shuffled feature and calculate roc-auc
shuff_auc = roc_auc_score(y_train_c,
(model.predict_proba(X_train_c))[:, 1])
#print(shuff_auc)
# save the drop in roc-auc
feature_dict[feature] = (train_auc - shuff_auc)
#print(feature_dict)
auc_drop = pd.Series(feature_dict).reset_index()
auc_drop.columns = ['feature', 'auc_drop']
auc_drop.sort_values(by=['auc_drop'], ascending=False, inplace=True)
selected_features = auc_drop[auc_drop.auc_drop>0]['feature']
return auc_drop, selected_features
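# --- Usage sketch (illustrative, not authoritative) ---
# Assuming the Titanic csv and a numeric feature subset, score each feature by
# the roc-auc drop its shuffling causes; positive drops mark useful features.
if __name__ == '__main__':
    data = pd.read_csv('./data/titanic.csv',
                       usecols=['Pclass', 'Age', 'SibSp', 'Fare', 'Survived']).dropna()
    auc_drop, selected = feature_shuffle_rf(data.drop(columns='Survived'),
                                            data['Survived'], random_state=0)
    print(auc_drop)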

156
feature_selection/filter_method.py Normal file
View File

@@ -0,0 +1,156 @@
import pandas as pd
import numpy as np
#from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import mutual_info_classif,chi2
from sklearn.feature_selection import SelectKBest, SelectPercentile
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import roc_auc_score, mean_squared_error
# 2018.11.17 Created by Eamon.Zhang
def constant_feature_detect(data,threshold=0.98):
""" detect features that show the same value for the
majority/all of the observations (constant/quasi-constant features)
Parameters
----------
data : pd.Dataframe
threshold : threshold to identify the variable as constant
Returns
-------
list of variables names
"""
data_copy = data.copy(deep=True)
quasi_constant_feature = []
for feature in data_copy.columns:
predominant = (data_copy[feature].value_counts() / np.float(
len(data_copy))).sort_values(ascending=False).values[0]
if predominant >= threshold:
quasi_constant_feature.append(feature)
print(len(quasi_constant_feature),' variables are found to be almost constant')
return quasi_constant_feature
def corr_feature_detect(data,threshold=0.8):
""" detect highly-correlated features of a Dataframe
Parameters
----------
data : pd.Dataframe
threshold : correlation coefficient above which two variables are treated as correlated
Returns
-------
pairs of correlated variables
"""
corrmat = data.corr()
corrmat = corrmat.abs().unstack() # absolute value of corr coef
corrmat = corrmat.sort_values(ascending=False)
corrmat = corrmat[corrmat >= threshold]
corrmat = corrmat[corrmat < 1] # remove the diagonal (self-correlations)
corrmat = pd.DataFrame(corrmat).reset_index()
corrmat.columns = ['feature1', 'feature2', 'corr']
grouped_feature_ls = []
correlated_groups = []
for feature in corrmat.feature1.unique():
if feature not in grouped_feature_ls:
# find all features correlated to a single feature
correlated_block = corrmat[corrmat.feature1 == feature]
grouped_feature_ls = grouped_feature_ls + list(
correlated_block.feature2.unique()) + [feature]
# append the block of features to the list
correlated_groups.append(correlated_block)
return correlated_groups
def mutual_info(X,y,select_k=10):
# mi = mutual_info_classif(X,y)
# mi = pd.Series(mi)
# mi.index = X.columns
# mi.sort_values(ascending=False)
if select_k >= 1:
sel_ = SelectKBest(mutual_info_classif, k=select_k).fit(X,y)
col = X.columns[sel_.get_support()]
elif 0 < select_k < 1:
sel_ = SelectPercentile(mutual_info_classif, percentile=select_k*100).fit(X,y)
col = X.columns[sel_.get_support()]
else:
raise ValueError("select_k must be a positive number")
return col
# 2018.11.27 edit Chi-square test
def chi_square_test(X,y,select_k=10):
"""
Compute chi-squared stats between each non-negative feature and class.
This score should be used to evaluate categorical variables in a classification task
"""
if select_k >= 1:
sel_ = SelectKBest(chi2, k=select_k).fit(X,y)
col = X.columns[sel_.get_support()]
elif 0 < select_k < 1:
sel_ = SelectPercentile(chi2, percentile=select_k*100).fit(X,y)
col = X.columns[sel_.get_support()]
else:
raise ValueError("select_k must be a positive number")
return col
def univariate_roc_auc(X_train,y_train,X_test,y_test,threshold):
"""
First, it builds one decision tree per feature, to predict the target
Second, it makes predictions using the decision tree and the mentioned feature
Third, it ranks the features according to the machine learning metric (roc-auc or mse)
It selects the highest ranked features
"""
roc_values = []
for feature in X_train.columns:
clf = DecisionTreeClassifier()
clf.fit(X_train[feature].to_frame(), y_train)
y_scored = clf.predict_proba(X_test[feature].to_frame())
roc_values.append(roc_auc_score(y_test, y_scored[:, 1]))
roc_values = pd.Series(roc_values)
roc_values.index = X_train.columns
print(roc_values.sort_values(ascending=False))
print(len(roc_values[roc_values > threshold]),'out of the %s features are kept'% len(X_train.columns))
keep_col = roc_values[roc_values > threshold]
return keep_col
def univariate_mse(X_train,y_train,X_test,y_test,threshold):
"""
First, it builds one decision tree per feature, to predict the target
Second, it makes predictions using the decision tree and the mentioned feature
Third, it ranks the features according to the machine learning metric (roc-auc or mse)
It selects the highest ranked features
"""
mse_values = []
for feature in X_train.columns:
clf = DecisionTreeRegressor()
clf.fit(X_train[feature].to_frame(), y_train)
y_scored = clf.predict(X_test[feature].to_frame())
mse_values.append(mean_squared_error(y_test, y_scored))
mse_values = pd.Series(mse_values)
mse_values.index = X_train.columns
print(mse_values.sort_values(ascending=False))
print(len(mse_values[mse_values > threshold]),'out of the %s features are kept'% len(X_train.columns))
keep_col = mse_values[mse_values > threshold]
return keep_col
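# --- Usage sketch (illustrative, not authoritative) ---
# Assuming the Titanic csv and a numeric feature subset, run the basic
# filters; a select_k below 1 would be interpreted as a percentile instead.
if __name__ == '__main__':
    data = pd.read_csv('./data/titanic.csv',
                       usecols=['Pclass', 'Age', 'SibSp', 'Fare', 'Survived']).dropna()
    X, y = data.drop(columns='Survived'), data['Survived']
    print(constant_feature_detect(X, threshold=0.98))
    print(corr_feature_detect(X, threshold=0.8))
    print(mutual_info(X, y, select_k=2))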

128
feature_selection/hybrid.py Normal file
View File

@@ -0,0 +1,128 @@
#import pandas as pd
#import numpy as np
from sklearn.ensemble import RandomForestClassifier #, RandomForestRegressor
from sklearn.metrics import roc_auc_score #, mean_squared_error
# 2018.12.02 Created by Eamon.Zhang
def recursive_feature_elimination_rf(X_train,y_train,X_test,y_test,
tol=0.001,max_depth=None,
class_weight=None,
top_n=15,n_estimators=50,random_state=0):
features_to_remove = []
count = 1
# initial model using all the features
model_all_features = RandomForestClassifier(n_estimators=n_estimators,max_depth=max_depth,
random_state=random_state,class_weight=class_weight,
n_jobs=-1)
model_all_features.fit(X_train, y_train)
y_pred_test = model_all_features.predict_proba(X_test)[:, 1]
auc_score_all = roc_auc_score(y_test, y_pred_test)
for feature in X_train.columns:
print()
print('testing feature: ', feature, ' which is feature ', count,
' out of ', len(X_train.columns))
count += 1
model = RandomForestClassifier(n_estimators=n_estimators,max_depth=max_depth,
random_state=random_state,class_weight=class_weight,
n_jobs=-1)
# fit model with all variables minus the removed features
# and the feature to be evaluated
model.fit(X_train.drop(features_to_remove + [feature], axis=1), y_train)
y_pred_test = model.predict_proba(
X_test.drop(features_to_remove + [feature], axis=1))[:, 1]
auc_score_int = roc_auc_score(y_test, y_pred_test)
print('New Test ROC AUC={}'.format((auc_score_int)))
# print the original roc-auc with all the features
print('All features Test ROC AUC={}'.format((auc_score_all)))
# determine the drop in the roc-auc
diff_auc = auc_score_all - auc_score_int
# compare the drop in roc-auc with the tolerance
if diff_auc >= tol:
print('Drop in ROC AUC={}'.format(diff_auc))
print('keep: ', feature)
else:
print('Drop in ROC AUC={}'.format(diff_auc))
print('remove: ', feature)
# if the drop in the roc is small and we remove the
# feature, we need to set the new roc to the one based on
# the remaining features
auc_score_all = auc_score_int
# and append the feature to remove to the list
features_to_remove.append(feature)
print('DONE!!')
print('total features to remove: ', len(features_to_remove))
features_to_keep = [x for x in X_train.columns if x not in features_to_remove]
print('total features to keep: ', len(features_to_keep))
return features_to_keep
def recursive_feature_addition_rf(X_train,y_train,X_test,y_test,
tol=0.001,max_depth=None,
class_weight=None,
top_n=15,n_estimators=50,random_state=0):
features_to_keep = [X_train.columns[0]]
count = 1
# initial model using only one feature
model_one_feature = RandomForestClassifier(n_estimators=n_estimators,max_depth=max_depth,
random_state=random_state,class_weight=class_weight,
n_jobs=-1)
model_one_feature.fit(X_train[[X_train.columns[0]]], y_train)
y_pred_test = model_one_feature.predict_proba(X_test[[X_train.columns[0]]])[:, 1]
auc_score_all = roc_auc_score(y_test, y_pred_test)
for feature in X_train.columns[1:]:
print()
print('testing feature: ', feature, ' which is feature ', count,
' out of ', len(X_train.columns))
count += 1
model = RandomForestClassifier(n_estimators=n_estimators,max_depth=max_depth,
random_state=random_state,class_weight=class_weight,
n_jobs=-1)
# fit model with the selected features
# and the feature to be evaluated
model.fit(X_train[features_to_keep + [feature]], y_train)
y_pred_test = model.predict_proba(
X_test[features_to_keep + [feature]])[:, 1]
auc_score_int = roc_auc_score(y_test, y_pred_test)
print('New Test ROC AUC={}'.format((auc_score_int)))
# print the roc-auc obtained with the previously selected features
print('Previous Test ROC AUC={}'.format((auc_score_all)))
# determine the increase in the roc-auc
diff_auc = auc_score_int - auc_score_all
# compare the drop in roc-auc with the tolerance
if diff_auc >= tol:
# if the increase in the roc is bigger than the threshold
# we keep the feature and re-adjust the roc-auc to the new value
# considering the added feature
print('Increase in ROC AUC={}'.format(diff_auc))
print('keep: ', feature)
auc_score_all = auc_score_int
features_to_keep.append(feature)
else:
print('Increase in ROC AUC={}'.format(diff_auc))
print('remove: ', feature)
print('DONE!!')
print('total features to keep: ', len(features_to_keep))
return features_to_keep
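# --- Usage sketch (illustrative, not authoritative) ---
# Assuming the Titanic csv, a numeric feature subset and a simple 70/30 split,
# run the recursive elimination above; pandas/train_test_split are imported
# locally because the module itself does not need them.
if __name__ == '__main__':
    import pandas as pd
    from sklearn.model_selection import train_test_split
    data = pd.read_csv('./data/titanic.csv',
                       usecols=['Pclass', 'Age', 'SibSp', 'Fare', 'Survived']).dropna()
    X_train, X_test, y_train, y_test = train_test_split(
        data.drop(columns='Survived'), data['Survived'],
        test_size=0.3, random_state=0)
    print(recursive_feature_elimination_rf(X_train, y_train, X_test, y_test, tol=0.001))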

BIN
images/001.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

BIN
images/IV.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

BIN
images/box-cox.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

BIN
images/embedded.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
images/featuretools.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.8 KiB

BIN
images/filter.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

BIN
images/scaling.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 143 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

BIN
images/workflow2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
images/wrapper.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

BIN
output/Corr_plot.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
output/Countplot_Pclass.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.5 KiB

BIN
output/Distplot_Fare.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

BIN
output/Heatmap.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

12
output/describe.csv Normal file
View File

@@ -0,0 +1,12 @@
,Survived,Pclass,Sex,Age,SibSp,Fare
count,891.0,891.0,891,714.0,891.0,891.0
unique,,,2,,,
top,,,male,,,
freq,,,577,,,
mean,0.3838383838383838,2.308641975308642,,29.69911764705882,0.5230078563411896,32.2042079685746
std,0.4865924542648585,0.8360712409770513,,14.526497332334044,1.1027434322934275,49.693428597180905
min,0.0,1.0,,0.42,0.0,0.0
25%,0.0,2.0,,20.125,0.0,7.9104
50%,0.0,3.0,,28.0,0.0,14.4542
75%,1.0,3.0,,38.0,1.0,31.0
max,1.0,3.0,,80.0,8.0,512.3292

7
output/missing.csv Normal file
View File

@@ -0,0 +1,7 @@
,total missing,proportion
Survived,0,0.0
Pclass,0,0.0
Sex,0,0.0
Age,177,0.19865319865319866
SibSp,0,0.0
Fare,0,0.0