1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
| """ Spyder Editor Author: Yihangbao """ from __future__ import print_function import pandas as pd import numpy as np from keras.models import Sequential from keras.optimizers import SGD,RMSprop,Adam from keras.layers import Dense,Activation,Dropout
# Load train.csv and test.csv (first column used as the index) and tag every
# row with its origin, so the combined frame can be split apart again after
# the shared feature engineering.
raw_train = pd.read_csv('train.csv', index_col=0)
raw_train['is_test'] = 0
raw_test = pd.read_csv('test.csv', index_col=0)
raw_test['is_test'] = 1
all_data = pd.concat((raw_train, raw_test), axis=0)


def get_titles(df):
    """Return a copy of *df* with ``Name`` replaced by a grouped ``Title``.

    The raw title is the text between the first comma and the following
    period of ``Name``, stripped of surrounding whitespace. It is then mapped
    onto a smaller set of groups (Officer, Royalty, Mr, Mrs, Miss, Master).

    Titles that are not in the mapping, and names that are missing or contain
    no comma, produce NaN in ``Title``.

    The input frame is not modified; a new frame without the ``Name`` column
    and with ``Title`` appended as the last column is returned.
    """
    Title_Dictionary = {
        "Capt": "Officer",
        "Col": "Officer",
        "Major": "Officer",
        "Jonkheer": "Royalty",
        "Don": "Royalty",
        "Sir": "Royalty",
        "Dr": "Officer",
        "Rev": "Officer",
        "the Countess": "Royalty",
        "Dona": "Royalty",
        "Mme": "Mrs",
        "Mlle": "Miss",
        "Ms": "Mrs",
        "Mr": "Mr",
        "Mrs": "Mrs",
        "Miss": "Miss",
        "Master": "Master",
        "Lady": "Royalty",
    }
    # Vectorised string ops propagate NaN instead of raising on a missing
    # name or on a name without a comma.
    raw_titles = (
        df['Name']
        .str.split(',').str[1]
        .str.split('.').str[0]
        .str.strip()
    )
    # drop() returns a new frame, so adding the column here leaves the
    # caller's frame untouched.
    titled = df.drop(['Name'], axis=1)
    titled['Title'] = raw_titles.map(Title_Dictionary)
    return titled


def get_dummy_cats(df):
    """Return *df* with its categorical columns one-hot encoded.

    The columns ``Title``, ``Pclass``, ``Sex``, ``Embarked``, ``Cabin`` and
    ``Cabin_letter`` are replaced by indicator columns; all other columns are
    passed through unchanged.
    """
    categorical = ['Title', 'Pclass', 'Sex', 'Embarked', 'Cabin', 'Cabin_letter']
    # Pin the indicator dtype: pandas >= 2.0 defaults to bool, and a frame
    # mixing bool with numeric columns yields an object array from `.values`,
    # which cannot be fed to the network as a numeric tensor.
    return pd.get_dummies(df, columns=categorical, dtype=np.uint8)
def get_cabin_letter(df):
    """Fill missing ``Cabin`` values and add a ``Cabin_letter`` column.

    Missing cabins are replaced with the placeholder ``'Z'``; ``Cabin_letter``
    is the first character of the (filled) ``Cabin`` value.

    The frame is modified in place and also returned.
    """
    # Assign the filled column back rather than calling
    # fillna(inplace=True) on the df['Cabin'] selection: that form operates on
    # an intermediate object and does not update df under pandas
    # Copy-on-Write (and emits a FutureWarning on pandas 2.x).
    df['Cabin'] = df['Cabin'].fillna('Z')
    df['Cabin_letter'] = df['Cabin'].str[0]
    return df
def process_data(df):
    """Run the full feature-engineering pipeline and return the result.

    Steps, in order:
      1. replace ``Name`` with a grouped ``Title`` (``get_titles``),
      2. fill missing ``Embarked`` values with ``'S'``,
      3. fill ``Cabin`` and derive ``Cabin_letter`` (``get_cabin_letter``),
      4. drop the ``Ticket`` and ``Fare`` columns,
      5. one-hot encode the categorical columns (``get_dummy_cats``).
    """
    df = get_titles(df)
    # Assign back instead of fillna(inplace=True) on a column selection; the
    # in-place form does not update df under pandas Copy-on-Write.
    df['Embarked'] = df['Embarked'].fillna('S')
    df = get_cabin_letter(df)
    df = df.drop(['Ticket', 'Fare'], axis=1)
    return get_dummy_cats(df)
# Engineer features once over the combined frame, then separate the rows
# again using the is_test marker (0 = came from train.csv, 1 = from test.csv).
proc_data = process_data(all_data)
proc_train = proc_data.loc[proc_data['is_test'].eq(0)]
proc_test = proc_data.loc[proc_data['is_test'].eq(1)]
# --- Age regressor -----------------------------------------------------------
# Fit on every row (train and test alike) that has no missing value once the
# Survived and is_test columns are removed; Age is the regression target and
# the remaining columns are the inputs.
for_age_train = proc_data.drop(['Survived', 'is_test'], axis=1).dropna()
Y_train_age = for_age_train['Age']
X_train_age = for_age_train.drop('Age', axis=1)

tmodel = Sequential()
tmodel.add(Dense(
    units=128,
    input_dim=X_train_age.shape[1],
    kernel_initializer='normal',
    bias_initializer='zeros',
))
tmodel.add(Activation('relu'))
# Eight hidden blocks of Dense(64) followed by 25% dropout.
# NOTE(review): these Dense layers carry no activation, unlike the first
# layer -- confirm that is intended.
for i in range(8):
    tmodel.add(Dense(units=64, kernel_initializer='normal', bias_initializer='zeros'))
    tmodel.add(Dropout(.25))
# Single linear output unit for the predicted age.
tmodel.add(Dense(units=1))
tmodel.add(Activation('linear'))
tmodel.compile(loss='mean_squared_error', optimizer='rmsprop')
tmodel.fit(X_train_age.values, Y_train_age.values, epochs=600, verbose=2)
# Impute missing ages in the training rows with the age regressor.
# Work on an explicit copy: proc_train is a boolean-mask selection of
# proc_data, and writing into such a selection raises SettingWithCopyWarning.
train_data = proc_train.copy()
train_age_missing = train_data['Age'].isnull()
# Same input columns the regressor was fitted on (everything except Age,
# Survived and is_test).
to_pred = train_data.loc[train_age_missing].drop(['Age', 'Survived', 'is_test'], axis=1)
if not to_pred.empty:  # nothing to predict when no age is missing
    p = tmodel.predict(to_pred.values)
    # predict() returns one column per output unit; flatten it to one value
    # per row before writing it into the Age column.
    train_data.loc[train_age_missing, 'Age'] = p.ravel()
# Impute missing ages in the test rows with the age regressor.
# Work on an explicit copy: proc_test is a boolean-mask selection of
# proc_data, and writing into such a selection raises SettingWithCopyWarning.
test_data = proc_test.copy()
test_age_missing = test_data['Age'].isnull()
to_pred = test_data.loc[test_age_missing].drop(['Age', 'Survived', 'is_test'], axis=1)
if not to_pred.empty:  # nothing to predict when no age is missing
    p = tmodel.predict(to_pred.values)
    # Flatten the (n, 1) prediction to one value per row.
    test_data.loc[test_age_missing, 'Age'] = p.ravel()

# One-hot training targets: one indicator column per distinct Survived value.
# The dtype is pinned because pandas >= 2.0 would otherwise emit bool columns.
y = pd.get_dummies(train_data['Survived'], dtype=np.uint8)
# --- Survival classifier -----------------------------------------------------
# Inputs are every engineered column except the target and the split marker.
x = train_data.drop(['Survived', 'is_test'], axis=1)

model = Sequential()
model.add(Dense(input_dim=x.shape[1], units=128,
                kernel_initializer='normal', bias_initializer='zeros'))
model.add(Activation('relu'))
# Fifteen hidden blocks: Dense(128) -> ReLU -> 40% dropout.
for i in range(15):
    model.add(Dense(units=128, kernel_initializer='normal', bias_initializer='zeros'))
    model.add(Activation('relu'))
    model.add(Dropout(.40))
# Two-way softmax output, one unit per column of the one-hot target y.
model.add(Dense(units=2))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x.values, y.values, epochs=500, verbose=2)

# Sequential.predict_classes() is no longer available in current Keras
# releases; for a softmax output it returned the index of the largest
# probability, which is what argmax over predict() gives.
# NOTE(review): the index equals the Survived label only if y's columns are
# ordered 0 then 1 -- confirm Survived takes no other values.
test_features = test_data.drop(['Survived', 'is_test'], axis=1)
p_survived = np.argmax(model.predict(test_features.values), axis=-1)

# Write one row per test passenger: the frame's index value and the predicted
# class.
# NOTE(review): the header is spelled 'PassengerID' here -- confirm this
# matches the spelling expected by whatever consumes ans1.csv.
submission = pd.DataFrame()
submission['PassengerID'] = test_data.index
submission['Survived'] = p_survived
submission.to_csv('ans1.csv', index=False)
|