Advertisement
Guest User

Untitled

a guest
Sep 19th, 2019
126
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.31 KB | None | 0 0
  1. '''
  2. FIRST SUBMISSION
  3. ACC 0.88864
  4. '''
  5. import sys
  6. import numpy as np
  7. import pandas as pd
  8. import keras
  9. from keras.layers import Dense, Input,Dropout
  10. from keras.models import Sequential
  11. from keras.utils import to_categorical
  12. from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
  13. import seaborn as sns
  14. import matplotlib.pyplot as plt
  15. import math
  16.  
  17.  
  18. def loadCVS(pd_dataName):
  19. pd_dataName = str(pd_dataName)
  20.  
  21. pd_data = pd.read_csv(pd_dataName)
  22.  
  23. # fill nan values
  24. pd_data.replace(np.nan, 0)
  25.  
  26. # fill null values
  27. print(np.sum(pd_data.isnull()))
  28. pd_data = pd_data.fillna(0)
  29.  
  30. # clip from 0
  31. pd_data['Homepage _Duration'] = pd_data['Homepage _Duration'].clip(lower=0.01)
  32. pd_data['Homepage _Duration']= np.log(pd_data['Homepage _Duration'])
  33. pd_data['Aboutus_Duration'] = pd_data['Aboutus_Duration'].clip(lower=0.01)
  34. pd_data['Aboutus_Duration'] = np.log(pd_data['Aboutus_Duration'])
  35. pd_data['Contactus_Duration'] = pd_data['Contactus_Duration'].clip(lower=0.01)
  36. pd_data['Contactus_Duration'] = np.log(pd_data['Contactus_Duration'])
  37.  
  38.  
  39. # remap strings
  40. month_mapping = {'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'Aug': 7, 'Dec': 11, 'Jul': 6, 'June': 5,
  41. 'Nov': 10, 'Oct': 9, 'Sep': 8}
  42. pd_data['Month'] = pd_data['Month'].apply(lambda s: month_mapping.get(s) if s in month_mapping else s).astype('int')
  43.  
  44. # pd_data['Month']=pd_data['Month'].astype('category')
  45.  
  46. vt_mapping = {'Returning_Visitor': 0, 'New_Visitor': 1, 'Other': 2}
  47. pd_data['VisitorType'] = pd_data['VisitorType'].apply(lambda s: vt_mapping.get(s) if s in vt_mapping else s).astype(
  48. 'int')
  49.  
  50. # remap bool
  51. pd_data['Weekend'] = pd_data['Weekend'].astype('int')
  52.  
  53. pd_data = pd_data.astype('float64')
  54.  
  55. # print(pd_data.describe())
  56. print(pd_data.info())
  57.  
  58. if 'ID' in pd_data.columns: # test csv
  59. Y = None
  60. pd_data = pd_data.drop('ID', axis=1)
  61. else: # train csv
  62. Y = pd_data['Revenue'].values.reshape((-1, 1))
  63. Y = to_categorical(Y)
  64. # pairvise plot
  65. # sns.pairplot(pd_data,hue='Revenue', vars=['Homepage','Aboutus','Month','TrafficType'])
  66. # plt.show()
  67.  
  68. pd_data = pd_data.drop('Revenue', axis=1)
  69.  
  70.  
  71. X = pd_data.values
  72. print("X shape before deleting",X.shape)
  73. xTempMonth=X[:,10]
  74. np.delete(X,10,axis=0)
  75. X= (X-np.mean(X))/np.std(X)
  76. print("X shape after deleting",X.shape)
  77.  
  78.  
  79.  
  80. xTempMonth=to_categorical(xTempMonth)
  81. print(X.shape,xTempMonth.shape)
  82. X=np.concatenate((X,xTempMonth),axis=1)
  83. print("X shape after concatenating",X.shape)
  84.  
  85. # normalize features
  86. # pd_data = (pd_data - pd_data.mean()) / pd_data.std()
  87.  
  88. # get np array from dataframe
  89.  
  90. return X, Y
  91.  
  92.  
  93. def nn1():
  94. mod = Sequential()
  95. mod.add(Dense(40,activation='sigmoid'))
  96. # mod.add(Dropout(0.5))
  97. mod.add(Dense(20,activation='sigmoid'))
  98. # mod.add(Dropout(0.5))
  99. mod.add(Dense(2,activation='softmax'))
  100.  
  101. return mod
  102.  
  103.  
  104. if __name__ == '__main__':
  105. print("python approach3.py trainSet.csv xTest.csv outpd_data.csv")
  106. print("python approach3.py [1] [2] [3]")
  107. xTrain, yTrain = loadCVS(sys.argv[1])
  108. xTest, _ = loadCVS(sys.argv[2])
  109.  
  110. print(xTrain.shape, yTrain.shape)
  111.  
  112. N=yTrain.shape[0]
  113.  
  114. for n in range(N-1,-1,-1):
  115. # print(n,yTrain[n],yTrain.shape)
  116. if yTrain[n,0]==1:
  117. xTrain=np.append(xTrain, np.reshape(xTrain[n],(1,xTrain[n].shape[0])) ,axis=0)
  118. yTrain=np.append(yTrain, np.reshape(yTrain[n],(1,yTrain[n].shape[0])) ,axis=0)
  119.  
  120. a=np.arange(xTrain.shape[0])
  121. np.random.shuffle(a)
  122. print(a.shape)
  123. xTrain=xTrain[a]
  124. yTrain=yTrain[a]
  125.  
  126. print(xTrain.shape, yTrain.shape)
  127. input("Okay?")
  128.  
  129. m = nn1()
  130. m.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
  131.  
  132. filepath = "weights.hdf5"
  133. checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode=',min')
  134. # tensorboard = TensorBoard(log_dir='./logs', write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None, update_freq='epoch')
  135. earlystopping = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=100, verbose=0, mode='auto',
  136. baseline=None, restore_best_weights=True)
  137.  
  138. callbacks_list = [checkpoint, earlystopping]
  139. hist = m.fit(xTrain, yTrain, validation_split=0.1, epochs=1000, callbacks=callbacks_list,shuffle=True)
  140.  
  141. plt.subplot(1, 2, 1)
  142. plt.plot(hist.history['acc'])
  143. plt.xlabel('Epoch')
  144. plt.ylabel("Accuracy")
  145.  
  146. plt.subplot(1, 2, 2)
  147. plt.plot(hist.history['loss'])
  148. plt.xlabel('Epoch')
  149. plt.ylabel("Loss")
  150.  
  151. plt.show()
  152.  
  153. print(m.summary())
  154. # save model to JSON
  155. model_json = m.to_json()
  156. with open("model.json", "w") as json_file:
  157. json_file.write(model_json)
  158. print("Model saved to json.")
  159.  
  160. # predicting
  161. yTest = m.predict(xTest)
  162. yTest = np.argmax(yTest, axis=1)
  163.  
  164. pd_test = pd.DataFrame(columns=['ID', 'Revenue'])
  165. pd_test['ID'] = pd.Series(np.arange(1, len(yTest) + 1))
  166. pd_test['Revenue'] = pd.Series(yTest)
  167. pd_test.to_csv(sys.argv[3], index=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement