import numpy as np
import sys
from sklearn.svm import SVR
from sklearn.linear_model import Ridge, Lasso
import matplotlib.pyplot as plt
from random import sample
from svr import JN_SVR
from lr import JN_Lasso
from ridge import JN_Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
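 
# Note: JN_SVR, JN_Ridge and JN_Lasso are this project's own model implementations;
# they are assumed to live in the local modules svr.py, ridge.py and lr.py next to this script.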
 
target_index = 59
predictor_indices = [*range(44, 59)]
 
# reading the csv file (skip the header row and the non-numeric url column)
fileName = 'OnlineNewsPopularity.csv'
news_articles = np.loadtxt(fileName, dtype=float, delimiter=',', skiprows=1, usecols=range(1, 61))
 
# getting feature names (the header names carry leading spaces and a trailing newline, so strip them)
with open(fileName, 'r') as f:
    all_features = [name.strip() for name in f.readline().split(',')]
 
# +1 offset: all_features still contains the url column that usecols=range(1, 61) dropped
predictors = [all_features[i + 1] for i in predictor_indices]
 
# data
data = news_articles[:, predictor_indices]
target = news_articles[:, target_index]
 
# extract outliers
# bad_indices = np.where(target > 400000)
# data = np.delete(data, bad_indices, 0)
# target = np.delete(target, bad_indices, 0)
 
# splitting
# extract test data: hold out a random 20% of the rows
num_to_remove = int(0.2 * len(data))
te_indices = sample(range(len(data)), num_to_remove)
te_data = data[te_indices]
te_target = target[te_indices]
 
data = np.delete(data, te_indices, 0)
target = np.delete(target, te_indices, 0)
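 
# Optional sanity check (not part of the original script): confirm the 80/20 split sizes.
# print("train:", data.shape, " test:", te_data.shape)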
 
# SVR
# USING OUR OWN IMPLEMENTATION
# SVR_MODEL = JN_SVR(2, 0.001)
# SVR_MODEL.fit(tr_data, tr_target)
# y_p = SVR_MODEL.predict(te_data)
 
# # USING SCIKIT LEARN
# svr_poly = SVR(kernel='poly', C=1e3, degree=2)
# y_poly = svr_poly.fit(tr_data, tr_target).predict(te_data)
# # MEASURING ERROR
# print("SUPPORT VECTOR REGRESSION: ON TESTING DATA\n++++++++++++++++++++++++++++++++++++++++\n")
# print("MEAN SQUARED ERROR")
# print("Using Scikit Learn : ", mean_squared_error(te_target, y_poly))
# print("Using OUR IMPLEMENTATION : ", mean_squared_error(te_target, y_p))
 
# print("MEAN ABSOLUTE ERROR")
# print("Using Scikit Learn : ", mean_absolute_error(te_target, y_poly))
# print("Using OUR IMPLEMENTATION : ", mean_absolute_error(te_target, y_p))
 
 
# Ridge
# using our implementation: average validation MSE over num_iter random splits for each alpha
alphas = np.arange(0, 1.0, 0.05)
ridge_mse_list = np.zeros(len(alphas))
num_iter = 100
# alphas = [0.0001, 0.001, 0.01, 0.1, 0.5]
min_alpha = sys.maxsize
min_mse = sys.maxsize
 
for i in range(num_iter):
    print(i)
    # get a random 20% of the remaining data for validation
    v_indices = sample(range(len(data)), num_to_remove)
    v_data = data[v_indices]
    v_target = target[v_indices]
    tr_data = np.delete(data, v_indices, 0)
    tr_target = np.delete(target, v_indices, 0)
 
    # try all alphas
    for index, curr_alpha in enumerate(alphas):
        curr_ridge = JN_Ridge(alpha=curr_alpha)
        curr_ridge.fit(tr_data, tr_target)
        curr_ridge_predicted = curr_ridge.predict(v_data)
        curr_mse = mean_squared_error(v_target, curr_ridge_predicted)
        ridge_mse_list[index] += curr_mse
        # ridge_mse_list.append(curr_mse)
        # if curr_mse < min_mse:
        #     min_mse = curr_mse
        #     min_alpha = curr_alpha
 
ridge_mse_list /= num_iter
 
min_alpha = alphas[np.argmin(ridge_mse_list)]
 
plt.plot(alphas, ridge_mse_list, label="MSE")
plt.xlabel("Alpha")
plt.ylabel("MSE")
plt.legend()
plt.title("Ridge Regression MSE vs Training Alpha")
plt.show()
 
# use alpha with lowest MSE
print("Min alpha = {}".format(min_alpha))
our_ridge = JN_Ridge(alpha=min_alpha)
our_ridge.fit(data, target)
our_ridge_predicted = our_ridge.predict(te_data)
 
# using scikit learn
ridge_clf = Ridge(alpha=min_alpha).fit(data, target)
sklearn_ridge_prediction = ridge_clf.predict(te_data)
 
print("RIDGE REGRESSION: ON TESTING DATA\n++++++++++++++++++++++++++++++++++++++++\n")
print("MEAN SQUARED ERROR (MSE)")
print("Using Scikit Learn : ", mean_squared_error(te_target, sklearn_ridge_prediction))
print("Using OUR IMPLEMENTATION : ", mean_squared_error(te_target, our_ridge_predicted))
 
print("MEAN ABSOLUTE ERROR (MAE)")
print("Using Scikit Learn : ", mean_absolute_error(te_target, sklearn_ridge_prediction))
print("Using OUR IMPLEMENTATION : ", mean_absolute_error(te_target, our_ridge_predicted))
 
print("R-SQUARED SCORE")
print("Using Scikit Learn : ", r2_score(te_target, sklearn_ridge_prediction))
print("Using OUR IMPLEMENTATION : ", r2_score(te_target, our_ridge_predicted))
 
 
# LASSO REGRESSION
# our implementation
# our_lasso = JN_Lasso(alpha=0.01)
# our_lasso.fit(tr_data, tr_target)
# our_lasso_predicted = our_lasso.predict(te_data)
 
# # sklearn implementation
# sklearn_lasso = Lasso(alpha=0.01, copy_X=True, normalize=True, max_iter=1000).fit(tr_data, tr_target)
# sklearn_lasso_predicted = sklearn_lasso.predict(te_data)
 
# print("LASSO REGRESSION: ON TESTING DATA\n++++++++++++++++++++++++++++++++++++++++\n")
# print("MEAN SQUARED ERROR (MSE)")
# print("Using Scikit Learn : ", mean_squared_error(te_target, sklearn_lasso_predicted))
# print("Using OUR IMPLEMENTATION : ", mean_squared_error(te_target, our_lasso_predicted))
 
# print("MEAN ABSOLUTE ERROR (MAE)")
# print("Using Scikit Learn : ", mean_absolute_error(te_target, sklearn_lasso_predicted))
# print("Using OUR IMPLEMENTATION : ", mean_absolute_error(te_target, our_lasso_predicted))
 
 
def makePlot(feature_number, actual, our_p, scikit_p, algorithm):
    x = te_data[:, feature_number]
    # sort by feature value so the prediction curves are drawn left to right
    order = np.argsort(x)
    plt.scatter(x, actual, color='blue', label='Actual Number of Shares')
    plt.plot(x[order], np.asarray(our_p)[order], color='red', label='OUR ' + algorithm)
    plt.plot(x[order], np.asarray(scikit_p)[order], color='green', label='SKLEARN ' + algorithm)
    plt.xlabel(predictors[feature_number])
    plt.ylabel("Number of Shares")
    plt.legend()
    plt.title("Number of Shares vs " + str(predictors[feature_number]))
    plt.show()
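 
# Example usage (a sketch, not in the original paste): makePlot is defined but never called
# above; the commented call below assumes the Ridge predictions computed earlier and picks
# feature column 0 purely for illustration.
# makePlot(0, te_target, our_ridge_predicted, sklearn_ridge_prediction, "RIDGE")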