Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import sys
- from sklearn.svm import SVR
- from sklearn.linear_model import Ridge, Lasso
- import matplotlib.pyplot as plt
- from random import sample
- from sklearn.svm import SVR
- from svr import JN_SVR
- from lr import JN_Lasso
- from ridge import JN_Ridge
- from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# Column layout (after dropping the url column with usecols): index 59 is
# the share count target; indices 44-58 are the predictors used here.
target_index = 59
predictor_indices = list(range(44, 59))

# Read the numeric body of the CSV: skip the header row, drop the url column.
fileName = 'OnlineNewsPopularity.csv'
news_articles = np.loadtxt(fileName, dtype=float, delimiter=',',
                           skiprows=1, usecols=range(1, 61))

# Feature names come from the header row.  Strip each name: the header has
# spaces after the commas and a trailing newline, which would otherwise leak
# into axis labels built from `predictors`.
with open(fileName, 'r') as f:
    all_features = [name.strip() for name in f.readline().split(',')]
predictors = [all_features[i] for i in predictor_indices]

# Feature matrix and target vector.
data = news_articles[:, predictor_indices]
target = news_articles[:, target_index]

# Optional outlier removal (disabled): drop articles with > 400k shares.
# bad_indices = np.where(target > 400000)
# data = np.delete(data, bad_indices, 0)
# target = np.delete(target, bad_indices, 0)

# Hold out a random 20% of the rows as the final test set; the remaining
# rows stay in `data`/`target` for training and validation.
num_to_remove = int(0.2 * len(data))
te_indices = sample(range(len(data)), num_to_remove)
te_data = data[te_indices]
te_target = target[te_indices]
data = np.delete(data, te_indices, 0)
target = np.delete(target, te_indices, 0)
#SVR
#USING OUR OWN IMPLEMENTATION
# NOTE(review): this whole experiment is commented out.  If re-enabled,
# tr_data / tr_target do not exist at this point in the script -- they are
# only created inside the ridge cross-validation loop further down -- so
# this block would need to build its own train split first.
# SVR_MODEL = JN_SVR(2 , 0.001)
# SVR_MODEL.fit(tr_data , tr_target)
# y_p = SVR_MODEL.predict(te_data)
# #USING SCIKIT LEARN
# svr_poly = SVR(kernel='poly', C=1e3, degree=2)
# y_poly = svr_poly.fit(tr_data,tr_target).predict(te_data)
# #MEASURING ERROR
# print("SUPPORT VECTOR REGRESSION: ON TESTING DATA\n++++++++++++++++++++++++++++++++++++++++\n")
# print("MEAN SQUARED ERROR")
# print("Using Scikit Learn : " , mean_squared_error(te_target , y_poly))
# print("Using OUR IMPLEMENTATION : " , mean_squared_error( te_target, y_p))
# print("MEAN ABSOLUTE ERROR")
# print("Using Scikit Learn : " , mean_absolute_error(te_target , y_poly))
# print("Using OUR IMPLEMENTATION : " , mean_absolute_error(te_target, y_p))
#Ridge
# Select the ridge penalty by repeated random-subsampling validation:
# for each of `num_iter` random train/validation splits, fit our own
# implementation (JN_Ridge) at every candidate alpha and accumulate the
# validation MSE.  The alpha with the lowest mean MSE across all rounds
# is then used for the final fit.
alphas = np.arange(0, 1.0, 0.05)
ridge_mse_list = np.zeros(len(alphas))
num_iter = 100
for i in range(num_iter):
    print(i)  # progress indicator -- 100 rounds of 20 fits each is slow
    # Random validation split.  NOTE(review): num_to_remove is 20% of the
    # ORIGINAL dataset size and `data` has already lost the test rows, so
    # this actually holds out ~25% of the remaining rows -- confirm intent.
    v_indices = sample(range(len(data)), num_to_remove)
    v_data = data[v_indices]
    v_target = target[v_indices]
    tr_data = np.delete(data, v_indices, 0)
    tr_target = np.delete(target, v_indices, 0)
    # Accumulate validation MSE for every candidate alpha.
    for index, curr_alpha in enumerate(alphas):
        curr_ridge = JN_Ridge(alpha=curr_alpha)
        curr_ridge.fit(tr_data, tr_target)
        curr_ridge_predicted = curr_ridge.predict(v_data)
        ridge_mse_list[index] += mean_squared_error(v_target, curr_ridge_predicted)
# Mean validation MSE per alpha; pick the minimizer.
ridge_mse_list /= num_iter
min_alpha = alphas[np.argmin(ridge_mse_list)]

# Visualize mean validation MSE as a function of alpha.
plt.plot(alphas, ridge_mse_list, label="MSE")
plt.xlabel("Alpha")
plt.ylabel("MSE")
plt.legend()
plt.title("Ridge Regression MSE vs Training Alpha")
plt.show()

# Refit our implementation on ALL non-test rows with the winning alpha and
# predict the held-out test set.
print("Min alpha = {}".format(min_alpha))
our_ridge = JN_Ridge(alpha=min_alpha)
our_ridge.fit(data, target)
our_ridge_predicted = our_ridge.predict(te_data)
# Reference model: scikit-learn's Ridge at the same alpha, trained on the
# same non-test rows and evaluated on the same held-out test set, so the
# two implementations are directly comparable.
ridge_clf = Ridge(alpha=min_alpha)
ridge_clf = ridge_clf.fit(data, target)
sklearn_ridge_prediction = ridge_clf.predict(te_data)

print("RIDGE REGRESSION: ON TESTING DATA\n++++++++++++++++++++++++++++++++++++++++\n")
# Report each metric for both implementations side by side.
for heading, metric in (
        ("MEAN SQUARED ERROR (MSE)", mean_squared_error),
        ("MEAN ABSOLUTE ERROR (MAE)", mean_absolute_error),
        ("R-SQUARED SCORE", r2_score),
):
    print(heading)
    print("Using Scikit Learn : ", metric(te_target, sklearn_ridge_prediction))
    print("Using OUR IMPLEMENTATION : ", metric(te_target, our_ridge_predicted))
#LASSO REGRESSION
#our implementation
# NOTE(review): this experiment is commented out.  If re-enabled, be aware
# that tr_data / tr_target here would be whatever the LAST iteration of the
# ridge cross-validation loop left behind (one arbitrary 80/20-ish split),
# not a deliberately chosen training set.
# NOTE(review): Lasso's `normalize` parameter was deprecated and then
# removed in newer scikit-learn releases -- verify against the installed
# version before re-enabling.
# our_lasso = JN_Lasso(alpha=0.01)
# our_lasso.fit(tr_data, tr_target)
# our_lasso_predicted = our_lasso.predict(te_data)
# #sklearn implementation
# sklearn_lasso= Lasso(alpha=0.01, copy_X=True, normalize=True, max_iter=1000).fit(tr_data, tr_target)
# sklearn_lasso_predicted = sklearn_lasso.predict(te_data)
# print("LASSO REGRESSION: ON TESTING DATA\n++++++++++++++++++++++++++++++++++++++++\n")
# print("MEAN SQUARED ERROR (MSE)")
# print("Using Scikit Learn : " , mean_squared_error(te_target , sklearn_lasso_predicted))
# print("Using OUR IMPLEMENTATION : " , mean_squared_error( te_target, our_lasso_predicted))
# print("MEAN ABSOLUTE ERROR (MAE)")
# print("Using Scikit Learn : " , mean_absolute_error(te_target , sklearn_lasso_predicted))
# print("Using OUR IMPLEMENTATION : " , mean_absolute_error(te_target, our_lasso_predicted))
def makePlot(feature_number, actual, our_p, scikit_p, algorithm):
    """Plot actual share counts and two model predictions against one feature.

    Reads the module-level globals ``te_data`` (test feature matrix) and
    ``predictors`` (feature names) for the x-axis.

    Parameters
    ----------
    feature_number : int
        Column index into ``te_data`` / ``predictors``.
    actual : array-like
        True share counts for the test rows.
    our_p, scikit_p : array-like
        Predictions from our implementation and from scikit-learn.
    algorithm : str
        Algorithm name, used in the legend labels.
    """
    x = te_data[:, feature_number]
    plt.scatter(x, actual, color='blue', label='Actual Number of shares')
    # Distinct colors per series: the original drew both prediction curves
    # in red, making the sklearn curve indistinguishable from ours.
    plt.plot(x, our_p, color='red', label='OUR ' + algorithm)
    plt.plot(x, scikit_p, color='green', label='SKLEARN ' + algorithm)
    plt.xlabel(predictors[feature_number])
    plt.ylabel("Number of Shares")
    plt.legend()
    plt.title("Number of Shares vs " + str(predictors[feature_number]))
    plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement