Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
import re

import numpy as np
import pandas as pd


def prepare_2012_regression_frame(crime_realEstate, crime_finalDF):
    """Build the 2012 regression design matrix and target.

    Normalizes ``Area_Name`` (spaces -> underscores) on *crime_realEstate*
    in place, selects every ``*_2012`` column, attaches a community code
    column taken from *crime_finalDF*, and drops rows with missing values.

    NOTE(review): the original pulled ``community_area`` from a different
    frame (``crime_finalDF``) than the one being sliced — possibly a typo
    for ``crime_realEstate``; confirm against the upstream notebook.

    Args:
        crime_realEstate: DataFrame with ``Area_Name`` and ``*_2012``
            columns, including ``Avg_Price_2012``. Mutated in place
            (``Area_Name`` normalization), matching the original script.
        crime_finalDF: DataFrame providing a ``community_area`` column,
            index-aligned with *crime_realEstate*.

    Returns:
        (regDF_2012, X_feats, y_label): the cleaned 2012 frame, the
        feature matrix (everything except ``Avg_Price_2012``), and the
        target values as a numpy array.
    """
    # In-place normalization, preserving the original script's side effect
    # on the caller's frame.
    crime_realEstate['Area_Name'] = crime_realEstate['Area_Name'].apply(
        lambda name: re.sub(' ', '_', str(name))
    )
    cols_2012 = crime_realEstate.filter(regex='_2012').columns
    # .copy() avoids pandas' SettingWithCopyWarning that the original's
    # dropna(inplace=True) on a column slice produced.
    regDF_2012 = crime_realEstate[cols_2012].copy()
    regDF_2012 = regDF_2012.assign(community_code=crime_finalDF['community_area'])
    regDF_2012 = regDF_2012.dropna()
    X_feats = regDF_2012.drop(['Avg_Price_2012'], axis=1)
    y_label = regDF_2012['Avg_Price_2012'].values
    return regDF_2012, X_feats, y_label


def pca_cv_mse(regDF_2012, y_label, max_components=5, random_state=0):
    """10-fold CV mean-squared error for principal-components regression.

    Element 0 of the returned ``mse`` list is the intercept-only baseline
    (regression on a constant column); element ``i`` uses the first ``i``
    principal components of the standardized frame.

    Args:
        regDF_2012: numeric DataFrame to decompose with PCA.
        y_label: 1-D array-like regression target.
        max_components: largest number of components to evaluate.
        random_state: seed for the shuffled K-fold split (the original
            used 0).

    Returns:
        (mse, cum_explained): list of CV mean-squared errors, and the
        cumulative explained-variance percentages (rounded to 4 decimals
        before scaling to percent, as in the original).
    """
    # Local imports: sklearn is a heavyweight dependency not needed by
    # the data-prep step.
    from sklearn.decomposition import PCA
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import KFold, cross_val_score
    from sklearn.preprocessing import scale

    pca = PCA()
    # Original bound this to `pca_` but later referenced undefined
    # `X_reduced`; unified under one name.
    X_reduced = pca.fit_transform(scale(regDF_2012))
    cum_explained = np.cumsum(
        np.round(pca.explained_variance_ratio_, decimals=4) * 100
    )

    n = len(X_reduced)
    # Modern sklearn API: KFold(n_splits=...); the original's
    # KFold(n, n_folds=10, ...) signature was removed in sklearn 0.20.
    kf_10 = KFold(n_splits=10, shuffle=True, random_state=random_state)
    regr = LinearRegression()

    mse = []
    # 'mean_squared_error' was renamed 'neg_mean_squared_error'; sklearn
    # scorers are negated, so flip the sign back to a positive MSE.
    # The original also referenced undefined `y`; the target is y_label.
    baseline = -cross_val_score(
        regr, np.ones((n, 1)), y_label.ravel(),
        cv=kf_10, scoring='neg_mean_squared_error',
    ).mean()
    mse.append(baseline)
    for i in range(1, max_components + 1):
        score = -cross_val_score(
            regr, X_reduced[:, :i], y_label.ravel(),
            cv=kf_10, scoring='neg_mean_squared_error',
        ).mean()
        mse.append(score)
    return mse, cum_explained


def plot_mse(mse):
    """Plot CV MSE against the number of principal components.

    Left panel includes the intercept-only baseline (index 0); right
    panel shows components 1-5 only, as in the original. Returns the
    matplotlib figure. (The original used `plt` without importing it.)
    """
    import matplotlib.pyplot as plt  # local: plotting is optional

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
    ax1.plot(mse, '-v')
    ax2.plot([1, 2, 3, 4, 5], mse[1:6], '-v')
    ax2.set_title('Intercept excluded from plot')
    for ax in fig.axes:
        ax.set_xlabel('Number of principal components in regression')
        ax.set_ylabel('MSE')
        ax.set_xlim((-0.2, 5.2))
    return fig
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement