Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- %matplotlib inline
- import numpy as np
- import matplotlib.pyplot as plt
- from sklearn import datasets
- from sklearn.linear_model import LinearRegression
# Load the diabetes dataset, inspect its dimensions, and make two
# exploratory plots: a histogram of the target and a scatter of two
# input columns.
diabetes = datasets.load_diabetes()
Y = diabetes.data            # design matrix, shape (NumData, NumFeatures)
f = diabetes.target          # regression target vector
NumData, NumFeatures = Y.shape
print(NumData, NumFeatures)
print(f.shape)
# BUG FIX: figure handle was misspelled "fix"; use "fig" as elsewhere in
# the file.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
ax[0].hist(f, bins=40)
ax[0].set_title("Distribution of Target", fontsize=14)
# Columns 6 and 7 chosen as an arbitrary pair of inputs to eyeball.
ax[1].scatter(Y[:, 6], Y[:, 7], c='m', s=3)
ax[1].set_title("Scatter of Two Inputs", fontsize=14)
# Linear regression two ways: scikit-learn's LinearRegression and the
# closed-form normal-equations (pseudo-inverse) solution.
lin = LinearRegression()
lin.fit(Y, f)
fh1 = lin.predict(Y)         # sklearn in-sample predictions
# Pseudo-inverse solution: a = (Y^T Y)^{-1} Y^T f.
# Solving the normal equations with np.linalg.solve is numerically
# preferable to forming the explicit inverse.
# NOTE(review): LinearRegression fits an intercept by default while this
# closed form does not, so fh1 and fh2 need not match exactly — confirm
# that is intended for this demo.
a = np.linalg.solve(Y.T @ Y, Y.T @ f)
fh2 = Y @ a                  # predictions from the closed-form weights
# Plot both sets of predictions against the true targets to check that
# they look the same.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
ax[0].scatter(f, fh1, c='c', s=3)
ax[0].grid(True)
# BUG FIX: the fontsize kwarg was accidentally embedded in the title
# string ('"Sklearn,fontsize=14"'); pass it as a keyword argument.
ax[0].set_title("Sklearn", fontsize=14)
ax[1].scatter(f, fh2, c='m', s=3)
ax[1].grid(True)
ax[1].set_title("Pseudoinverse", fontsize=14)
# Ridge (L2-regularized) closed-form solution:
#   aR = (Y^T Y + gamma * I)^{-1} Y^T f
gamma = 0.5
# Solve the regularized normal equations directly rather than forming
# the explicit inverse (numerically preferable, same result).
aR = np.linalg.solve(Y.T @ Y + gamma * np.identity(NumFeatures), Y.T @ f)
# Bar plots of the two coefficient vectors; both panels share the
# unregularized solution's y-limits so the shrinkage is visible.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
ax[0].bar(np.arange(len(a)), a)
ax[0].set_title('Pseudo-inverse solution', fontsize=14)
ax[0].grid(True)
ax[0].set_ylim(np.min(a), np.max(a))
ax[1].bar(np.arange(len(aR)), aR)
ax[1].set_title('Regularized solution', fontsize=14)
ax[1].grid(True)
ax[1].set_ylim(np.min(a), np.max(a))
from sklearn.linear_model import Lasso

# Fit a Lasso (L1-regularized) regression and compare its (sparse)
# coefficients against the dense pseudo-inverse solution side by side.
ll = Lasso(alpha=0.2)
ll.fit(Y, f)
yh_lasso = ll.predict(Y)

panels = [(a, 'Pseudo-inverse solution'), (ll.coef_, 'Lasso solution')]
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
for axis, (coeffs, label) in zip(ax, panels):
    axis.bar(np.arange(len(coeffs)), coeffs)
    axis.set_title(label, fontsize=14)
    axis.grid(True)
    # Shared y-limits (from the unregularized solution) on both panels.
    axis.set_ylim(np.min(a), np.max(a))
plt.savefig('h')
- %matplotlib inline
- import numpy as np
- import matplotlib.pyplot as plt
- from sklearn.linear_model import lasso_path
- from sklearn import datasets
# Synthetic sparse-regression data
# (problem adapted from Hastie et al., "Statistical Learning with Sparsity"):
#   Z1, Z2 ~ N(0,1)
#   y  = 3*Z1 - 1.5*Z2 + 2*N(0,1)   noisy response (noise sd matches the
#                                   code below; an earlier comment said 10)
#   Xj = Z1 + 0.2*N(0,1) for j = 1,2,3, and
#   Xj = Z2 + 0.2*N(0,1) for j = 4,5,6
# so the six inputs form two correlated groups of three.
N = 100
y_rows = []
X_rows = []
for _ in range(N):
    Z1 = np.random.randn()
    Z2 = np.random.randn()
    y_rows.append(3 * Z1 - 1.5 * Z2 + 2 * np.random.randn())
    X_rows.append(np.array([Z1, Z1, Z1, Z2, Z2, Z2]) + np.random.randn(6) / 5)
# Build the arrays once; the previous np.append / np.vstack calls inside
# the loop re-allocated and copied on every iteration (quadratic cost).
y = np.array(y_rows)
X = np.vstack(X_rows)
# Compute Lasso solutions over a path of regularization strengths.
# NOTE(review): newer scikit-learn versions dropped the fit_intercept
# argument of lasso_path — confirm against the installed version.
alphas_lasso, coefs_lasso, _ = lasso_path(X, y, fit_intercept=False)
# Plot each coefficient's path against the regularization strength.
fig, ax = plt.subplots(figsize=(8, 4))
for j in range(6):
    ax.plot(alphas_lasso, coefs_lasso[j, :])
ax.grid(True)
ax.set_xlabel("Regularization")
ax.set_ylabel("Regression Coefficients")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement