• API
• FAQ
• Tools
• Archive
SHARE
TWEET

Untitled

a guest Nov 16th, 2019 107 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. %matplotlib inline
2. import numpy as np
3. import matplotlib.pyplot as plt
4. from sklearn import datasets
5. from sklearn.linear_model import LinearRegression
6.
# Load the diabetes dataset, inspect its shape, and do exploratory plots
#
# BUG FIX: the dataset was used but never loaded (the load line was lost
# from the paste) — without this, `diabetes` raises NameError below.
diabetes = datasets.load_diabetes()

Y = diabetes.data     # (442, 10) design matrix of standardized features
f = diabetes.target   # (442,) disease-progression response
NumData, NumFeatures = Y.shape
print(NumData, NumFeatures)

print(f.shape)
# Exploratory plots: target distribution and a scatter of two input features.
# FIX: variable was typo'd `fix`; renamed to `fig` for consistency with the
# rest of the file (the name is not used afterwards, so behavior is unchanged).
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
ax[0].hist(f, bins=40)
ax[0].set_title("Distribution of Target", fontsize=14)
ax[1].scatter(Y[:, 6], Y[:, 7], c='m', s=3)
ax[1].set_title("Scatter of Two Inputs", fontsize=14)
# Linear regression using sklearn
# Fit on the full data and keep the in-sample predictions for comparison.
lin = LinearRegression()
fh1 = lin.fit(Y, f).predict(Y)
28.
# Pseudo-inverse (normal-equations) solution to linear regression:
# a solves (Y^T Y) a = Y^T f.
# FIX: use np.linalg.solve instead of forming the explicit inverse —
# numerically stabler and cheaper than inv(Y.T @ Y) @ Y.T @ f.
a = np.linalg.solve(Y.T @ Y, Y.T @ f)
fh2 = Y @ a   # in-sample predictions from the closed-form solution
33.
# Plot predictions to check if they look the same!
#
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
ax[0].scatter(f, fh1, c='c', s=3)
ax[0].grid(True)
# BUG FIX: fontsize=14 was accidentally inside the title string
# (set_title("Sklearn,fontsize=14")); it belongs as a keyword argument.
ax[0].set_title("Sklearn", fontsize=14)

ax[1].scatter(f, fh2, c='m', s=3)
ax[1].grid(True)
ax[1].set_title("Pseudoinverse", fontsize=14)
# Ridge (L2-regularized) closed-form solution: aR solves
# (Y^T Y + gamma*I) aR = Y^T f, where gamma controls shrinkage strength.
gamma = 0.5
# FIX: solve the linear system instead of forming the explicit inverse
# (numerically stabler than inv(...) @ Y.T @ f).
aR = np.linalg.solve(Y.T @ Y + gamma * np.identity(NumFeatures), Y.T @ f)
46.
# Compare the pseudo-inverse and ridge-regularized coefficient vectors
# side by side, on a common y-scale (the pseudo-inverse coefficient range).
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
ylo, yhi = np.min(a), np.max(a)
panels = zip(ax, (a, aR), ('Pseudo-inverse solution', 'Regularized solution'))
for axis, coeffs, title in panels:
    axis.bar(np.arange(len(coeffs)), coeffs)
    axis.set_title(title, fontsize=14)
    axis.grid(True)
    axis.set_ylim(ylo, yhi)
from sklearn.linear_model import Lasso

# Fit an L1-regularized (Lasso) regression and keep in-sample predictions.
# (`ll` is kept as the name: its coef_ attribute is read further below.)
ll = Lasso(alpha=0.2).fit(Y, f)
yh_lasso = ll.predict(Y)
61.
# Compare pseudo-inverse vs Lasso coefficient vectors on a common y-scale
# (the pseudo-inverse range), then save the figure.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
panels = zip(ax, (a, ll.coef_), ('Pseudo-inverse solution', 'Lasso solution'))
for axis, coeffs, title in panels:
    axis.bar(np.arange(len(coeffs)), coeffs)
    axis.set_title(title, fontsize=14)
    axis.grid(True)
    axis.set_ylim(np.min(a), np.max(a))
plt.savefig('h')
73. %matplotlib inline
74. import numpy as np
75. import matplotlib.pyplot as plt
76. from sklearn.linear_model import lasso_path
77. from sklearn import datasets
78.
#Synthetic data
#Problem taken from Hastie et al., "Statistical Learning with Sparsity"
#Z1, Z2 ~ N(0,1)
#y = 3*Z1 - 1.5*Z2 + 2*N(0,1)   noisy response (noise scale matches the code below)
#Noisy inputs (the six are in two groups of three each):
#Xj = Z1 + 0.2*N(0,1) for j = 1,2,3, and
#Xj = Z2 + 0.2*N(0,1) for j = 4,5,6.
86.
N = 100

# FIX: the original grew y and X one row at a time with np.append/np.vstack,
# which reallocates and copies on every iteration (accidentally O(N^2)).
# Draw the latent factors for all N samples at once and build the arrays
# in a single vectorized step — same distribution, same shapes.
Z1 = np.random.randn(N)
Z2 = np.random.randn(N)
# Noisy response: y = 3*Z1 - 1.5*Z2 + 2*N(0,1)
y = 3 * Z1 - 1.5 * Z2 + 2 * np.random.randn(N)
# Noisy inputs: columns 0-2 are copies of Z1, columns 3-5 of Z2,
# each perturbed by N(0,1)/5 noise.
X = np.column_stack([Z1, Z1, Z1, Z2, Z2, Z2]) + np.random.randn(N, 6) / 5
# Compute Lasso regressions over a grid of regularization strengths and
# return the full coefficient paths (no intercept: the data are centered
# by construction).
path = lasso_path(X, y, fit_intercept=False)
alphas_lasso, coefs_lasso = path[0], path[1]
99.
# Plot each coefficient's regularization path.
#
fig, ax = plt.subplots(figsize=(8, 4))
for coef_path in coefs_lasso:
    ax.plot(alphas_lasso, coef_path)

ax.grid(True)
ax.set_xlabel("Regularization")
ax.set_ylabel("Regression Coefficients")
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top