1. %matplotlib inline
2. import numpy as np
3. import matplotlib.pyplot as plt
4. from sklearn import datasets
5. from sklearn.linear_model import LinearRegression
6.
# Load the diabetes dataset shipped with scikit-learn, inspect its shape,
# and draw two exploratory plots.
#
diabetes = datasets.load_diabetes()

Y = diabetes.data      # input matrix: one row per sample, one column per feature
f = diabetes.target    # regression target, one value per sample
NumData, NumFeatures = Y.shape
print(NumData, NumFeatures)

print(f.shape)

# Left panel: histogram of the target values.
# Right panel: scatter of input columns 6 and 7 against each other.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
ax[0].hist(f, bins=40)
ax[0].set_title("Distribution of Target", fontsize=14)
ax[1].scatter(Y[:, 6], Y[:, 7], c='m', s=3)
ax[1].set_title("Scatter of Two Inputs", fontsize=14)
# Linear regression using sklearn
#
lin = LinearRegression()
lin.fit(Y, f)
fh1 = lin.predict(Y)

# Pseudo-inverse solution to linear regression
#
# The weights a = (Y^T Y)^-1 Y^T f are obtained by solving the normal
# equations (Y^T Y) a = Y^T f rather than forming the inverse explicitly;
# the estimator is the same, but the linear solve is cheaper and more
# accurate numerically.
# NOTE: Y has no bias column here, whereas LinearRegression() fits an
# intercept by default, so fh1 and fh2 are not expected to coincide exactly.
a = np.linalg.solve(Y.T @ Y, Y.T @ f)
fh2 = Y @ a
33.
# Plot predictions against the true targets to check if the two fits
# look the same.
#
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

# Left panel: sklearn predictions (fh1) versus the targets.
ax[0].scatter(f, fh1, c='c', s=3)
ax[0].grid(True)
# fontsize must be a keyword argument, not part of the title text.
ax[0].set_title("Sklearn", fontsize=14)

# Right panel: closed-form predictions (fh2) versus the targets.
ax[1].scatter(f, fh2, c='m', s=3)
ax[1].grid(True)
ax[1].set_title("Pseudoinverse", fontsize=14)
# Regularized (ridge / Tikhonov) least squares:
#     aR = (Y^T Y + gamma * I)^-1 Y^T f
# computed with a linear solve instead of an explicit matrix inverse.
#
gamma = 0.5
aR = np.linalg.solve(Y.T @ Y + gamma * np.identity(NumFeatures), Y.T @ f)

# Bar charts of the unregularized weights (left) and the regularized
# weights (right).
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
ax[0].bar(np.arange(len(a)), a)
ax[0].set_title('Pseudo-inverse solution', fontsize=14)
ax[0].grid(True)
ax[0].set_ylim(np.min(a), np.max(a))

ax[1].bar(np.arange(len(aR)), aR)
ax[1].set_title('Regularized solution', fontsize=14)
ax[1].grid(True)
# Same y-range as the left panel (taken from `a`) so the bar heights of
# the two solutions are directly comparable.
ax[1].set_ylim(np.min(a), np.max(a))
from sklearn.linear_model import Lasso

# Fit a Lasso model with alpha=0.2 on the full data set and keep its
# in-sample predictions.
ll = Lasso(alpha=0.2).fit(Y, f)
yh_lasso = ll.predict(Y)

# Bar charts of the closed-form weights (left) and the Lasso weights
# (right); both panels use the y-range of the closed-form weights `a`.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
y_low, y_high = np.min(a), np.max(a)
panels = (
    (ax[0], a, 'Pseudo-inverse solution'),
    (ax[1], ll.coef_, 'Lasso solution'),
)
for axis, weights, title in panels:
    axis.bar(np.arange(len(weights)), weights)
    axis.set_title(title, fontsize=14)
    axis.grid(True)
    axis.set_ylim(y_low, y_high)

# Write the current figure to a file named 'h' (format chosen by matplotlib).
plt.savefig('h')
73. %matplotlib inline
74. import numpy as np
75. import matplotlib.pyplot as plt
76. from sklearn.linear_model import lasso_path
77. from sklearn import datasets
78.
# Synthetic data
# Problem taken from Hastie et al., Statistical Learning with Sparsity
# Z1, Z2 ~ N(0,1)
# y = 3*Z1 - 1.5*Z2 + 2*N(0,1)    noisy response
# Noisy inputs (the six are in two groups of three each):
# Xj = Z1 + 0.2*N(0,1) for j = 1, 2, 3, and
# Xj = Z2 + 0.2*N(0,1) for j = 4, 5, 6.

N = 100

# Draw all N samples in one shot instead of growing y and X row by row,
# which re-copied both arrays on every iteration.
Z1 = np.random.randn(N)   # latent variable behind columns 0-2
Z2 = np.random.randn(N)   # latent variable behind columns 3-5
y = 3 * Z1 - 1.5 * Z2 + 2 * np.random.randn(N)

# Each column is its latent variable plus independent noise of std 1/5.
X = np.column_stack((Z1, Z1, Z1, Z2, Z2, Z2)) + np.random.randn(N, 6) / 5
# Compute regressions with Lasso and return the coefficient paths
#
# NOTE(review): `fit_intercept=False` is no longer passed. It is not a
# lasso_path parameter, and recent scikit-learn releases reject unknown
# keyword arguments here; the path routine is understood not to fit an
# intercept itself -- confirm against the installed version.
alphas_lasso, coefs_lasso, _ = lasso_path(X, y)

# Plot each coefficient
#
# coefs_lasso holds one row per input feature and one column per alpha,
# so iterating over its rows draws one curve per coefficient without
# hard-coding the number of features.
fig, ax = plt.subplots(figsize=(8, 4))
for coef_path in coefs_lasso:
    ax.plot(alphas_lasso, coef_path)

ax.grid(True)
ax.set_xlabel("Regularization")
ax.set_ylabel("Regression Coefficients")
