Advertisement
Guest User

Untitled

a guest
Nov 16th, 2019
142
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.90 KB | None | 0 0
  1. %matplotlib inline
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from sklearn import datasets
  5. from sklearn.linear_model import LinearRegression
  6.  
  7. #Load DATA inspect and do exploratory plots
  8. #
  9. diabetes = datasets.load_diabetes()
  10.  
  11. Y=diabetes.data
  12. f=diabetes.target
  13. NumData,NumFeatures = Y.shape
  14. print(NumData,NumFeatures)
  15.  
  16. print(f.shape)
  17.  
  18. fix,ax=plt.subplots(nrows=1,ncols=2,figsize=(12,4))
  19. ax[0].hist(f,bins=40)
  20. ax[0].set_title("Distribution of Target",fontsize=14)
  21. ax[1].scatter(Y[:,6],Y[:,7],c='m',s=3)
  22. ax[1].set_title("Scatter of Two Inputs",fontsize=14)
  23. #Linear regression using sklearn
  24. #
  25. lin= LinearRegression()
  26. lin.fit(Y,f)
  27. fh1=lin.predict(Y)
  28.  
  29. # Psedo-incerse solution to linear regression
  30. #
  31. a=np.linalg.inv(Y.T@Y)@Y.T@f
  32. fh2=Y@a
  33.  
  34. #plot predictions to check if they look the same!
  35. #
  36. fig,ax=plt.subplots(nrows=1,ncols=2,figsize=(10,5))
  37. ax[0].scatter(f,fh1,c='c',s=3)
  38. ax[0].grid(True)
  39. ax[0].set_title("Sklearn,fontsize=14")
  40.  
  41. ax[1].scatter(f,fh2,c='m',s=3)
  42. ax[1].grid(True)
  43. ax[1].set_title("Pseudoinverse",fontsize=14)
  44. gamma=0.5
  45. aR=np.linalg.inv(Y.T@Y+gamma*np.identity(NumFeatures))@Y.T@f
  46.  
  47. fig,ax=plt.subplots(nrows=1,ncols=2,figsize=(8,4))
  48. ax[0].bar(np.arange(len(a)),a)
  49. ax[0].set_title('Pseudo-inverse solution',fontsize=14)
  50. ax[0].grid(True)
  51. ax[0].set_ylim(np.min(a),np.max(a))
  52.  
  53. ax[1].bar(np.arange(len(aR)),aR)
  54. ax[1].set_title('Regularized solution',fontsize=14)
  55. ax[1].grid(True)
  56. ax[1].set_ylim(np.min(a),np.max(a))
  57. from sklearn.linear_model import Lasso
  58. ll= Lasso(alpha=0.2)
  59. ll.fit(Y,f)
  60. yh_lasso=ll.predict(Y)
  61.  
  62. fig,ax=plt.subplots(nrows=1,ncols=2,figsize=(10,4))
  63. ax[0].bar(np.arange(len(a)),a)
  64. ax[0].set_title('Pseudo-inverse solution',fontsize=14)
  65. ax[0].grid(True)
  66. ax[0].set_ylim(np.min(a),np.max(a))
  67.  
  68. ax[1].bar(np.arange(len(ll.coef_)),ll.coef_)
  69. ax[1].set_title('Lasso solution',fontsize=14)
  70. ax[1].grid(True)
  71. ax[1].set_ylim(np.min(a),np.max(a))
  72. plt.savefig('h')
  73. %matplotlib inline
  74. import numpy as np
  75. import matplotlib.pyplot as plt
  76. from sklearn.linear_model import lasso_path
  77. from sklearn import datasets
  78.  
  79. #Synthetic data
  80. #problem taken from hastie ,et al.,Statistical Learning with Sparsity
  81. #z1,z2~N(0,1)
  82. #Y=3*Z1-1.5Z2+10*N(0,1) Noisy response
  83. #noisy inputs (the six are in two groups of three each)
  84. #Xj=Z1+0.2*N(0,1) for j=1,2,3,and
  85. #Xj=Z2+0.2*N(0,1) for j=4,5,6.
  86.  
  87. N=100
  88. y=np.empty(0)
  89. X=np.empty([0,6])
  90. for i in range(N):
  91. Z1=np.random.randn()
  92. Z2=np.random.randn()
  93. y=np.append(y,3*Z1-1.5*Z2+2*np.random.randn())
  94. Xarr=np.array([Z1,Z1,Z1,Z2,Z2,Z2])+np.random.randn(6)/5
  95. X=np.vstack((X,Xarr.tolist()))
  96. #compute regressions with Lasso and return paths
  97. #
  98. alphas_lasso,coefs_lasso,_=lasso_path(X,y,fit_intercept=False)
  99.  
  100. #plot each coefficient
  101. #
  102. fig,ax=plt.subplots(figsize=(8,4))
  103. for i in range(6):
  104. ax.plot(alphas_lasso,coefs_lasso[i,:])
  105.  
  106. ax.grid(True)
  107. ax.set_xlabel("Regularization")
  108. ax.set_ylabel("Regression Coefficients")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement