%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.linear_model import LinearRegression

# Load the data, inspect it and do exploratory plots
#
diabetes = datasets.load_diabetes()

Y=diabetes.data
f=diabetes.target
NumData,NumFeatures = Y.shape
print(NumData,NumFeatures)

print(f.shape)

fig,ax=plt.subplots(nrows=1,ncols=2,figsize=(12,4))
ax[0].hist(f,bins=40)
ax[0].set_title("Distribution of Target",fontsize=14)
ax[1].scatter(Y[:,6],Y[:,7],c='m',s=3)
ax[1].set_title("Scatter of Two Inputs",fontsize=14)
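
# Optional aside (not in the original paste): load_diabetes also exposes the
# column names, which is handy for labelling the scatter plot above.
print(diabetes.feature_names)
ax[1].set_xlabel(diabetes.feature_names[6])
ax[1].set_ylabel(diabetes.feature_names[7])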
# Linear regression using sklearn
#
lin=LinearRegression()
lin.fit(Y,f)
fh1=lin.predict(Y)

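# Optional check (an addition, not in the original): look at the fitted
# parameters and the in-sample R^2. Note that LinearRegression fits an
# intercept by default, while the pseudo-inverse solution below does not.
print(lin.coef_,lin.intercept_)
print("R^2 (sklearn):",lin.score(Y,f))
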
# Pseudo-inverse solution to linear regression
#
a=np.linalg.inv(Y.T@Y)@Y.T@f
fh2=Y@a

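# A hedged alternative (not in the original): np.linalg.lstsq (or np.linalg.pinv)
# solves the same least-squares problem without forming an explicit inverse,
# which is numerically safer. The two solutions should agree closely.
a_lstsq,*_=np.linalg.lstsq(Y,f,rcond=None)
print("max |a - a_lstsq|:",np.max(np.abs(a-a_lstsq)))
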
# Plot predictions to check that they look the same!
#
fig,ax=plt.subplots(nrows=1,ncols=2,figsize=(10,5))
ax[0].scatter(f,fh1,c='c',s=3)
ax[0].grid(True)
ax[0].set_title("Sklearn",fontsize=14)

ax[1].scatter(f,fh2,c='m',s=3)
ax[1].grid(True)
ax[1].set_title("Pseudoinverse",fontsize=14)

# Ridge (L2-regularized) least-squares solution in closed form
gamma=0.5
aR=np.linalg.inv(Y.T@Y+gamma*np.identity(NumFeatures))@Y.T@f

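# Sanity check (an assumption, not part of the original paste): the closed-form
# solution above should match sklearn's Ridge with alpha=gamma and no intercept,
# since both minimize ||f - Y@a||^2 + gamma*||a||^2.
from sklearn.linear_model import Ridge
ridge=Ridge(alpha=gamma,fit_intercept=False)
ridge.fit(Y,f)
print("max |aR - ridge.coef_|:",np.max(np.abs(aR-ridge.coef_)))
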
# Compare the pseudo-inverse and ridge coefficients
fig,ax=plt.subplots(nrows=1,ncols=2,figsize=(8,4))
ax[0].bar(np.arange(len(a)),a)
ax[0].set_title('Pseudo-inverse solution',fontsize=14)
ax[0].grid(True)
ax[0].set_ylim(np.min(a),np.max(a))

ax[1].bar(np.arange(len(aR)),aR)
ax[1].set_title('Regularized solution',fontsize=14)
ax[1].grid(True)
ax[1].set_ylim(np.min(a),np.max(a))

# Lasso (L1-regularized) regression using sklearn
from sklearn.linear_model import Lasso
ll=Lasso(alpha=0.2)
ll.fit(Y,f)
yh_lasso=ll.predict(Y)

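# Optional check (added here as a sketch): Lasso drives some coefficients
# exactly to zero; count them and report the in-sample R^2.
print("non-zero Lasso coefficients:",np.sum(ll.coef_!=0))
print("R^2 (Lasso):",ll.score(Y,f))
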
# Compare the pseudo-inverse and Lasso coefficients
fig,ax=plt.subplots(nrows=1,ncols=2,figsize=(10,4))
ax[0].bar(np.arange(len(a)),a)
ax[0].set_title('Pseudo-inverse solution',fontsize=14)
ax[0].grid(True)
ax[0].set_ylim(np.min(a),np.max(a))

ax[1].bar(np.arange(len(ll.coef_)),ll.coef_)
ax[1].set_title('Lasso solution',fontsize=14)
ax[1].grid(True)
ax[1].set_ylim(np.min(a),np.max(a))
plt.savefig('h')
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import lasso_path
from sklearn import datasets

# Synthetic data
# Problem taken from Hastie et al., Statistical Learning with Sparsity:
# Z1, Z2 ~ N(0,1)
# y = 3*Z1 - 1.5*Z2 + 2*N(0,1)   (noisy response)
# Noisy inputs (the six features form two groups of three each):
# Xj = Z1 + 0.2*N(0,1) for j = 1,2,3, and
# Xj = Z2 + 0.2*N(0,1) for j = 4,5,6.

N=100
y=np.empty(0)
X=np.empty([0,6])
for i in range(N):
    Z1=np.random.randn()
    Z2=np.random.randn()
    y=np.append(y,3*Z1-1.5*Z2+2*np.random.randn())
    Xarr=np.array([Z1,Z1,Z1,Z2,Z2,Z2])+np.random.randn(6)/5
    X=np.vstack((X,Xarr))
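
# Equivalent vectorized construction (a sketch, not in the original paste;
# the names y_alt and X_alt are illustrative only):
# draw all N latent pairs at once instead of looping.
Z=np.random.randn(N,2)
y_alt=3*Z[:,0]-1.5*Z[:,1]+2*np.random.randn(N)
X_alt=np.repeat(Z,3,axis=1)+np.random.randn(N,6)/5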

# Compute regressions with Lasso and return the coefficient paths
#
alphas_lasso,coefs_lasso,_=lasso_path(X,y,fit_intercept=False)

# Plot each coefficient's path against the regularization strength
#
fig,ax=plt.subplots(figsize=(8,4))
for i in range(6):
    ax.plot(alphas_lasso,coefs_lasso[i,:])

ax.grid(True)
ax.set_xlabel("Regularization")
ax.set_ylabel("Regression Coefficients")
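
# Regularization paths are usually easier to read with alpha on a log scale;
# an optional tweak using standard matplotlib calls.
ax.set_xscale('log')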