# # Algorithm
#
# $$
# \mathbf{x}^{(0)} = (x_1^{(0)}, \dots, x_n^{(0)})
# $$
#
# $$
# x_i^{(k+1)} = \operatorname{argmin}_{\omega} f(x_1^{(k+1)}, \dots, x_{i-1}^{(k+1)}, \omega, x_{i+1}^{(k)}, \dots, x_n^{(k)})
# $$
#
# $$
# x_i := x_i - \alpha \frac{\partial f}{\partial x_i}(\mathbf{x})
# $$
#
# $$
# \rho_j = \sum_{i=1}^m x_j^{(i)} \left( y^{(i)} - \sum_{k \neq j}^n \theta_k x_k^{(i)} \right) = \sum_{i=1}^m x_j^{(i)} \left( y^{(i)} - \hat{y}^{(i)}_{\text{pred}} + \theta_j x_j^{(i)} \right)
# $$
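#
# For normalized features, each coordinate update has a closed form: $\theta_j$ is obtained by
# soft-thresholding $\rho_j$ (this is the standard lasso coordinate-descent update, stated here
# to connect $\rho_j$ to the `soft_threshold` function implemented below):
#
# $$
# \theta_j = S(\rho_j, \lambda) =
# \begin{cases}
# \rho_j + \lambda & \text{if } \rho_j < -\lambda \\
# 0 & \text{if } -\lambda \le \rho_j \le \lambda \\
# \rho_j - \lambda & \text{if } \rho_j > \lambda
# \end{cases}
# $$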

# +
# Load the diabetes dataset

using ScikitLearn
@sk_import datasets: load_diabetes

diabetes = load_diabetes()

X = diabetes["data"]
y = diabetes["target"]
diabetes

# +
using Statistics


  35. """
  36. Soft threshold function used for normalized
  37. data and lasso regression
  38. """
  39. function soft_threshold(ρ :: Float64, λ :: Float64)
  40.     if ρ + λ <  0
  41.         return (ρ + λ)
  42.     elseif ρ - λ > 0
  43.         return (ρ - λ)
  44.     else
  45.         return 0
  46.     end
  47. end
# -

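# +
# Quick sanity check of the soft-thresholding operator (the inputs below are arbitrary
# illustrative values, not taken from the dataset):
soft_threshold(1.5, 1.0)    # 0.5  -> shrunk towards zero
soft_threshold(-2.0, 1.0)   # -1.0 -> shrunk towards zero from below
soft_threshold(0.3, 1.0)    # 0.0  -> small coefficients are set exactly to zero
# -
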
  50. """
  51. Coordinate gradient descent for lasso regression
  52. for normalized data.
  53.  
  54. The intercept parameter allows to specify whether
  55. or not we regularize ``\\theta_0``
  56. """
function coordinate_descent_lasso(theta, X, y; lamda = 0.01,
        num_iters = 100, intercept = false)

    # Initialisation of useful values
    m, n = size(X)
    X .= X ./ sqrt.(sum(X.^2, dims=1)) # normalize X in case it was not done before

    # Loop until the maximum number of iterations
    for i in 1:num_iters

        # Loop through each coordinate
        for j in 1:n

            # Vectorized computation of rho_j (a scalar)
            X_j = view(X, :, j)
            y_pred = X * theta
            rho = X_j' * (y .- y_pred .+ theta[j] .* X_j)

            # If the first coefficient is an intercept, leave it unregularized;
            # otherwise apply the soft-thresholding update
            if intercept && j == 1
                theta[j] = rho
            else
                theta[j] = soft_threshold(rho, lamda)
            end
        end
    end

    return vec(theta)
end
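
# +
# Minimal usage sketch of the `intercept` keyword: it assumes the *first column* of the design
# matrix is a constant column of ones, which is not the case for the diabetes data used below,
# so the call is left commented out and is purely illustrative.
# X_int = hcat(ones(size(X, 1)), X)
# theta_int = coordinate_descent_lasso(ones(size(X_int, 2)), X_int, y; lamda = 0.1, intercept = true)
# -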

# +
using Plots
# Initialize variables
m, n = size(X)
initial_theta = ones(Float64, n)
theta_lasso = Vector{Float64}[]
lamda = exp10.(range(0, stop=4, length=300)) ./ 10 # range of lambda values

# Run lasso regression for each lambda
for l in lamda
    theta = coordinate_descent_lasso(initial_theta, X, y; lamda = l, num_iters = 100)
    push!(theta_lasso, copy(theta))
end
theta_lasso = vcat(theta_lasso'...); # stack the solutions into an (n_lambda × n_features) matrix
# -

# Plot results
p = plot(size = (900, 600))   # Plots.jl uses `size` (in pixels) rather than matplotlib's `figsize`
for (i, label) in enumerate(diabetes["feature_names"])
    plot!(p, theta_lasso[:, i], label = label)
end
display(p)