# Logit model, v0.2

using CSV, Plots; pyplot();
using Printf   # for the @printf calls below (on Julia 0.6 @printf is in Base and this line can be dropped)
data = CSV.read("/Users/kevinliu/Documents/machine-learning-ex2/ex2/ex2data1.txt", datarow=1)

# X: 100×3 design matrix (an intercept column plus the two feature columns); y: 0/1 labels
X = hcat(ones(100, 1), Matrix(data[:, [1, 2]]))
y = Vector(data[:, 3])

# Sigmoid function
function sigmoid(z)
    1.0 ./ (1.0 .+ exp.(-z))
end

sigmoid(0)                  # => 0.5
z = rand(3, 1); sigmoid(z)  # vector
z = rand(3, 3); sigmoid(z)  # matrix
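
# Added sketch (not in the original paste): in Float64 the sigmoid saturates to
# exactly 0.0 or 1.0 for large |z|, which later turns log(h) or log(1 - h) into
# -Inf inside the cost; clamping the output keeps the cost finite. The name
# sigmoid_safe and the 1e-15 bound are my own choices.
sigmoid_safe(z) = clamp.(sigmoid(z), 1e-15, 1 - 1e-15)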

# Hypothesis: σ(θᵀx) for a single example x, i.e. the features of x linearly
# combined with θ and passed through the sigmoid; cost() calls this once per row
function h(θ, x)
    z = 0.0
    for i in 1:length(θ)
        z += θ[i] * x[i]
    end
    sigmoid(z)
end

# the hypothesis applied to one row of X (with zero θ every prediction is 0.5)
h([0, 0], X[1, :])
h([0, 0, 0], X[1, :])
h([-24, 0.2, 0.2], X[1, :])
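
# Added sketch (not in the original paste): the same hypothesis evaluated for
# every training example at once; X * θ computes all 100 linear combinations and
# sigmoid maps them to probabilities. Assumes the 100×3 X built above.
sigmoid(X * [-24, 0.2, 0.2])   # 100-element vector of predicted probabilities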

# Cost function: the average log-loss over all training examples.
# y = 1 penalizes low predicted probabilities, y = 0 penalizes high ones,
# so the penalty grows with the gap between the prediction and the label.
# To do: once regularization is added, θ₀ should not be penalized
function cost(θ, X, y)
    m = length(y)   # number of training examples
    errorsum = 0.0
    for i in 1:m
        if y[i] == 1
            err = y[i] * log(h(θ, X[i, :]))
        else        # y[i] == 0
            err = (1 - y[i]) * log(1 - h(θ, X[i, :]))
        end
        errorsum += err
    end
    J = (-1 / m) * errorsum
    println("Cost is $J")
    return J
end

cost([0, 0], X, y)
# => Cost is 0.693147, as expected
cost([0, 0, 0], X, y)
# => Cost is 0.693147, as expected
cost([-24, 0.2, 0.2], X, y)
# => Cost is 0.21833, as expected
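
# Added sketch (not in the original paste): the same cost written as one
# vectorized expression over all examples; the name cost_vec is mine.
function cost_vec(θ, X, y)
    p = sigmoid(X * θ)
    -sum(y .* log.(p) .+ (1 .- y) .* log.(1 .- p)) / length(y)
end
cost_vec([-24, 0.2, 0.2], X, y)   # ≈ 0.21833, matching cost() above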

# θ gradient step: for component j, the learning rate α times the partial
# derivative of the cost with respect to θ[j], i.e. α times the average
# prediction error weighted by feature j across all examples
function cost_deriv(X, y, θ, j, α)
    m = length(y)
    errorsum = 0.0
    for i in 1:m
        errorsum += (h(θ, X[i, :]) - y[i]) * X[i, j]
    end
    (α / m) * errorsum
end

cost_deriv(X, y, [0, 0], 2, 0.1)
# => -1.20092, as expected (version 1 returned both feature components: -1.20092 -1.12628)
cost_deriv(X, y, [0, 0, 0], 2, 0.1)
cost_deriv(X, y, [-24, 0.2, 0.2], 2, 0.1)
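
# Added sketch (not in the original paste): the full gradient vector in one
# expression (without the α factor); the name grad_vec is mine.
grad_vec(θ, X, y) = (X' * (sigmoid(X * θ) - y)) / length(y)
grad_vec([0, 0, 0], X, y)   # ≈ [-0.1, -12.0092, -11.2628], the expected gradient at zeros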

# Gradient descent: one update step in θ space, moving the current θ toward a
# lower-cost θ by subtracting each component's scaled partial derivative
function gd(X, y, θ, α)
    θ_new = Float64[]
    for j in 1:length(θ)
        push!(θ_new, θ[j] - cost_deriv(X, y, θ, j, α))
    end
    θ_new
end

gd(X, y, [0, 0], 0.1)
gd(X, y, [0, 0, 0], 0.1)
gd(X, y, [-24, 0.2, 0.2], 0.1)
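
# Added sketch (not in the original paste): a cross-check that one gd() step
# matches the vectorized form θ - α * gradient, using grad_vec from above.
gd(X, y, [0.0, 0.0, 0.0], 0.1) ≈ [0.0, 0.0, 0.0] - 0.1 * grad_vec([0.0, 0.0, 0.0], X, y)   # expected: true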

# Logit model: the high-level driver. Runs iter gradient-descent steps, moving
# the θ estimates toward the values that minimize the cost, and reports the
# cost every 100 iterations along the way.
function logit(X, y, θ, α, iter)
    for i in 1:iter
        θ = gd(X, y, θ, α)
        if mod(i, 100) == 0
            cost(θ, X, y)   # prints the current cost
        end
    end
    println("θ is $θ")
    println("J is $(cost(θ, X, y))")
    θ
end

logit(X, y, [0, 0], 0.1, 1000)
logit(X, y, [0, 0, 0], 0.1, 1000)
# cost will not decrease on every iteration because gd() is not yet optimized
# To do: gd() should also be monitored for convergence (see the sketch below)
logit(X, y, [-24, 0.2, 0.2], 0.1, 1000)
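
# Added sketch (not in the original paste): one way to monitor gd() for
# convergence, stopping once the cost change between iterations drops below a
# tolerance. The name logit_converge and the tol/maxiter defaults are mine.
function logit_converge(X, y, θ, α; tol = 1e-6, maxiter = 10_000)
    J_prev = cost(θ, X, y)
    for _ in 1:maxiter
        θ = gd(X, y, θ, α)
        J_curr = cost(θ, X, y)
        abs(J_prev - J_curr) < tol && break
        J_prev = J_curr
    end
    θ
end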

# compute and display initial cost and gradient
J = cost([0, 0, 0], X, y);
gradient = [cost_deriv(X, y, [0, 0, 0], j, 1.0) for j in 1:size(X, 2)];   # α = 1 gives the unscaled gradient
@printf("Cost at initial theta (zeros): %f\n", J);
@printf("Expected cost (approx): 0.693\n");
@printf("Gradient at initial theta (zeros): %f %f %f\n", gradient[1], gradient[2], gradient[3]);
@printf("Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n");

# compute and display cost and gradient with non-zero theta
J = cost([-24, 0.2, 0.2], X, y);
gradient = [cost_deriv(X, y, [-24, 0.2, 0.2], j, 1.0) for j in 1:size(X, 2)];
@printf("Cost at test theta: %f\n", J);
@printf("Expected cost (approx): 0.218\n");
@printf("Gradient at test theta: %f %f %f\n", gradient[1], gradient[2], gradient[3]);
@printf("Expected gradients (approx):\n 0.043\n 2.566\n 2.647\n");

# Optimizing theta parameters with Optim.jl
using Optim
function fmin(X, y)
    J(θ) = cost(θ, X, y)
    θ₀ = zeros(Float64, 3)
    optimize(J, θ₀)        # Nelder-Mead by default (no gradient required)
end

fmin(X, y)

optimize(θ -> cost(θ, X, y), zeros(Float64, 3), LBFGS())
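
# Added sketch (not in the original paste): pulling the fitted parameters and
# the minimum cost out of the Optim result via Optim.minimizer / Optim.minimum.
res = fmin(X, y)
θ_opt = Optim.minimizer(res)   # fitted parameters
J_opt = Optim.minimum(res)     # cost at θ_opt; should be at or below the 0.218 of the test θ if the optimizer converged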