using Images
using PyPlot
using FileIO
using Optim
using Random
using Statistics

include("Common.jl")

# Debug helper for printing array shapes (TODO: remove)
function printarr(name, arr; show=false)
  println("Array: $name, shape: $(size(arr))")
  if show
    display(arr)
  end
end

#---------------------------------------------------------
# Load features and labels from file.
#---------------------------------------------------------
function loaddata(path::String)
  data = load(path)
  features, labels = data["features"], data["labels"]
  @assert length(labels) == size(features,1)
  return features::Array{Float64,2}, labels::Array{Float64,1}
end

#---------------------------------------------------------
# Show a 2-dimensional plot for the given features with
# different colors according to the labels.
#---------------------------------------------------------
function showbefore(features::Array{Float64,2},labels::Array{Float64,1})

  # Create masks for class 0 and 1
  mask0 = labels .== 0.0
  mask1 = labels .== 1.0

  # Plot
  figure()
  scatter(features[mask0, 1], features[mask0, 2], c="red", label="class 0")
  scatter(features[mask1, 1], features[mask1, 2], c="blue", label="class 1")

  # Setup and show
  title("Data before")
  xlabel("x1")
  ylabel("x2")
  legend()
  show()
  return nothing::Nothing
end


#---------------------------------------------------------
# Show a 2-dimensional plot for the given features along
# with the decision boundary.
#---------------------------------------------------------
function showafter(features::Array{Float64,2},labels::Array{Float64,1},Ws::Vector{Any}, bs::Vector{Any})

  return nothing::Nothing
end
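
# A possible implementation of showafter (in Julia, this later definition
# replaces the empty stub above when the file is loaded). It assumes predict
# (defined below) returns class scores in [0,1] and draws the 0.5 level set
# as the decision boundary; the 200x200 grid resolution is an arbitrary choice.
function showafter(features::Array{Float64,2},labels::Array{Float64,1},Ws::Vector{Any}, bs::Vector{Any})
  # Scatter the data points as in showbefore
  showbefore(features, labels)

  # Evaluate the network scores on a regular grid covering the data range
  x1 = range(minimum(features[:,1]), stop=maximum(features[:,1]), length=200)
  x2 = range(minimum(features[:,2]), stop=maximum(features[:,2]), length=200)
  grid = hcat(vec([a for a in x1, b in x2]), vec([b for a in x1, b in x2]))
  p, _ = predict(grid, Ws, bs)

  # Draw the decision boundary as the 0.5 contour of the predicted class score
  contour(x1, x2, permutedims(reshape(vec(p), length(x1), length(x2))), levels=[0.5])
  show()
  return nothing::Nothing
end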


#---------------------------------------------------------
# Implements the sigmoid function.
#---------------------------------------------------------
function sigmoid(z)
  # Note: .+ is needed so that the function also works elementwise on arrays
  s = 1 ./ (1 .+ exp.(-z))
  return s
end
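
# Quick sanity check (illustrative, not part of the assignment): the sigmoid
# is exactly 0.5 at zero and maps everything into the open interval (0, 1).
@assert sigmoid(0.0) == 0.5
@assert all(0 .< sigmoid.([-5.0, 0.0, 5.0]) .< 1)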


#---------------------------------------------------------
# Implements the derivative of the sigmoid function.
#---------------------------------------------------------
function dsigmoid_dz(z)
  s = sigmoid.(z)
  ds = s.*(1 .- s)
  return ds
end
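
# Optional numerical check (illustrative, not part of the assignment): the
# analytic derivative should agree with a central finite difference of sigmoid.
let h = 1e-6
  for z in (-2.0, 0.0, 1.5)
    fd = (sigmoid(z + h) - sigmoid(z - h)) / (2h)
    @assert isapprox(dsigmoid_dz(z), fd; atol=1e-6)
  end
end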


#---------------------------------------------------------
# Evaluates the loss function of the MLP.
#---------------------------------------------------------
function nnloss(theta::Array{Float64,1}, X::Array{Float64,2}, y::Array{Float64,1}, netdefinition::Array{Int, 1})
  # Get weights and biases
  Ws, bs = thetaToWeights(theta, netdefinition)

  # Forward pass with the training samples along the columns,
  # consistent with feedforward below
  a = X'
  for i = 1:length(Ws)
    a = sigmoid.(Ws[i] * a .+ bs[i])
  end

  # Class probabilities from the single sigmoid output unit
  # (a softmax over a single output is degenerate, so the last layer is
  # kept as a sigmoid, matching feedforward and nnlossgrad)
  probs = vec(a)

  # Binary cross-entropy for one sample: p is the prediction, t the true label
  L(p, t) = -t * log(p) - (1 - t) * log(1 - p)

  # Mean loss over all training samples
  loss = sum(L.(probs, y)) / length(y)

  return loss::Float64
end
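
# Optional sanity check (illustrative; the name checknnloss is not part of the
# assignment): with near-zero initial weights every output is close to 0.5,
# so the mean binary cross-entropy should be close to log(2) ≈ 0.693.
function checknnloss()
  nd = [2, 4, 1]
  Ws0, bs0 = initWeights(nd, 0.01, 0.001)
  X0 = randn(10, 2)
  y0 = Float64.(rand(0:1, 10))
  return nnloss(weightsToTheta(Ws0, bs0), X0, y0, nd)
end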


#---------------------------------------------------------
# Softmax activation function to get probabilities
#---------------------------------------------------------
function softmax(x)
  exps = exp.(x)
  sums = sum(exps)
  return exps / sums
end
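
# Note (an addition, not part of the original paste): subtracting the maximum
# before exponentiating avoids overflow for large inputs and does not change
# the result; the name softmax_stable is only used here for illustration.
function softmax_stable(x)
  exps = exp.(x .- maximum(x))
  return exps ./ sum(exps)
end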

#---------------------------------------------------------
# Feed forward pass. Returns activations for each layer.
#---------------------------------------------------------
function feedforward(theta::Array{Float64,1}, X::Array{Float64,2}, y::Array{Float64,1}, netdefinition::Array{Int, 1})
  println("Entered feedforward")
  # Get weights and biases
  Ws, bs = thetaToWeights(theta, netdefinition)

  # Forward pass
  activations = []
  zs = []

  # Put training samples along columns for efficient multiplication
  a = X'
  push!(activations, a)
  for i=1:length(Ws)
    println("forward loop iteration $i")
    println("z = Ws[$i] * a .+ bs[$i]")
    println("with shape(Ws[$i]) = $(size(Ws[i])), shape(bs[$i]) = $(length(bs[i]))")
    println("a has shape: $(size(a))")
    z = Ws[i] * a .+ bs[i]
    a = sigmoid.(z)
    push!(zs, z)
    push!(activations, a)
  end
  return activations, zs
end
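
# Optional shape check (illustrative; checkfeedforward is not part of the
# assignment): for netdefinition [2,4,1] and N samples the activations should
# have sizes 2xN, 4xN and 1xN, confirming the samples-along-columns convention.
function checkfeedforward(N=5)
  nd = [2, 4, 1]
  Ws0, bs0 = initWeights(nd, 0.01, 0.001)
  acts, zs = feedforward(weightsToTheta(Ws0, bs0), randn(N, 2), zeros(N), nd)
  return [size(a) for a in acts]
end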

#---------------------------------------------------------
# Evaluate the gradient of the MLP loss w.r.t. Ws and Bs
# The gradient should be stored in the vector 'storage'
#---------------------------------------------------------
function nnlossgrad(storage::Array{Float64,1}, theta::Array{Float64,1}, X::Array{Float64,2}, y::Array{Float64,1}, netdefinition::Array{Int, 1})
  # Get weights and biases
  Ws, bs = thetaToWeights(theta, netdefinition)
  nlayers = length(netdefinition) - 1
  N = length(y)

  # Compute activations (samples along the columns)
  activations, zs = feedforward(theta, X, y, netdefinition)
  probs = activations[end]           # 1 x N output probabilities

  # Gradient containers with the same shapes as Ws and bs
  dWs = Any[zeros(size(W)) for W in Ws]
  dbs = Any[zeros(size(b)) for b in bs]

  # Output layer: for a sigmoid output with binary cross-entropy the error
  # signal is simply (p - y), scaled by 1/N because the loss is a mean.
  delta = (probs .- y') ./ N         # 1 x N
  dbs[end] = vec(sum(delta, dims=2))
  dWs[end] = delta * activations[end-1]'

  # Hidden layers: propagate the error signal backwards
  for l = 2:nlayers
    z = zs[end - l + 1]
    W = Ws[end - l + 2]
    delta = (W' * delta) .* dsigmoid_dz.(z)
    dbs[end - l + 1] = vec(sum(delta, dims=2))
    dWs[end - l + 1] = delta * activations[end - l]'
  end

  # Write the flattened gradient into 'storage' in place, as expected by Optim
  # (rebinding storage to a new array would not be visible to the caller)
  storage[:] .= weightsToTheta(dWs, dbs)
  return storage::Array{Float64,1}
end
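
# Optional gradient check (illustrative; checkgrad and h are not part of the
# assignment): the analytic gradient should agree with central finite
# differences of nnloss on a small random network.
function checkgrad(; h=1e-6)
  nd = [2, 3, 1]
  Ws0, bs0 = initWeights(nd, 0.1, 0.1)
  theta0 = weightsToTheta(Ws0, bs0)
  X0 = randn(7, 2)
  y0 = Float64.(rand(0:1, 7))

  # Analytic gradient
  g = zeros(length(theta0))
  nnlossgrad(g, theta0, X0, y0, nd)

  # Finite-difference gradient
  gfd = similar(g)
  for k in eachindex(theta0)
    tp = copy(theta0); tp[k] += h
    tm = copy(theta0); tm[k] -= h
    gfd[k] = (nnloss(tp, X0, y0, nd) - nnloss(tm, X0, y0, nd)) / (2h)
  end

  # Should be close to zero
  return maximum(abs.(g .- gfd))
end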

# Debug helper: print the size of every array in a collection
function printshapes(name, arr)
  println(name)
  for (i, a) in enumerate(arr)
    println("$i: $(size(a))")
  end
end

#---------------------------------------------------------
# Use LBFGS to optimize the MLP loss
#---------------------------------------------------------
function train(trainfeatures::Array{Float64,2}, trainlabels::Array{Float64,1}, netdefinition::Array{Int, 1})
  # Initialize weights and biases with small random values
  sigma_w = 0.01
  sigma_b = 0.001
  Ws, bs = initWeights(netdefinition, sigma_w, sigma_b)
  theta = weightsToTheta(Ws, bs)
  # Round-trip sanity check of the flattening helpers (result is unused)
  Wsp, bsp = thetaToWeights(theta, netdefinition)

  # Loss and in-place gradient for Optim
  L(theta) = nnloss(theta, trainfeatures, trainlabels, netdefinition)
  Lgrad!(storage, theta) = nnlossgrad(storage, theta, trainfeatures, trainlabels, netdefinition)

  # Minimize with LBFGS
  res = optimize(L, Lgrad!, theta, LBFGS())
  mintheta = Optim.minimizer(res)

  # Unpack the optimal parameters back into weight matrices and bias vectors
  Ws, bs = thetaToWeights(mintheta, netdefinition)
  return Ws::Vector{Any},bs::Vector{Any}
end


#---------------------------------------------------------
# Predict the classes of the given data points using Ws and bs.
# p, N x 1 Array{Float64,2}, contains the output class score (continuous value) for each input feature.
# c, N x 1 Array{Float64,2}, contains the output class label (either 0 or 1) for each input feature.
#---------------------------------------------------------
function predict(X::Array{Float64,2}, Ws::Vector{Any}, bs::Vector{Any})

  return p::Array{Float64,2}, c::Array{Float64,2}
end
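
# A possible implementation of predict (in Julia, this later definition
# replaces the empty stub above when the file is loaded). It runs the same
# forward pass as feedforward and assumes the usual 0.5 threshold on the
# sigmoid output for the class decision.
function predict(X::Array{Float64,2}, Ws::Vector{Any}, bs::Vector{Any})
  # Forward pass with the samples along the columns
  a = X'
  for i = 1:length(Ws)
    a = sigmoid.(Ws[i] * a .+ bs[i])
  end

  # Class scores as an N x 1 matrix, class labels by thresholding at 0.5
  p = reshape(vec(a), :, 1)
  c = Float64.(p .>= 0.5)
  return p::Array{Float64,2}, c::Array{Float64,2}
end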


#---------------------------------------------------------
# A helper function which concatenates weights and biases into a variable theta
#---------------------------------------------------------
function weightsToTheta(Ws::Vector{Any}, bs::Vector{Any})
  # Init theta as a growable vector
  theta = Float64[]
  for i=1:length(Ws)
    # Flatten Ws column-major and append, followed by the biases
    append!(theta, vec(Float64.(Ws[i])))
    append!(theta, Float64.(bs[i]))
  end
  return theta::Vector{Float64}
end


#---------------------------------------------------------
# A helper function which decomposes and reshapes weights and biases from the variable theta
#---------------------------------------------------------
function thetaToWeights(theta::Vector{Float64}, netdefinition::Array{Int,1})
  # Init weight and bias vectors
  nlayers = length(netdefinition) - 1
  Ws = Vector{Any}(missing, nlayers)
  bs = Vector{Any}(missing, nlayers)

  # Alias
  nd = netdefinition

  # Offset into the theta vector
  offset = 1

  # For each layer
  for i=1:nlayers
    # Length of the current layer's weight and bias blocks
    size_wi = nd[i] * nd[i + 1]
    size_bi = nd[i + 1]

    # Get current weights
    wi = theta[offset:offset+size_wi - 1]

    # Shift offset
    offset += size_wi

    # Get current bias
    bi = theta[offset:offset+size_bi - 1]

    # Shift offset
    offset += size_bi

    # Collect weights and biases
    Ws[i] = reshape(wi, nd[i+1], nd[i])
    bs[i] = bi
  end
  return Ws::Vector{Any}, bs::Vector{Any}
end


#---------------------------------------------------------
# Initialize weights and biases from Gaussian distributions
#---------------------------------------------------------
function initWeights(netdefinition::Array{Int,1}, sigmaW::Float64, sigmaB::Float64)
  nlayers = length(netdefinition) - 1
  Ws = Vector{Any}(missing, nlayers)
  bs = Vector{Any}(missing, nlayers)

  nd = netdefinition
  # For each layer: init weight matrix and bias vector
  for i=1:nlayers
    # W has nd[i] as input and nd[i+1] as output
    Ws[i] = randn(nd[i+1], nd[i]) * sigmaW
    bs[i] = randn(nd[i+1]) * sigmaB
  end

  return Ws::Vector{Any}, bs::Vector{Any}
end
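
# Optional round-trip check (illustrative, not part of the assignment):
# flattening weights into theta and unpacking them again should reproduce
# exactly the same weight matrices and bias vectors.
let nd = [2, 4, 1]
  Ws0, bs0 = initWeights(nd, 0.1, 0.1)
  Ws1, bs1 = thetaToWeights(weightsToTheta(Ws0, bs0), nd)
  @assert all(Ws0[i] == Ws1[i] for i in 1:length(Ws0))
  @assert all(bs0[i] == bs1[i] for i in 1:length(bs0))
end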


# Problem 2: Multilayer Perceptron

function problem2()
  # make results reproducible
  Random.seed!(10)

  # LINEARLY SEPARABLE DATA
  # load data
  features,labels = loaddata("separable.jld2")

  # show data points
  showbefore(features,labels)
  title("Data for Separable Case")

  # train MLP
  Ws,bs = train(features,labels, [2,4,1])

  # show optimum and plot decision boundary
  showafter(features,labels,Ws,bs)
  title("Learned Decision Boundary for Separable Case")


  ## LINEARLY NON-SEPARABLE DATA
  # load data
  features2,labels2 = loaddata("nonseparable.jld2")

  # show data points
  showbefore(features2,labels2)
  title("Data for Non-Separable Case")

  # train MLP
  Ws,bs = train(features2,labels2, [2,4,1])

  # show optimum and plot decision boundary
  showafter(features2,labels2,Ws, bs)
  title("Learned Decision Boundary for Non-Separable Case")

  # PLANE-BIKE-CLASSIFICATION FROM PROBLEM 2
  # load data
  trainfeatures,trainlabels = loaddata("imgstrain.jld2")
  testfeatures,testlabels = loaddata("imgstest.jld2")

  # train MLP and predict classes
  Ws,bs = train(trainfeatures,trainlabels, [50,40,30,1])
  _,trainpredictions = predict(trainfeatures, Ws, bs)
  _,testpredictions = predict(testfeatures, Ws, bs)

  # show error
  trainerror = sum(trainpredictions.!=trainlabels)/length(trainlabels)
  testerror = sum(testpredictions.!=testlabels)/length(testlabels)
  println("Training Error Rate: $(round(100*trainerror,digits=2))%")
  println("Testing Error Rate: $(round(100*testerror,digits=2))%")

  return
end
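
# To run the experiments (assuming the .jld2 data files from the assignment
# are available in the working directory), call:
# problem2()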