using Images
using PyPlot
using FileIO
using Optim
using Random
using Statistics

include("Common.jl")

# TODO: remove
function printarr(name, arr; show=false)
    println("Array: $name, shape: $(size(arr))")
    if show
        display(arr)
    end
end

#---------------------------------------------------------
# Load features and labels from file.
#---------------------------------------------------------
function loaddata(path::String)
    data = load(path)
    features, labels = data["features"], data["labels"]
    @assert length(labels) == size(features,1)
    return features::Array{Float64,2}, labels::Array{Float64,1}
end

#---------------------------------------------------------
# Show a 2-dimensional plot for the given features with
# different colors according to the labels.
#---------------------------------------------------------
function showbefore(features::Array{Float64,2},labels::Array{Float64,1})
    # Create masks for class 0 and 1
    mask0 = labels .== 0.0
    mask1 = labels .== 1.0
    # Plot
    figure()
    scatter(features[mask0, 1], features[mask0, 2], c="red", label="class 0")
    scatter(features[mask1, 1], features[mask1, 2], c="blue", label="class 1")
    # Setup and show
    title("Data before")
    xlabel("x1")
    ylabel("x2")
    legend()
    show()
    return nothing::Nothing
end
#---------------------------------------------------------
# Show a 2-dimensional plot for the given features along
# with the decision boundary.
#---------------------------------------------------------
function showafter(features::Array{Float64,2},labels::Array{Float64,1},Ws::Vector{Any}, bs::Vector{Any})
    # Plot the data points, then overlay the 0.5 iso-line of the
    # network output as the decision boundary
    showbefore(features, labels)
    xs = collect(range(minimum(features[:,1]), stop=maximum(features[:,1]), length=100))
    ys = collect(range(minimum(features[:,2]), stop=maximum(features[:,2]), length=100))
    P = [predict([x y], Ws, bs)[1][1] for y in ys, x in xs]
    contour(xs, ys, P, levels=[0.5], colors="black")
    title("Data after")
    return nothing::Nothing
end
#---------------------------------------------------------
# Implements the sigmoid function.
#---------------------------------------------------------
function sigmoid(z)
    s = 1 ./ (1 .+ exp.(-z))
    return s
end
#---------------------------------------------------------
# Implements the derivative of the sigmoid function.
#---------------------------------------------------------
function dsigmoid_dz(z)
    s = sigmoid.(z)
    ds = s.*(1 .- s)
    return ds
end
#---------------------------------------------------------
# Evaluates the loss function of the MLP.
#---------------------------------------------------------
function nnloss(theta::Array{Float64,1}, X::Array{Float64,2}, y::Array{Float64,1}, netdefinition::Array{Int, 1})
    # Get weights and biases
    Ws, bs = thetaToWeights(theta, netdefinition)
    # Forward pass: samples along columns, sigmoid units in every layer.
    # The single sigmoid output serves as the class probability,
    # consistent with feedforward and nnlossgrad.
    a = X'
    for i = 1:length(Ws)
        a = sigmoid.(Ws[i] * a .+ bs[i])
    end
    probs = a[:]
    # Binary cross-entropy, averaged over the samples
    L(p, t) = -t * log(p) - (1 - t) * log(1 - p)
    loss = sum(L.(probs, y)) / length(y)
    return loss::Float64
end
#---------------------------------------------------------
# Softmax activation function to get probabilities.
# Shifting by the maximum avoids overflow in exp.
#---------------------------------------------------------
function softmax(x)
    exps = exp.(x .- maximum(x))
    return exps ./ sum(exps)
end
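
# Illustrative example (not called anywhere in this file):
#   softmax([1.0, 2.0, 3.0]) ≈ [0.0900, 0.2447, 0.6652],
# i.e. positive entries that sum to one.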
#---------------------------------------------------------
# Feed forward pass. Returns activations for each layer.
#---------------------------------------------------------
function feedforward(theta::Array{Float64,1}, X::Array{Float64,2}, y::Array{Float64,1}, netdefinition::Array{Int, 1})
    # Get weights and biases
    Ws, bs = thetaToWeights(theta, netdefinition)
    # Collect pre-activations and activations for every layer
    activations = []
    zs = []
    # Put training samples along columns for efficient multiplication
    a = X'
    push!(activations, a)
    for i = 1:length(Ws)
        z = Ws[i] * a .+ bs[i]
        a = sigmoid.(z)
        push!(zs, z)
        push!(activations, a)
    end
    return activations, zs
end
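
# Shape note (for netdefinition = [2,4,1] and N samples): activations holds
# matrices of size 2xN, 4xN and 1xN, and zs holds the matching
# pre-activations of size 4xN and 1xN.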
#---------------------------------------------------------
# Evaluate the gradient of the MLP loss w.r.t. Ws and bs.
# The gradient is written into the vector 'storage'.
#---------------------------------------------------------
function nnlossgrad(storage::Array{Float64,1}, theta::Array{Float64,1}, X::Array{Float64,2}, y::Array{Float64,1}, netdefinition::Array{Int, 1})
    # Get weights and biases
    Ws, bs = thetaToWeights(theta, netdefinition)
    nlayers = length(netdefinition) - 1
    N = length(y)
    # Compute activations and pre-activations (samples along columns)
    activations, zs = feedforward(theta, X, y, netdefinition)
    probs = activations[end]
    # Gradient accumulators, one per layer
    dWs = Any[zeros(size(W)) for W in Ws]
    dbs = Any[zeros(size(b)) for b in bs]
    # Output layer: for a sigmoid output with cross-entropy loss the
    # delta simplifies to the prediction error p - y
    delta = probs .- y'                              # 1 x N
    dWs[end] = delta * activations[end-1]' / N
    dbs[end] = vec(sum(delta, dims=2)) / N
    # Backpropagate through the hidden layers
    for l = 2:nlayers
        z = zs[end - l + 1]
        dsig = dsigmoid_dz(z)
        W = Ws[end - l + 2]
        delta = (W' * delta) .* dsig
        dWs[end - l + 1] = delta * activations[end - l]' / N
        dbs[end - l + 1] = vec(sum(delta, dims=2)) / N
    end
    # Flatten into the storage vector that Optim mutates in place
    storage[:] .= weightsToTheta(dWs, dbs)
    return storage::Array{Float64,1}
end
function printshapes(name, arr)
    println(name)
    for (i, a) in enumerate(arr)
        println("$i: $(size(a))")
    end
end
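
#---------------------------------------------------------
# Optional sanity check, not part of the original assignment code:
# compare the analytic gradient from nnlossgrad against central finite
# differences on a small random problem. The sample count, sigmas, and
# tolerances below are arbitrary choices for illustration.
#---------------------------------------------------------
function checkgradient(netdefinition::Array{Int,1}; eps::Float64=1e-6, tol::Float64=1e-4)
    X = randn(5, netdefinition[1])
    y = Float64.(rand(5) .> 0.5)
    Ws, bs = initWeights(netdefinition, 0.1, 0.1)
    theta = weightsToTheta(Ws, bs)
    g = nnlossgrad(zeros(length(theta)), theta, X, y, netdefinition)
    for i = 1:length(theta)
        # Perturb one component at a time and compare with the analytic value
        tp = copy(theta); tp[i] += eps
        tm = copy(theta); tm[i] -= eps
        gfd = (nnloss(tp, X, y, netdefinition) - nnloss(tm, X, y, netdefinition)) / (2 * eps)
        abs(g[i] - gfd) > tol && println("mismatch at component $i: $(g[i]) vs $gfd")
    end
    return nothing
end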
#---------------------------------------------------------
# Use LBFGS to optimize the MLP loss.
#---------------------------------------------------------
function train(trainfeatures::Array{Float64,2}, trainlabels::Array{Float64,1}, netdefinition::Array{Int, 1})
    # Initialize weights and biases with small random values
    sigma_w = 0.01
    sigma_b = 0.001
    Ws, bs = initWeights(netdefinition, sigma_w, sigma_b)
    theta = weightsToTheta(Ws, bs)
    # Loss and in-place gradient closures for Optim
    L(theta) = nnloss(theta, trainfeatures, trainlabels, netdefinition)
    Lgrad!(storage, theta) = nnlossgrad(storage, theta, trainfeatures, trainlabels, netdefinition)
    res = optimize(L, Lgrad!, theta, LBFGS())
    # Unpack the optimal parameters
    mintheta = Optim.minimizer(res)
    Ws, bs = thetaToWeights(mintheta, netdefinition)
    return Ws::Vector{Any},bs::Vector{Any}
end
#---------------------------------------------------------
# Predict the classes of the given data points using Ws and bs.
# p, N x 1 Array{Float64,2}: output class scores (continuous values) for each input feature.
# c, N x 1 Array{Float64,2}: output class labels (either 0 or 1) for each input feature.
#---------------------------------------------------------
function predict(X::Array{Float64,2}, Ws::Vector{Any}, bs::Vector{Any})
    # Forward pass with sigmoid units in every layer, matching feedforward
    a = X'
    for i = 1:length(Ws)
        a = sigmoid.(Ws[i] * a .+ bs[i])
    end
    # Samples back along rows; threshold the scores at 0.5 for hard labels
    p = Array{Float64,2}(a')
    c = Float64.(p .>= 0.5)
    return p::Array{Float64,2}, c::Array{Float64,2}
end
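
# Example use (illustrative):
#   p, c = predict(testfeatures, Ws, bs)   # p: scores in (0,1), c: hard 0/1 labels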
#---------------------------------------------------------
# A helper function which concatenates weights and biases into a variable theta.
#---------------------------------------------------------
function weightsToTheta(Ws::Vector{Any}, bs::Vector{Any})
    # Init theta as dynamic list
    theta = Float64[]
    for i = 1:length(Ws)
        # Reshape and unfold Ws and bs
        push!(theta, reshape(Float64.(Ws[i]), :)...)
        push!(theta, Float64.(bs[i])...)
    end
    return theta::Vector{Float64}
end
#---------------------------------------------------------
# A helper function which decomposes and reshapes weights and biases from the variable theta.
#---------------------------------------------------------
function thetaToWeights(theta::Vector{Float64}, netdefinition::Array{Int,1})
    # Init weight and bias vectors
    nlayers = length(netdefinition) - 1
    Ws = Vector{Any}(missing, nlayers)
    bs = Vector{Any}(missing, nlayers)
    # Alias
    nd = netdefinition
    # Offset into the theta vector
    offset = 1
    # For each layer
    for i = 1:nlayers
        # Length of the current layer's weight and bias blocks
        size_wi = nd[i] * nd[i + 1]
        size_bi = nd[i + 1]
        # Get current weights
        wi = theta[offset:offset+size_wi - 1]
        # Shift offset
        offset += size_wi
        # Get current bias
        bi = theta[offset:offset+size_bi - 1]
        # Shift offset
        offset += size_bi
        # Collect weights and biases
        Ws[i] = reshape(wi, nd[i+1], nd[i])
        bs[i] = bi
    end
    return Ws::Vector{Any}, bs::Vector{Any}
end
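
# Round-trip check (illustrative): flattening and unflattening should
# reproduce the original weights, e.g.
#   Ws, bs = initWeights([2,4,1], 0.01, 0.001)
#   Wsr, bsr = thetaToWeights(weightsToTheta(Ws, bs), [2,4,1])
#   @assert all(Ws .≈ Wsr) && all(bs .≈ bsr)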
#---------------------------------------------------------
# Initialize weights and biases from Gaussian distributions.
#---------------------------------------------------------
function initWeights(netdefinition::Array{Int,1}, sigmaW::Float64, sigmaB::Float64)
    nlayers = length(netdefinition) - 1
    Ws = Vector{Any}(missing, nlayers)
    bs = Vector{Any}(missing, nlayers)
    nd = netdefinition
    # For each layer: init weight matrix and bias vector
    for i = 1:nlayers
        # W has nd[i] as input and nd[i+1] as output
        Ws[i] = randn(nd[i+1], nd[i]) * sigmaW
        bs[i] = randn(nd[i+1]) * sigmaB
    end
    return Ws::Vector{Any}, bs::Vector{Any}
end
# Problem 2: Multilayer Perceptron
function problem2()
    # make results reproducible
    Random.seed!(10)

    # LINEARLY SEPARABLE DATA
    # load data
    features,labels = loaddata("separable.jld2")
    # show data points
    showbefore(features,labels)
    title("Data for Separable Case")
    # train MLP
    Ws,bs = train(features,labels, [2,4,1])
    # show optimum and plot decision boundary
    showafter(features,labels,Ws,bs)
    title("Learned Decision Boundary for Separable Case")

    ## LINEARLY NON-SEPARABLE DATA
    # load data
    features2,labels2 = loaddata("nonseparable.jld2")
    # show data points
    showbefore(features2,labels2)
    title("Data for Non-Separable Case")
    # train MLP
    Ws,bs = train(features2,labels2, [2,4,1])
    # show optimum and plot decision boundary
    showafter(features2,labels2,Ws, bs)
    title("Learned Decision Boundary for Non-Separable Case")

    # PLANE-BIKE-CLASSIFICATION FROM PROBLEM 2
    # load data
    trainfeatures,trainlabels = loaddata("imgstrain.jld2")
    testfeatures,testlabels = loaddata("imgstest.jld2")
    # train MLP and predict classes
    Ws,bs = train(trainfeatures,trainlabels, [50,40,30,1])
    _,trainpredictions = predict(trainfeatures, Ws, bs)
    _,testpredictions = predict(testfeatures, Ws, bs)
    # show error rates
    trainerror = sum(trainpredictions.!=trainlabels)/length(trainlabels)
    testerror = sum(testpredictions.!=testlabels)/length(testlabels)
    println("Training Error Rate: $(round(100*trainerror,digits=2))%")
    println("Testing Error Rate: $(round(100*testerror,digits=2))%")
    return
end
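
# To run the experiment after including this file (the file name here is
# just an example, and the *.jld2 data files must be in the working directory):
#   include("problem2.jl")
#   problem2()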