Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# ---- Configuration ----
learningRate <- 0.000001       # step size (alpha) for gradient descent
maxIterations <- 10000000      # hard cap on descent iterations
maxThetaDifference <- 0.000001 # convergence threshold: a theta is considered
                               # done once its per-iteration change is below this

# ---- Input ----
# Tab-separated file, no header: column 1 = x, column 2 = y (see usage below).
data <- read.csv("data1.tsv", sep = "\t", header = FALSE)
# Linear model prediction: h(x) = theta[1] + theta[2] * x (vectorized over x).
#
# @param theta Numeric vector of length 2: intercept and slope.
# @param x     Numeric vector of inputs.
# @return Numeric vector of predictions, same length as x.
hypothesis <- function(theta, x) {
  theta[1] + theta[2] * x
}

# Mean squared error cost: J(theta) = sum((h(x) - y)^2) / (2 * n).
#
# Bug fix: the original wrote `return(sum(...)) / (2 * length(x))`.
# `return()` exits the function immediately, so the trailing division was
# dead code and the function returned the raw sum of squared errors.
# The divisor now sits inside the returned expression.
#
# @param theta Numeric vector of length 2: intercept and slope.
# @param x     Numeric vector of inputs.
# @param y     Numeric vector of observed outputs, same length as x.
# @return Scalar loss value.
loss <- function(theta, x, y) {
  sum((hypothesis(theta, x) - y) ^ 2) / (2 * length(x))
}
# ---- Initial state for gradient descent ----
theta <- c(0, 0)                           # start both parameters at zero
thetaFinished <- rep(FALSE, length(theta)) # TRUE once a theta has converged
iterations <- 0                            # iterations performed so far

# Per-iteration diagnostics, appended to inside the descent loop.
differences <- c() # total absolute change of theta at each iteration
losses <- c()      # loss value after each iteration
# ---- Batch gradient descent ----
# Repeat until every theta has converged or the iteration cap is hit.
while (!all(thetaFinished) && iterations < maxIterations) {
  oldTheta <- theta                              # snapshot: all updates use the same theta
  predictions <- hypothesis(oldTheta, data[, 1]) # current predictions for every x
  actual <- data[, 2]                            # observed y-values
  residuals <- predictions - actual
  n <- nrow(data)

  # Intercept term (x_0 = 1, so the gradient has no x factor).
  # Bug fix: the original set `thetaFinished[1] = TRUE` unconditionally, so
  # the intercept's convergence was never checked and the loop could stop
  # while theta[1] was still moving. It is now tested like every other theta.
  if (!thetaFinished[1]) {
    theta[1] <- oldTheta[1] - learningRate * sum(residuals) / n
    thetaFinished[1] <- abs(oldTheta[1] - theta[1]) < maxThetaDifference
  }

  # Remaining coefficients; update each unfinished theta (as per slide 46).
  for (t in seq_along(theta)[-1]) {
    if (thetaFinished[t]) next
    theta[t] <- oldTheta[t] - learningRate * sum(residuals * data[, t - 1]) / n
    thetaFinished[t] <- abs(oldTheta[t] - theta[t]) < maxThetaDifference
  }

  iterations <- iterations + 1
  # NOTE(review): growing these vectors by c() is O(n^2) overall; acceptable
  # here, but preallocation would be needed if maxIterations is ever reached.
  differences <- c(differences, sum(abs(theta - oldTheta))) # total theta change
  losses <- c(losses, loss(theta, data[, 1], data[, 2]))    # loss after update
}
# ---- Visualization and output ----
# Scatter the raw observations and overlay the fitted regression line.
# Bug fix: the original drew the line from `oldTheta`, which is one update
# stale and undefined when the loop body never runs; use the final `theta`.
plot(data[, 1], data[, 2], col = "red",
     main = "Actual data (red), regression line (green)")
lines(data[, 1], hypothesis(theta, data[, 1]), col = "green")
dev.new()
plot(differences, main = "Sum of theta differences over iterations")
dev.new()
plot(losses, main = "Loss over iterations")
print("Theta =")
print(theta)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement