import pandas as pd
import numpy as np

# y(x) = b + a*x
def LinearFunction(theta, x):
    return theta[0] + theta[1] * x
# How well our current theta is doing (half the mean squared error)
def CalculateCost(theta, x, y):
    dataCount = len(y)
    total = 0
    for i in range(dataCount):
        total += (LinearFunction(theta, x[i]) - y[i]) ** 2
    return (1.0 / (2 * dataCount)) * total
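# Illustrative alternative (a sketch, not used by the rest of the script):
# the same cost computed with numpy vectorization instead of a Python loop.
# CalculateCostVectorized is a hypothetical helper name, not from the original.
def CalculateCostVectorized(theta, x, y):
    errors = LinearFunction(theta, np.asarray(x)) - np.asarray(y)
    return np.sum(errors ** 2) / (2.0 * len(y))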
# alpha - learning rate, eps - stop once the cost improves by less than this
def GradientDescent(theta, x, y, alpha, eps):
    actualCost = CalculateCost(theta, x, y)
    dataCount = len(y)
    counter = 0
    while True:
        totalBeta = 0
        totalAlpha = 0
        # Accumulate the gradient of the cost for the intercept and the slope
        for i in range(dataCount):
            totalBeta += LinearFunction(theta, x[i]) - y[i]
            totalAlpha += (LinearFunction(theta, x[i]) - y[i]) * x[i]
        # Step both parameters according to the learning rate
        b = theta[0] - alpha / float(dataCount) * totalBeta
        a = theta[1] - alpha / float(dataCount) * totalAlpha
        # Update
        theta = [b, a]
        actualCost, previousCost = CalculateCost(theta, x, y), actualCost
        change = abs(previousCost - actualCost)
        # Show some progress info
        if counter % 100 == 0:
            print("Iteration: " + str(counter) + " Actual Cost: " + str(actualCost) + " Improved by: " + str(change))
        counter += 1
        # Check whether the improvement is small enough to stop
        if change <= eps:
            print("Last: " + str(counter) + " Actual Cost: " + str(actualCost) + " Improved by: " + str(change))
            break
    return theta
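# Optional sanity check (an illustrative sketch with made-up toy values, safe
# to delete): on perfectly linear data, gradient descent should land close to
# the closed-form least-squares fit returned by np.polyfit.
_toyX = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
_toyY = 2.0 + 3.0 * _toyX
_toyTheta = GradientDescent([0.0, 0.0], _toyX, _toyY, 0.1, 1e-6)
_slope, _intercept = np.polyfit(_toyX, _toyY, 1)
print("Toy check - gradient descent: " + str(_toyTheta) + " polyfit: " + str([_intercept, _slope]))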
# Scale to zero mean and unit standard deviation
def Normalize(values):
    return (values - np.mean(values)) / np.std(values)
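# Illustrative usage (toy values, not from the data set): a normalized series
# has mean 0 and standard deviation 1.
print(Normalize(np.array([10.0, 20.0, 30.0])))  # approx. [-1.2247  0.  1.2247]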
# Load data
trainData = pd.read_csv('train/train.tsv', sep='\t')
devDataInput = pd.read_csv('dev-0/in.tsv', sep='\t', header=None)[0]
testDataInput = pd.read_csv('test-A/in.tsv', sep='\t', header=None)[1]

# Prepare data ('Powierzchnia w m2' = area in m2, 'cena' = price)
trainInput = Normalize(trainData['Powierzchnia w m2'])
trainOutput = trainData['cena']
devDataInput = Normalize(devDataInput)
print(len(devDataInput))
testDataInput = Normalize(testDataInput)
print(len(testDataInput))
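# Note (a possible refinement, not what this script does): each split above is
# normalized with its own mean and std; reusing the training set's mean and std
# for the dev and test inputs would keep all splits on the training scale.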
# Solve and predict
solvedTheta = GradientDescent([4, -0.5], trainInput, trainOutput, 0.005, 0.0001)
print(solvedTheta)
predictedDev = LinearFunction(solvedTheta, devDataInput)
predictedTest = LinearFunction(solvedTheta, testDataInput)
np.savetxt('test-A/out.tsv', predictedTest, '%.0f')
np.savetxt('dev-0/out.tsv', predictedDev, '%.0f')