Th3NiKo

Machine learning - linear regression

Apr 14th, 2019
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.43 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3.  
  4.  
  5. # y(x) = b + ax
  6. def LinearFunction(theta, x):
  7.     return theta[0] + theta[1] * x
  8.  
  9. #How well is our theta doing
  10. def CalculateCost(theta, x, y):
  11.     dataCount = len(y)
  12.     total = 0
  13.     for i in range(dataCount):
  14.         total += ((LinearFunction(theta, x[i]) - y[i])**2)
  15.     return (1.0 / (2 * dataCount)) * total
  16.  
  17. #Alpha - learning rate    Eps - when to stop
  18. def GradientDescent(theta, x, y, alpha, eps):
  19.     actualCost = CalculateCost(theta, x, y)
  20.     dataCount = len(y)
  21.     counter = 0
  22.     while True:
  23.         totalBeta = 0
  24.         totalAlpha = 0
  25.  
  26.         #Count how much we need to change actual values
  27.         for i in range(dataCount):
  28.             totalBeta += (LinearFunction(theta, x[i]) - y[i])
  29.             totalAlpha += (LinearFunction(theta, x[i]) - y[i]) * x[i]
  30.        
  31.         #change them acording to learning rate
  32.         b = theta[0] - alpha/(float(dataCount)) * totalBeta
  33.         a = theta[1] - alpha/(float(dataCount)) * totalAlpha
  34.  
  35.         #Update
  36.         theta = [b,a]
  37.         actualCost, previousCost = CalculateCost(theta, x, y), actualCost
  38.  
  39.         change = abs(previousCost - actualCost)
  40.         #Show some info
  41.         if counter % 100 == 0:
  42.             print("Iteration: " + str(counter) + " Actual Cost: " + str(actualCost) + " Improved by: " + str(change))
  43.         counter += 1
  44.         #Check if we are done
  45.         if change <= eps:
  46.             print("Last: " + str(counter) + " Actual Cost: " + str(actualCost) + " Improved by: " + str(change))
  47.  
  48.             break
  49.     return theta
  50.  
  51. #Scale
  52. def Normalize(values):
  53.     return (values - np.mean(values)) / np.std(values)
  54.  
  55. #Load data
  56. trainData = pd.read_csv('train/train.tsv', sep='\t')
  57. devDataInput = pd.read_csv('dev-0/in.tsv', sep='\t', header=None)[0]
  58. testDataInput = pd.read_csv('test-A/in.tsv', sep='\t', header=None)[1]
  59.  
  60. #Prepare data
  61. trainInput = Normalize(trainData['Powierzchnia w m2'])
  62. trainOutput = trainData['cena']
  63.  
  64. devDataInput = Normalize(devDataInput)
  65. print(len(devDataInput))
  66. testDataInput = Normalize(testDataInput)
  67. print(len(testDataInput))
  68. #Solve and predict
  69. solvedTheta = GradientDescent([4, -0.5], trainInput, trainOutput, 0.005, 0.0001)
  70. print(solvedTheta)
  71. predictedDev = LinearFunction(solvedTheta, devDataInput)
  72. predictedTest = LinearFunction(solvedTheta, testDataInput)
  73.  
  74. np.savetxt('test-A/out.tsv', predictedTest, '%.0f')
  75. np.savetxt('dev-0/out.tsv', predictedDev, '%.0f')
Add Comment
Please, Sign In to add comment