Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- import pandas
- from numpy import sqrt
- training = []
- test = []
def init():
    """Load the train/test CSVs, filter outliers, and min-max normalize.

    Fills the module-level ``training`` and ``test`` lists with
    normalized ``[x, y]`` pairs read from ``data//train.csv`` and
    ``data//test_preview.csv`` (each file's first two columns).

    Returns:
        (minx, maxx, miny, maxy): the normalization bounds computed from
        the *filtered* training data; needed later to denormalize.
    """
    dataPanda = pandas.read_csv("data//train.csv")
    testPanda = pandas.read_csv("data//test_preview.csv")
    x, y = [], []
    for i in range(len(dataPanda.index)):
        x1 = dataPanda.loc[i][0]
        y1 = dataPanda.loc[i][1]
        # Drop hand-picked outlier regions (thresholds tuned to this dataset).
        if (x1 < 3100 and y1 > 1400) or (x1 > 4400 and y1 < 1100) or x1 > 4800:
            continue
        x.append(x1)
        y.append(y1)
    minx, maxx = min(x), max(x)
    miny, maxy = min(y), max(y)
    # Normalize the filtered training points into the shared list.
    for xv, yv in zip(x, y):
        training.append([normalize(xv, minx, maxx), normalize(yv, miny, maxy)])
    # Test points are scaled with the TRAINING bounds so both sets share a scale.
    for i in range(len(testPanda.index)):
        test.append([
            normalize(testPanda.loc[i][0], minx, maxx),
            normalize(testPanda.loc[i][1], miny, maxy),
        ])
    print(test)
    return minx, maxx, miny, maxy
def stepGradient(b_current, m_current, points, learningRate):
    """Perform one batch gradient-descent step for the line y = m*x + b.

    Minimizes mean squared error over ``points`` (iterable of ``[x, y]``
    pairs).

    Args:
        b_current: current intercept estimate.
        m_current: current slope estimate.
        points: training data as ``[x, y]`` pairs.
        learningRate: step size multiplier.

    Returns:
        ``[new_b, new_m]`` — the updated intercept and slope.
    """
    # Robustness: with no data there is no gradient; keep parameters as-is
    # (the original would divide by zero here).
    if not points:
        return [b_current, m_current]
    b_gradient = 0.0
    m_gradient = 0.0
    # BUG FIX: the original sized and iterated by len(training) (a module
    # global) while indexing `points`, so the `points` argument was
    # effectively ignored whenever the two differed.
    N = float(len(points))
    for px, py in points:
        # Residual of the current line at this point.
        error = (m_current * px + b_current) - py
        b_gradient += error
        m_gradient += px * error
    # Average the gradients and step downhill.
    new_b = b_current - (learningRate * b_gradient / N)
    new_m = m_current - (learningRate * m_gradient / N)
    return [new_b, new_m]
def normalize(value, min, max):
    """Min-max scale *value* from the range [min, max] onto [0, 1]."""
    span = max - min
    return (value - min) / span
def denormalize(value_prim, min, max):
    """Inverse of normalize: map *value_prim* from [0, 1] back to [min, max]."""
    span = max - min
    return min + span * value_prim
def calculateError(k, n, minx, maxx, miny, maxy):
    """Root-mean-squared error of the line y = k*x + n over the global ``test`` set.

    Predictions and targets are denormalized with the y-bounds before the
    error is computed, so the RMSE is in original y units.

    Note: ``minx``/``maxx`` are not used here; they are kept so existing
    call sites remain valid.
    """
    squared_sum = 0.0
    for tx, ty in test:
        predicted = denormalize(k * tx + n, miny, maxy)
        actual = denormalize(ty, miny, maxy)
        squared_sum += (predicted - actual) ** 2
    return sqrt(squared_sum / float(len(test)))
if __name__ == '__main__':
    # Load and normalize the data; keep the bounds for later denormalization.
    minx, maxx, miny, maxy = init()
    # Starting guess: n = intercept, k = slope.
    n, k = 242.86, 0.29
    # 1000 gradient-descent steps, reporting progress each iteration.
    for _ in range(1000):
        n, k = stepGradient(n, k, training, 0.01)
        err = calculateError(k, n, minx, maxx, miny, maxy)
        print("ERR: " + str(err))
        print(n)
        print(k)
    # Final fit quality and parameters.
    rmse = calculateError(k, n, minx, maxx, miny, maxy)
    print(rmse)
    print(k)
    print(n)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement