import numpy


def read_data(dataset='learn.txt'):
    # One example per whitespace-separated line; column 1 is the class
    # label, and create_quadratic() expands the raw features in columns 2+.
    y = []
    x = []
    lines = 0
    cols = 0
    with open(dataset) as f:
        for line in f:
            xy = [float(i) for i in line.split()]
            y.append(xy[1])
            poly = create_quadratic(xy[2:])
            # poly = xy[2:]
            x.extend(poly)
            lines += 1
            cols = len(poly)
    return numpy.reshape(y, (lines, 1)), numpy.reshape(x, (lines, cols))
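
# Assumed learn.txt layout (the paste itself does not document it): column 0
# looks unused (perhaps a row id), column 1 is the class label (1 or 2,
# remapped to 1/0 below), and columns 2+ are the raw features. A purely
# hypothetical example row:
#   7 1 0.52 -1.17 0.88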


def create_indicator_matrix(y):
    # One-hot encode integer class labels assumed to lie in 0..max(y).
    # Sized max(y) + 1 so that ind[i][y[i]] is always in range (the original
    # allocated only max(y) columns, which overflows for the largest label).
    cnt = len(y)
    count_classes = int(max(y)) + 1
    ind = numpy.zeros((cnt, count_classes))
    for i in range(cnt):
        ind[i][int(y[i])] = 1.0
    return ind
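
# Example: integer labels [0, 2, 1] yield the 3-column indicator matrix
#   [[1, 0, 0],
#    [0, 0, 1],
#    [0, 1, 0]]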


def linear_regression(x, y):
    # Least-squares fit against the indicator matrix (one column per class).
    # Unused by the script below; rewritten with numpy.linalg so it works on
    # plain ndarrays (the original used numpy.matrix-only `*` and `.I`).
    ind = create_indicator_matrix(y)
    return numpy.linalg.inv(x.T.dot(x)).dot(x.T).dot(ind)


def normalize(x):
    # Z-score each feature column and prepend a bias column of ones.
    # Constant columns (std == 0) are dropped and their indices returned.
    xn = numpy.ones((x.shape[0], 1))
    m = x.shape[1]
    means = numpy.mean(x, axis=0)
    stds = numpy.std(x, axis=0)
    removed = []
    for i in range(m):  # the original started at 1, silently dropping column 0
        if stds[i] > 0.0:
            x[:, i] -= means[i]
            x[:, i] /= stds[i]
            xn = numpy.hstack([xn, x[:, i:i + 1]])
        else:
            removed.append(i)
    return xn, means, stds, removed
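
# A minimal sketch (not in the original paste) of reusing the statistics that
# normalize() returns on held-out data; apply_normalization is a hypothetical
# helper name.
def apply_normalization(x, means, stds, removed):
    xn = numpy.ones((x.shape[0], 1))
    for i in range(x.shape[1]):
        if i in removed:
            continue  # column was constant in the training data
        col = (x[:, i] - means[i]) / stds[i]
        xn = numpy.hstack([xn, col.reshape(-1, 1)])
    return xn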


def create_quadratic(x):
    p = []
    for x1 in x:
        p.append(x1)
        p.append(x1 * x1)
    # for x1 in x:
    #     for x2 in x:
    #         p.append(x1 * x2)
    return p
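
# Example: create_quadratic([a, b]) -> [a, a*a, b, b*b]; enabling the
# commented-out loop above would additionally append every pairwise
# product x1 * x2.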


def sigmoid(m):
    # Element-wise logistic function, vectorized with numpy.exp in place of
    # the original per-element math.exp loop (same values and shape).
    return 1.0 / (1.0 + numpy.exp(-numpy.asarray(m, dtype=float)))


def cost(x, theta, y):
    # Average logistic-regression cross-entropy over the training examples.
    h = sigmoid(numpy.dot(x, theta))
    return -(numpy.dot(y.T, numpy.log(h)) + numpy.dot((1 - y).T, numpy.log(1 - h))) / len(y)


def grad(x, theta, y):
    # Gradient of cost() with respect to theta.
    return numpy.dot(x.T, sigmoid(numpy.dot(x, theta)) - y) / len(y)
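
# A quick finite-difference sanity check of grad() against cost(); a standard
# technique, not part of the original paste, and check_gradient/eps are names
# chosen here. A small return value (~1e-7 or less) suggests the analytic
# gradient is correct.
def check_gradient(x, theta, y, eps=1e-5):
    analytic = grad(x, theta, y)
    numeric = numpy.zeros_like(theta)
    for j in range(theta.shape[0]):
        tp = theta.copy()
        tm = theta.copy()
        tp[j] += eps
        tm[j] -= eps
        numeric[j] = (cost(x, tp, y) - cost(x, tm, y)).item() / (2 * eps)
    return numpy.max(numpy.abs(analytic - numeric))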


def gradient_descent(x, y, theta, alpha, num_iters):
    # Plain batch gradient descent with a fixed step size.
    for i in range(num_iters):
        if i % 50 == 0:
            print('Iterations left: ', num_iters - i)
        theta -= alpha * grad(x, theta, y)
        # print(cost(x, theta, y))
    return theta


def cohen(p, y):
    # Cohen's kappa for binary predictions:
    # (observed agreement - chance agreement) / (1 - chance agreement).
    m = len(p)
    a = 0   # agreements between p and y
    a1 = 0  # count predicted 0
    a2 = 0  # count actual 0
    b1 = 0  # count predicted 1
    b2 = 0  # count actual 1
    for i in range(m):
        if p[i] == y[i][0]:
            a += 1
        a1 += 1 - p[i]
        b1 += p[i]
        a2 += 1 - y[i][0]
        b2 += y[i][0]
    f = float(m)
    pra = a / f
    pre = (a1 * a2) / (f * f) + (b1 * b2) / (f * f)
    # pre = 0.5
    return (pra - pre) / (1 - pre)
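
# Hand-checkable toy cases (not from the original paste):
#   cohen(numpy.array([1., 0., 1., 0.]), numpy.array([[1.], [0.], [1.], [0.]]))  # -> 1.0
#   cohen(numpy.array([1., 1., 1., 0.]), numpy.array([[1.], [0.], [1.], [0.]]))  # -> 0.5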


def predict(x, theta):
    # Hard 0/1 labels: threshold the logistic output at 0.5.
    num = x.shape[0]
    p = numpy.zeros(num)
    for i in range(num):
        s = sigmoid(numpy.dot(x[i], theta))
        if s.flat[0] >= 0.5:
            p[i] = 1
    return p


y, x = read_data()
x, means, stds, removed = normalize(x)

# Remap the raw labels to binary targets: class 2 becomes 0, class 1 stays 1.
# t counts the positive (class-1) examples and is otherwise unused.
t = 0
for i in range(len(y)):
    if y[i] == 1:
        t += 1
    if y[i] == 2:
        y[i] = 0

theta = numpy.zeros((x.shape[1], 1))
initial_cost = cost(x, theta, y)
print('Initial cost: ', initial_cost)
print('Initial cohen: ', cohen(predict(x, theta), y))

theta = gradient_descent(x, y, theta, alpha=1.0, num_iters=350)

print('Test:')
print(grad(x, theta, y))  # final gradient; should be near zero at convergence
p = predict(x, theta)
print('Estimated cost: ', cost(x, theta, y))
print("Cohen's kappa: ", cohen(p, y))
print(p)