import numpy


def read_data(dataset='learn.txt'):
    # One example per whitespace-separated line; column 1 is the class
    # label, and create_quadratic() expands the raw features in columns 2+.
    y = []
    x = []
    lines = 0
    cols = 0
    with open(dataset) as f:
        for line in f:
            xy = [float(i) for i in line.split()]
            y.append(xy[1])
            poly = create_quadratic(xy[2:])
            # poly = xy[2:]
            x.extend(poly)
            lines += 1
            cols = len(poly)
    return numpy.reshape(y, (lines, 1)), numpy.reshape(x, (lines, cols))
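
# Assumed learn.txt layout (the paste itself does not document it): column 0
# looks unused (perhaps a row id), column 1 is the class label (1 or 2,
# remapped to 1/0 below), and columns 2+ are the raw features. A purely
# hypothetical example row:
#   7 1 0.52 -1.17 0.88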


def create_indicator_matrix(y):
    # One-hot encode integer class labels assumed to lie in 0..max(y).
    # Sized max(y) + 1 so that ind[i][y[i]] is always in range (the original
    # allocated only max(y) columns, which overflows for the largest label).
    cnt = len(y)
    count_classes = int(max(y)) + 1
    ind = numpy.zeros((cnt, count_classes))
    for i in range(cnt):
        ind[i][int(y[i])] = 1.0
    return ind
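
# Example: integer labels [0, 2, 1] yield the 3-column indicator matrix
#   [[1, 0, 0],
#    [0, 0, 1],
#    [0, 1, 0]]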


def linear_regression(x, y):
    # Least-squares fit against the indicator matrix (one column per class).
    # Unused by the script below; rewritten with numpy.linalg so it works on
    # plain ndarrays (the original used numpy.matrix-only `*` and `.I`).
    ind = create_indicator_matrix(y)
    return numpy.linalg.inv(x.T.dot(x)).dot(x.T).dot(ind)


def normalize(x):
    # Z-score each feature column and prepend a bias column of ones.
    # Constant columns (std == 0) are dropped and their indices returned.
    xn = numpy.ones((x.shape[0], 1))
    m = x.shape[1]
    means = numpy.mean(x, axis=0)
    stds = numpy.std(x, axis=0)
    removed = []
    for i in range(m):  # the original started at 1, silently dropping column 0
        if stds[i] > 0.0:
            x[:, i] -= means[i]
            x[:, i] /= stds[i]
            xn = numpy.hstack([xn, x[:, i:i + 1]])
        else:
            removed.append(i)
    return xn, means, stds, removed
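
# A minimal sketch (not in the original paste) of reusing the statistics that
# normalize() returns on held-out data; apply_normalization is a hypothetical
# helper name.
def apply_normalization(x, means, stds, removed):
    xn = numpy.ones((x.shape[0], 1))
    for i in range(x.shape[1]):
        if i in removed:
            continue  # column was constant in the training data
        col = (x[:, i] - means[i]) / stds[i]
        xn = numpy.hstack([xn, col.reshape(-1, 1)])
    return xn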


def create_quadratic(x):
    p = []
    for x1 in x:
        p.append(x1)
        p.append(x1 * x1)
    # for x1 in x:
    #     for x2 in x:
    #         p.append(x1 * x2)
    return p
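
# Example: create_quadratic([a, b]) -> [a, a*a, b, b*b]; enabling the
# commented-out loop above would additionally append every pairwise
# product x1 * x2.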


def sigmoid(m):
    # Element-wise logistic function, vectorized with numpy.exp in place of
    # the original per-element math.exp loop (same values and shape).
    return 1.0 / (1.0 + numpy.exp(-numpy.asarray(m, dtype=float)))


def cost(x, theta, y):
    # Average logistic-regression cross-entropy over the training examples.
    h = sigmoid(numpy.dot(x, theta))
    return -(numpy.dot(y.T, numpy.log(h)) + numpy.dot((1 - y).T, numpy.log(1 - h))) / len(y)


def grad(x, theta, y):
    # Gradient of cost() with respect to theta.
    return numpy.dot(x.T, sigmoid(numpy.dot(x, theta)) - y) / len(y)
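
# A quick finite-difference sanity check of grad() against cost(); a standard
# technique, not part of the original paste, and check_gradient/eps are names
# chosen here. A small return value (~1e-7 or less) suggests the analytic
# gradient is correct.
def check_gradient(x, theta, y, eps=1e-5):
    analytic = grad(x, theta, y)
    numeric = numpy.zeros_like(theta)
    for j in range(theta.shape[0]):
        tp = theta.copy()
        tm = theta.copy()
        tp[j] += eps
        tm[j] -= eps
        numeric[j] = (cost(x, tp, y) - cost(x, tm, y)).item() / (2 * eps)
    return numpy.max(numpy.abs(analytic - numeric))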


def gradient_descent(x, y, theta, alpha, num_iters):
    # Plain batch gradient descent with a fixed step size.
    for i in range(num_iters):
        if i % 50 == 0:
            print('Iterations left: ', num_iters - i)
        theta -= alpha * grad(x, theta, y)
        # print(cost(x, theta, y))
    return theta


def cohen(p, y):
    # Cohen's kappa for binary predictions:
    # (observed agreement - chance agreement) / (1 - chance agreement).
    m = len(p)
    a = 0   # agreements between p and y
    a1 = 0  # count predicted 0
    a2 = 0  # count actual 0
    b1 = 0  # count predicted 1
    b2 = 0  # count actual 1
    for i in range(m):
        if p[i] == y[i][0]:
            a += 1
        a1 += 1 - p[i]
        b1 += p[i]
        a2 += 1 - y[i][0]
        b2 += y[i][0]
    f = float(m)
    pra = a / f
    pre = (a1 * a2) / (f * f) + (b1 * b2) / (f * f)
    # pre = 0.5
    return (pra - pre) / (1 - pre)
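
# Hand-checkable toy cases (not from the original paste):
#   cohen(numpy.array([1., 0., 1., 0.]), numpy.array([[1.], [0.], [1.], [0.]]))  # -> 1.0
#   cohen(numpy.array([1., 1., 1., 0.]), numpy.array([[1.], [0.], [1.], [0.]]))  # -> 0.5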


def predict(x, theta):
    # Hard 0/1 labels: threshold the logistic output at 0.5.
    num = x.shape[0]
    p = numpy.zeros(num)
    for i in range(num):
        s = sigmoid(numpy.dot(x[i], theta))
        if s.flat[0] >= 0.5:
            p[i] = 1
    return p


y, x = read_data()
x, means, stds, removed = normalize(x)

# Remap the raw labels to binary targets: class 2 becomes 0, class 1 stays 1.
# t counts the positive (class-1) examples and is otherwise unused.
t = 0
for i in range(len(y)):
    if y[i] == 1:
        t += 1
    if y[i] == 2:
        y[i] = 0

theta = numpy.zeros((x.shape[1], 1))
initial_cost = cost(x, theta, y)
print('Initial cost: ', initial_cost)
print('Initial cohen: ', cohen(predict(x, theta), y))

theta = gradient_descent(x, y, theta, alpha=1.0, num_iters=350)

print('Test:')
print(grad(x, theta, y))  # final gradient; should be near zero at convergence
p = predict(x, theta)
print('Estimated cost: ', cost(x, theta, y))
print("Cohen's kappa: ", cohen(p, y))
print(p)