# 18.py

import numpy as np

# Number of independent randomized runs whose test error is averaged.
times = 2_000

def sgn(n):
    """Return 1 when ``n`` is strictly positive, otherwise -1.

    Zero is mapped to -1, so the result is never 0.
    """
    if n > 0:
        return 1
    return -1

def evaluate(w, x_data, y_data):
    """Count how many samples the linear classifier ``w`` gets wrong.

    Args:
        w: Weight vector whose length matches the columns of ``x_data``.
        x_data: 2-D array-like of samples, one row per sample.
        y_data: 1-D array-like of labels (+1 / -1), one per row of ``x_data``.

    Returns:
        The number of rows whose predicted label differs from ``y_data``,
        as a plain ``int``.
    """
    labels = np.asarray(y_data)
    scores = np.dot(x_data, w)
    # A score of exactly zero is classified as -1, matching the sign
    # convention used by the training rule (np.sign would yield 0 here,
    # which can never equal a +1/-1 label).
    predictions = np.where(scores > 0, 1, -1)
    # Count mismatches directly rather than subtracting the number of
    # matches from a fixed dataset size.
    return int(np.count_nonzero(predictions != labels))

def _read_stripped(path):
    """Return the full text of ``path`` with surrounding whitespace removed."""
    # The context manager closes the handle even if reading fails.
    with open(path, 'r') as handle:
        return handle.read().strip()


def _parse_samples(content):
    """Split file text into a feature matrix and a label vector.

    Each line is split on whitespace: the first four fields become float
    features (prefixed with a constant 1 column) and the last field is read
    as the integer label.
    """
    rows = [line.split() for line in content.split('\n')]
    features = np.array([[1] + [float(value) for value in row[:4]] for row in rows])
    labels = np.array([int(row[-1]) for row in rows])
    return features, labels


content_train = _read_stripped('hw1_18_train.dat')
content_test = _read_stripped('hw1_18_test.dat')

x_train, y_train = _parse_samples(content_train)
x_test, y_test = _parse_samples(content_test)

# Repeat the experiment `times` times. Each run performs up to 50
# mistake-driven weight updates while cycling through the training samples
# in a freshly shuffled order, keeps the weights with the lowest training
# error seen so far, and adds their test-set error count to `_sum`.
_sum = 0
n_train = len(x_train)
for a in range(times):
    # Progress marker every 100 runs.
    if a % 100 == 0:
        print(a, end=' ', flush=True)
    # Weight length follows the feature matrix instead of a fixed 5.
    w_best = np.zeros(x_train.shape[1])
    w_new = np.zeros(x_train.shape[1])
    index = np.arange(n_train)
    np.random.shuffle(index)

    i, t = 0, 0
    # Consecutive samples visited without an update. Once this reaches
    # n_train a whole cycle was mistake-free, so no further update can
    # happen and waiting for t to reach 50 would loop forever.
    clean_streak = 0
    least_err = evaluate(w_best, x_train, y_train)
    while t < 50 and clean_streak < n_train:
        sample, label = x_train[index[i]], y_train[index[i]]
        if sgn(np.dot(sample, w_new)) != label:
            w_new = w_new + label * sample
            t += 1
            clean_streak = 0

            tmp = evaluate(w_new, x_train, y_train)
            if tmp < least_err:
                # Explicit copy: slicing an ndarray only creates a view.
                w_best = w_new.copy()
                least_err = tmp
        else:
            clean_streak += 1

        i = (i + 1) % n_train
    _sum += evaluate(w_best, x_test, y_test)

print()
# Normalise by the test-set size, since the errors were counted on x_test.
print(f'avg_err: {_sum / times / len(y_test)}')