Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
# Number of independent training runs; the script loops this many times and
# divides the accumulated test-error count by it when reporting the average.
times = 2000
def sgn(n):
    """Return the sign of n as +1 or -1.

    Only strictly positive values map to +1; zero and negative values
    both map to -1.
    """
    if n > 0:
        return 1
    return -1
def evaluate(w, x_data, y_data):
    """Count how many samples the linear classifier w misclassifies.

    A sample is predicted +1 when its score ``x . w`` is strictly positive
    and -1 otherwise, so a score of exactly zero counts as -1. This is the
    same convention as ``sgn``; ``np.sign`` would yield 0 for a zero score
    and never match a +/-1 label.

    Args:
        w: Weight vector with one entry per column of x_data.
        x_data: 2-D array with one sample per row.
        y_data: 1-D array of +1/-1 labels, one per row of x_data.

    Returns:
        The number of rows whose predicted label differs from y_data, as a
        plain int. The count is taken over the data actually passed in, so
        it is correct for any dataset size (0 for an empty dataset).
    """
    scores = np.dot(x_data, w)
    predictions = np.where(scores > 0, 1, -1)
    return int(np.sum(predictions != y_data))
def _read_text(path):
    """Return the stripped text of the file at path, closing it afterwards."""
    with open(path, 'r') as handle:
        return handle.read().strip()


def _parse_dataset(content):
    """Split whitespace-separated rows into a feature matrix and label vector.

    Each line contributes its first four tokens as float features, with a
    constant 1 prepended as the bias term, and its last token as an int
    label.
    """
    rows = [line.split() for line in content.split('\n')]
    features = np.array([[1] + [float(v) for v in row[:4]] for row in rows])
    labels = np.array([int(row[-1]) for row in rows])
    return features, labels


content_train = _read_text('hw1_18_train.dat')
content_test = _read_text('hw1_18_test.dat')
x_train, y_train = _parse_dataset(content_train)
x_test, y_test = _parse_dataset(content_test)
def _run_pocket(x, y, max_updates=50):
    """Run one perceptron training pass that keeps the best weights seen.

    Samples are visited cyclically in a freshly shuffled order. Each
    misclassified sample triggers a perceptron update; after every update
    the candidate weights are scored on (x, y) with ``evaluate`` and kept
    if they make strictly fewer mistakes than the best so far.

    Args:
        x: 2-D array of training samples, one per row.
        y: 1-D array of +1/-1 labels aligned with x.
        max_updates: Number of weight updates after which training stops.

    Returns:
        The weight vector with the lowest training error encountered.
    """
    n_samples = len(x)
    order = np.arange(n_samples)
    np.random.shuffle(order)

    # Size the weights from the data instead of assuming five columns.
    w_new = np.zeros(x.shape[1])
    w_best = w_new.copy()
    least_err = evaluate(w_best, x, y)

    pos = 0
    updates = 0
    # Consecutive correctly classified samples since the last update. Once a
    # full cycle passes without a mistake no further update can happen, so
    # stop rather than spin forever waiting for max_updates.
    clean_streak = 0
    while updates < max_updates and clean_streak < n_samples:
        sample, label = x[order[pos]], y[order[pos]]
        if sgn(np.dot(sample, w_new)) != label:
            w_new = w_new + label * sample
            updates += 1
            clean_streak = 0
            err = evaluate(w_new, x, y)
            if err < least_err:
                # Explicit copy: slicing a NumPy array only yields a view.
                w_best = w_new.copy()
                least_err = err
        else:
            clean_streak += 1
        pos = (pos + 1) % n_samples
    return w_best


_sum = 0
for a in range(times):
    if a % 100 == 0:
        print(a, end=' ', flush=True)
    w_best = _run_pocket(x_train, y_train)
    _sum += evaluate(w_best, x_test, y_test)
print()
# Errors were counted on the test set, so normalise by the test-set size.
print(f'avg_err: {_sum / times / len(y_test)}')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement