Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from sklearn.metrics import mean_squared_error
- from sklearn.preprocessing import PolynomialFeatures
def load_data(path):
    """Load one .npz dataset and return (X_train, Y_train, X_test, Y_test).

    The archive must contain the keys 'X', 'Y', 'X_test', 'Y_test'.
    Uses a context manager so the underlying file handle is closed
    (the original leaked the NpzFile handle).
    """
    with np.load(path) as archive:
        # Indexing materializes the arrays before the archive is closed.
        return archive['X'], archive['Y'], archive['X_test'], archive['Y_test']
def linear_regression(x, y, degree):
    """Fit a degree-`degree` polynomial to (x, y) and return its coefficients.

    Coefficients come from np.polyfit, highest power first.
    The original computed the fit but only printed it and returned None;
    returning the coefficients keeps the print (backward compatible) while
    making the function usable.
    """
    coefficients = np.polyfit(x, y, degree)
    print(coefficients)  # diagnostic print preserved from the original
    return coefficients
# Script entry point for the regression assignment: tasks 1-3.
# (The polynomial helpers and the task-3 evaluation follow below.)
if __name__ == '__main__':
    # ---- Task 1: ordinary least squares on dataset 1 ----
    data = load_data('./dataset1.npz')
    print('1st Task')
    print("The given dataset's shape is: ")
    for array in data:
        print(array.shape)
    print('\n')
    data_trng, label_trng, data_test, label_test = load_data('./dataset1.npz')
    # Append a constant bias column so the model can learn an intercept.
    bias_trng = np.ones((len(data_trng), 1))
    data_trng = np.concatenate((data_trng, bias_trng), axis=1)
    bias_test = np.ones((len(data_test), 1))
    data_test = np.concatenate((data_test, bias_test), axis=1)
    # Closed-form normal equations: w = (X^T X)^-1 X^T y.
    weights = np.linalg.inv(data_trng.T @ data_trng) @ (data_trng.T @ label_trng)
    print('The generated weights are: ', weights, '\n')
    pred_train = data_trng @ weights
    mse_train = mean_squared_error(label_trng, pred_train)
    print('Mean squared error for the training data: %g' % mse_train)
    pred_test = data_test @ weights
    mse_test = mean_squared_error(label_test, pred_test)
    print('Mean squared error for the testing data: %g' % mse_test)
    # Baseline: random weights (presumably 10 features + bias = 11 entries
    # -- TODO confirm against the dataset's feature count).
    random_weights = np.random.rand(11)
    random_pred = data_trng @ random_weights
    mse_random = mean_squared_error(label_trng, random_pred)
    print('Mean squared error for the train data with random weights: %g' % mse_random)
    # I think I found the correct parameters, because the MSE for both the training and test data is very small,
    # almost zero. however the random weights produce a very high MSE, especially if we compare it to the proper
    # weights, the difference is 30 orders of magnitude.

    # ---- Task 2: same procedure on dataset 2 ----
    print('\n\n########## task 2 ##########\n')
    data = load_data('./dataset2.npz')
    print('The shapes of the given dataset are the following: ')
    for array in data:
        print(array.shape)
    print('\n')
    # BUG FIX: this originally reloaded './dataset1.npz', so task 2 was
    # silently trained and evaluated on the wrong dataset.
    data_trng, label_trng, data_test, label_test = load_data('./dataset2.npz')
    bias_trng = np.ones((len(data_trng), 1))
    data_trng = np.concatenate((data_trng, bias_trng), axis=1)
    bias_test = np.ones((len(data_test), 1))
    data_test = np.concatenate((data_test, bias_test), axis=1)
    weights = np.linalg.inv(data_trng.T @ data_trng) @ (data_trng.T @ label_trng)
    print('The weights generating the given data are: ', weights, '\n')
    pred_train = data_trng @ weights
    mse_train = mean_squared_error(label_trng, pred_train)
    print('Mean squared error for the training data: %g' % mse_train)
    pred_test = data_test @ weights
    mse_test = mean_squared_error(label_test, pred_test)
    print('Mean squared error for the testing data: %g' % mse_test)
    random_weights = np.random.rand(11)
    random_pred = data_trng @ random_weights
    mse_random = mean_squared_error(label_trng, random_pred)
    print('Mean squared error for the train data with random weights: %g' % mse_random)
    # The MSEs are almost the same, except for the random, which is, well, random. The training error is higher,
    # which is odd, because the test error should be higher, because that's previously unseen data.
    # NOTE(review): the odd train/test gap was likely caused by the dataset1
    # reload fixed above -- re-check these observations after rerunning.

    # ---- Task 3: polynomial regression on dataset 3 ----
    print('\n\n########## task 3 ##########\n')
    data = load_data('./dataset3.npz')
    print('The shapes of the given dataset are the following: ')
    for array in data:
        print(array.shape)
    print('\n')
    data_trng, label_trng, data_test, label_test = load_data('./dataset3.npz')
def polynomial_regression_weights(data, label, degree):
    """Fit a degree-`degree` polynomial to 1-D inputs by least squares.

    Accepts `data`/`label` as flat arrays or column vectors.
    Returns a 1-D array of length degree + 1, where index k holds the
    coefficient of x**k.

    BUG FIX: the original solved a separate one-parameter regression per
    power (not a joint polynomial fit, so coefficients were wrong for
    degree > 0) and assigned a (1, 1) matrix into a scalar slot, which
    errors on NumPy >= 1.25. A joint Vandermonde/lstsq solve fixes both.
    """
    x = np.asarray(data).reshape(-1)
    y = np.asarray(label).reshape(-1)
    # Columns are [1, x, x**2, ..., x**degree].
    vander = np.vander(x, degree + 1, increasing=True)
    # lstsq is numerically safer than forming the normal equations.
    weights, *_ = np.linalg.lstsq(vander, y, rcond=None)
    return weights
def polynomial_regression_prediction(data, weights):
    """Evaluate a polynomial at each input point.

    `weights[k]` is the coefficient of x**k; `data` and `weights` may be
    flat arrays or column vectors (the original required column-shaped
    data -- flat input crashed on the (n,) @ (1,) matmul). Returns a 1-D
    array of predictions, one per input point.
    """
    x = np.asarray(data).reshape(-1)
    coeffs = np.asarray(weights).reshape(-1)
    result = np.zeros(x.shape[0])
    for power, coeff in enumerate(coeffs):
        result += coeff * x ** power
    return result
# ---- Task 3 evaluation: compare polynomial fits of degree 1, 2 and 5 ----
# The original triplicated this code for degrees 1, 2 and 5; a loop prints
# exactly the same output with one copy of the logic.
for degree in (1, 2, 5):
    fitted = polynomial_regression_weights(data_trng, label_trng, degree)
    coeff_column = np.expand_dims(fitted, 1)
    pred_train = polynomial_regression_prediction(data_trng, coeff_column)
    pred_test = polynomial_regression_prediction(data_test, coeff_column)
    mse_train = mean_squared_error(label_trng, pred_train)
    mse_test = mean_squared_error(label_test, pred_test)
    print(mse_train, mse_test)
# I think that the training data was generated by a first-order polynomial, because the first-order polynomial
# regression produces the lowest MSE. Yes, I would have deducted the same just from the training dataset.

# task 4
print('\n\n########## task 4 ##########\n')
# There is no fourth dataset. Ha ha.
def weight_update_op(current_weights, learning_rate, data, label, prediction):
    """One step of the delta (LMS) update rule.

    Moves the weights along the input `data`, scaled by the learning rate
    and the prediction error (label - prediction); returns the new weights
    without mutating the current ones.
    """
    error = label - prediction
    step = learning_rate * error * data
    return current_weights + step
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement