# Untitled

a guest May 23rd, 2019 71 Never
1. import numpy as np
2. from sklearn.metrics import mean_squared_error
3. from sklearn.preprocessing import PolynomialFeatures
4.
5.
8.     return data['X'], data['Y'], data['X_test'], data['Y_test']
9.
def linear_regression(x, y, degree):
    """Fit a polynomial of the given degree to (x, y) with ``np.polyfit``.

    The fitted coefficients are printed and also returned so that callers
    can reuse them instead of parsing the printed output.

    Args:
        x: 1-D sequence of sample x-coordinates.
        y: Sample y-coordinates matching ``x``.
        degree: Degree of the fitting polynomial.

    Returns:
        The coefficient array produced by ``np.polyfit`` (highest power
        first, per the NumPy documentation).
    """
    coefficients = np.polyfit(x, y, degree)
    print(coefficients)
    return coefficients
13.
if __name__ == '__main__':
    # NOTE(review): `data` is not defined in the visible part of this file;
    # presumably it is loaded just before this point -- confirm.
    print("The given dataset's shape is: ")
    for array in data:
        print(array.shape)
    print('\n')

    data_trng, label_trng, data_test, label_test = load_data('./dataset1.npz')

    # Append a column of ones so the last weight acts as the bias term.
    bias_trng = np.ones((len(data_trng), 1))
    data_trng = np.concatenate((data_trng, bias_trng), axis=1)

    bias_test = np.ones((len(data_test), 1))
    data_test = np.concatenate((data_test, bias_test), axis=1)

    # Ordinary least squares via the normal equations (X^T X) w = X^T y.
    # Solving the linear system avoids forming the explicit inverse.
    weights = np.linalg.solve(data_trng.T @ data_trng, data_trng.T @ label_trng)
    print('The generated weights are: ', weights, '\n')

    pred_train = data_trng @ weights
    mse_train = mean_squared_error(label_trng, pred_train)
    print('Mean squared error for the training data: %g' % mse_train)

    pred_test = data_test @ weights
    mse_test = mean_squared_error(label_test, pred_test)
    print('Mean squared error for the testing data: %g' % mse_test)

    # Baseline: one random weight per column (features plus bias) instead of
    # a hard-coded count.
    random_weights = np.random.rand(data_trng.shape[1])
    random_pred = data_trng @ random_weights
    mse_random = mean_squared_error(label_trng, random_pred)
    print('Mean squared error for the train data with random weights: %g' % mse_random)

    # I think I found the correct parameters, because the MSE for both the
    # training and test data is very small, almost zero. However, the random
    # weights produce a very high MSE; compared to the proper weights, the
    # difference is 30 orders of magnitude.
53.
56.
if __name__ == '__main__':
    # NOTE(review): `data` is not defined in the visible part of this file;
    # presumably it is loaded just before this point -- confirm.
    print('The shapes of the given dataset are the following: ')
    for array in data:
        print(array.shape)
    print('\n')

    # NOTE(review): this is the same './dataset1.npz' path that the script
    # loads earlier, yet the conclusions below describe different results;
    # a different dataset file may have been intended -- confirm.
    data_trng, label_trng, data_test, label_test = load_data('./dataset1.npz')

    # Append a column of ones so the last weight acts as the bias term.
    bias_trng = np.ones((len(data_trng), 1))
    data_trng = np.concatenate((data_trng, bias_trng), axis=1)

    bias_test = np.ones((len(data_test), 1))
    data_test = np.concatenate((data_test, bias_test), axis=1)

    # Ordinary least squares via the normal equations (X^T X) w = X^T y.
    # Solving the linear system avoids forming the explicit inverse.
    weights = np.linalg.solve(data_trng.T @ data_trng, data_trng.T @ label_trng)

    print('The weights generating the given data are: ', weights, '\n')

    pred_train = data_trng @ weights
    mse_train = mean_squared_error(label_trng, pred_train)
    print('Mean squared error for the training data: %g' % mse_train)

    pred_test = data_test @ weights
    mse_test = mean_squared_error(label_test, pred_test)
    print('Mean squared error for the testing data: %g' % mse_test)

    # Baseline: one random weight per column (features plus bias) instead of
    # a hard-coded count.
    random_weights = np.random.rand(data_trng.shape[1])
    random_pred = data_trng @ random_weights
    mse_random = mean_squared_error(label_trng, random_pred)
    print('Mean squared error for the train data with random weights: %g' % mse_random)

    # The MSEs are almost the same, except for the random one, which is,
    # well, random. The training error is higher, which is odd: the test
    # error should be higher, because that is previously unseen data.
89.
92.
if __name__ == '__main__':
    # NOTE(review): `data` is not defined in the visible part of this file;
    # presumably it is loaded just before this point -- confirm.
    print('The shapes of the given dataset are the following: ')
    for array in data:
        print(array.shape)
    print('\n')

    # Unpack the training and test splits of the third dataset.
    data_trng, label_trng, data_test, label_test = load_data('./dataset3.npz')
99.
100.
def polynomial_regression_weights(data, label, degree):
    """Fit a single-variable polynomial of the given degree by least squares.

    All coefficients are estimated jointly from the design matrix
    ``[1, x, x**2, ..., x**degree]``. Fitting each power on its own, as if
    the other powers were absent, only gives the same answer when those
    columns happen to be mutually orthogonal.

    Args:
        data: Input samples, either 1-D ``(n,)`` or a single column ``(n, 1)``.
        label: Targets, either ``(n,)`` or ``(n, 1)``.
        degree: Non-negative polynomial degree.

    Returns:
        A 1-D float array of length ``degree + 1``; ``weights[k]`` is the
        coefficient of ``x**k``.

    Raises:
        ValueError: If ``degree`` is negative, or ``data`` / ``label`` are
            not matching 1-D or single-column arrays.
    """
    if degree < 0:
        raise ValueError('degree must be non-negative')

    x = np.asarray(data, dtype=float)
    y = np.asarray(label, dtype=float)
    # Accept column vectors as well as flat arrays.
    if x.ndim == 2 and x.shape[1] == 1:
        x = x[:, 0]
    if y.ndim == 2 and y.shape[1] == 1:
        y = y[:, 0]
    if x.ndim != 1 or y.shape != x.shape:
        raise ValueError('data and label must be matching 1-D or single-column arrays')

    # Column k of the design matrix holds x**k.
    design = np.vander(x, degree + 1, increasing=True)
    # lstsq is used rather than an explicit inverse of design.T @ design.
    weights, _, _, _ = np.linalg.lstsq(design, y, rcond=None)
    return weights
108.
109.
def polynomial_regression_prediction(data, weights):
    """Evaluate a single-variable polynomial at every sample in ``data``.

    Args:
        data: Input samples, either 1-D ``(n,)`` or a single column ``(n, 1)``.
        weights: Polynomial coefficients in ascending order of power
            (``weights[k]`` multiplies ``x**k``), either 1-D ``(k,)`` or a
            single column ``(k, 1)``.

    Returns:
        A 1-D float array of length ``n`` with the predicted values.

    Raises:
        ValueError: If ``data`` or ``weights`` is neither 1-D nor a single
            column.
    """
    x = np.asarray(data, dtype=float)
    if x.ndim == 2 and x.shape[1] == 1:
        x = x[:, 0]
    elif x.ndim != 1:
        raise ValueError('data must be a 1-D or single-column array')

    coefficients = np.asarray(weights, dtype=float)
    if coefficients.ndim == 2 and coefficients.shape[1] == 1:
        coefficients = coefficients[:, 0]
    elif coefficients.ndim != 1:
        raise ValueError('weights must be a 1-D or single-column array')

    # Column k of the design matrix holds x**k, so one matrix-vector product
    # sums every weighted power at once.
    design = np.vander(x, coefficients.size, increasing=True)
    return design @ coefficients
116.
117.
if __name__ == '__main__':
    # Fit polynomials of degree 1, 2 and 5 on the training split and print
    # the (train, test) mean squared error pair for each degree.
    for degree in (1, 2, 5):
        weights = polynomial_regression_weights(data_trng, label_trng, degree)
        # The prediction helper is called with the weights as a column vector.
        column_weights = np.expand_dims(weights, 1)
        pred_train = polynomial_regression_prediction(data_trng, column_weights)
        pred_test = polynomial_regression_prediction(data_test, column_weights)

        mse_train = mean_squared_error(label_trng, pred_train)
        mse_test = mean_squared_error(label_test, pred_test)

        print(mse_train, mse_test)

    # I think that the training data was generated by a first-order
    # polynomial, because the first-order polynomial regression produces the
    # lowest MSE. Yes, I would have deduced the same just from the training
    # dataset.
147.