import numpy as np
import matplotlib.pyplot as plt

# read train data; column 0 holds the x values, column 1 the y values
data_train = np.loadtxt("Train_Dataset.csv")
x_train = data_train[:, 0].reshape(-1, 1)  # all of the x values
y_train = data_train[:, 1].reshape(-1, 1)  # all of the y values

# read test data the same way
data_test = np.loadtxt("Test_Dataset.csv")
x_test = data_test[:, 0].reshape(-1, 1)  # all of the x values
y_test = data_test[:, 1].reshape(-1, 1)  # all of the y values

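# Note (my addition): np.loadtxt splits on whitespace by default; if the .csv
# files are comma-separated, both calls above would need delimiter=",".
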
class RegressionModel:
    def __init__(self, degree):
        self.degree = degree
        # model[0]: design matrix X, model[1]: weights w, model[2]: fitted y values
        self.model = [None, None, None]

    def vectorized_SLR(self, x, y):
        # normal equation: w = (X^T X)^(-1) X^T y
        XT = np.transpose(x)
        A = np.matmul(XT, x)  # A = X^T X is a square matrix

        if np.linalg.det(A) != 0:  # A is invertible, solve directly
            B = np.matmul(XT, y)  # right-hand side of the formula
            w = np.matmul(np.linalg.inv(A), B)  # create w
            return w

        else:  # if the det is 0, A is singular and cannot be inverted
            lam = np.identity(A.shape[0])  # identity matrix in the size of A

            while np.linalg.det(A) == 0:  # add the identity until A becomes invertible
                A = np.add(A, lam)
            B = np.matmul(XT, y)  # proceed as before
            w = np.matmul(np.linalg.inv(A), B)
            return w

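    # Note (my addition): explicitly inverting A is numerically fragile. An
    # equivalent, more stable sketch would solve the linear system instead:
    #     w = np.linalg.solve(np.matmul(XT, x), np.matmul(XT, y))
    # or use np.linalg.lstsq(x, y, rcond=None)[0] for the general case.
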
    def fit(self, x_values, y_values):  # compute w with the method above
        generated_x_values = self._generate_features(x_values)
        w = self.vectorized_SLR(generated_x_values, y_values)
        self.model[1] = w  # store the weights in the model
        self.model[2] = np.matmul(self.model[0], self.model[1])  # store the fitted y values
        return w

    def predict(self, x_test):
        new_x_values = self._generate_features(x_test)  # build features for the new x values
        more_y_values = np.matmul(new_x_values, self.model[1])  # apply the trained w
        return [x_test, more_y_values]  # return x and y values

    def _generate_features(self, x_values):  # build the design matrix
        shape_fst = x_values.shape[0]  # number of x values
        feature_matrix_x = np.empty((shape_fst, self.degree + 1))  # uninitialized matrix

        for i in range(0, shape_fst):  # for every x value
            for j in range(0, self.degree + 1):  # one column per power, up to the degree
                feature_matrix_x[i][j] = np.power(x_values[i], j)  # entry is x_i^j

        self.model[0] = feature_matrix_x  # store the design matrix in the model
        return feature_matrix_x

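# Note (my addition): _generate_features builds a Vandermonde matrix, which
# numpy can also produce directly; given the loop above, this should print True:
print(np.allclose(RegressionModel(3)._generate_features(x_train),
                  np.vander(x_train.ravel(), 4, increasing=True)))
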
# Euclidean distance (ED) formula
def ed(y, y_dach):  # y_dach ("y-hat") holds the predicted values
    ed_value = 0.0

    for i in range(y.size):
        ed_value += (y[i] - y_dach[i]) ** 2

    return np.sqrt(ed_value)


# ED Testing
y = np.array([0.8, 0.43, 1.74, 0.26, 4.06, 0.73, 2.8, 3.37])
y_dach = np.array([3.49, 1.3, 1.49, 4.12, 2.19, 4.24, 4.67, 0.22])
print(ed(y, y_dach))
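# The loop in ed() is equivalent to the vectorized form below; np.linalg.norm
# of the difference vector is exactly sqrt(sum of squared differences).
print(np.linalg.norm(y - y_dach))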


# Regression Model Testing
plt.scatter(x_train, y_train)  # plot the training points
plt.plot(x_test, y_test, color="black")  # plot the test data

# create the objects
p1 = RegressionModel(1)
p2 = RegressionModel(2)
p3 = RegressionModel(3)
p4 = RegressionModel(4)

# train the weights
p1.fit(x_train, y_train)
p2.fit(x_train, y_train)
p3.fit(x_train, y_train)
p4.fit(x_train, y_train)

# plot every model's prediction (call predict once per model, not twice)
for model, color in ((p1, "blue"), (p2, "red"), (p3, "yellow"), (p4, "green")):
    x_pred, y_pred = model.predict(x_test)
    plt.plot(x_pred, y_pred, color=color)

plt.show()
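
# Extra check (my addition): ed() can quantify the claim below by measuring
# each model's distance to the test targets; smaller means a closer fit.
for model in (p1, p2, p3, p4):
    print("degree", model.degree, "ED:", ed(y_test, model.predict(x_test)[1]).item())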


"""
2.4 c

The degree-2 approximation is more accurate than the degree-3 and degree-4
approximations, because higher-degree functions tend to show greater variance
beyond the range of the test values. This is because higher-degree functions
usually exhibit more oscillation, which then influences the further course of
the graph more strongly than, for example, the degree-2 graph does.
"""