# Untitled

import numpy as np
import pandas
import matplotlib.pyplot as plt


def main():
    """Load ``./Salary.csv`` and run one fitting pass on its contents.

    The features are every column except the last two; the target is the
    last column, reshaped into a single column with one row per CSV row.
    """
    salary_table = pandas.read_csv('./Salary.csv')
    row_count = salary_table.shape[0]

    # NOTE(review): ``:-2`` skips the second-to-last column as well as the
    # target column. If the CSV has only two columns this leaves no features
    # at all -- confirm against the file whether ``:-1`` was intended.
    features = salary_table.iloc[:, :-2].values
    targets = salary_table.iloc[:, -1].values.reshape(row_count, 1)

    lear_and_fit(features, targets)


def lear_and_fit(x, y, iterations=1):
    """Fit linear-model weights to ``x``/``y`` with batch gradient descent.

    A column of ones is prepended to ``x`` so that the first weight acts as
    the intercept. Weights start from uniform random values in ``[0, 1)``
    and are moved against the gradient once per iteration, using the
    learning rate defined in ``update_weights``.

    Args:
        x: 2-D feature array with one row per sample.
        y: Target values as a column with one row per sample.
        iterations: Number of gradient-descent steps to take. Defaults to 1,
            the single step this function performed before the parameter
            existed.

    Returns:
        The weight column after the final step (intercept weight first).
        With ``iterations=0`` the random initial weights are returned.
    """
    # One weight per feature plus one for the intercept column.
    weights = initialize_weights(len(x[0]) + 1, 1)
    design_matrix = insert_coefficient_to_weights_matrix(x)

    for _ in range(iterations):
        predictions = predict(design_matrix, weights)
        gradient_vector = calculate_gradient(predictions, design_matrix, y)
        weights = update_weights(gradient_vector, weights, predictions)

    # Hand the fitted weights back; previously they were computed and dropped.
    return weights

def update_weights(gradient_vector, weights, predictions=None, alpha=0.00001):
    """Take one gradient-descent step and return the new weights.

    Args:
        gradient_vector: Gradient with respect to ``weights``; must be
            broadcastable against ``weights``.
        weights: Current weights. The input is not modified.
        predictions: Unused. Accepted only so that existing three-argument
            calls keep working.
        alpha: Learning rate (step size). Defaults to 0.00001, the value
            that used to be hard-coded here.

    Returns:
        A new array equal to ``weights - alpha * gradient_vector``.
    """
    step = np.multiply(gradient_vector, alpha)
    return np.subtract(weights, step)

def calculate_gradient(preconditions, x, y):
    """Return ``x.T · (preconditions - y)`` divided by the number of rows of ``x``.

    Args:
        preconditions: Model outputs for each row of ``x`` (the caller in
            this file passes its predictions here).
        x: Matrix the predictions were computed from, one row per sample.
        y: Target values, same shape as ``preconditions``.

    Returns:
        The averaged gradient, one entry per column of ``x``.
    """
    errors = calculate_residual(y, preconditions)
    summed_gradient = multiply_residual_by_features_matrix(x.T, errors)
    # Average over the samples (rows of x).
    return summed_gradient / len(x)

def multiply_residual_by_features_matrix(x, residual):
    """Return the dot product of ``x`` with ``residual``.

    Args:
        x: Left operand of the product.
        residual: Right operand of the product.

    Returns:
        The result of ``np.dot(x, residual)``.
    """
    product = np.dot(x, residual)
    return product


def calculate_residual(y, predictions):
    """Return the element-wise difference ``predictions - y``.

    Note the argument order: the targets come first, the predictions second,
    but the result is predictions minus targets.

    Args:
        y: Target values.
        predictions: Predicted values, broadcastable against ``y``.

    Returns:
        The element-wise ``predictions - y``.
    """
    difference = np.subtract(predictions, y)
    return difference

def initialize_weights(x, y):
    """Return an ``x``-by-``y`` array of uniform random values in ``[0, 1)``.

    Args:
        x: Number of rows.
        y: Number of columns.

    Returns:
        A freshly drawn array of shape ``(x, y)`` from ``np.random.rand``.
    """
    shape = (x, y)
    return np.random.rand(*shape)


def insert_coefficient_to_weights_matrix(matrix):
    """Return ``matrix`` with a column of ones prepended.

    Args:
        matrix: 2-D array-like with one row per sample.

    Returns:
        A new array with one more column than ``matrix``; column 0 is all
        ones and the remaining columns are ``matrix`` unchanged.
    """
    row_count = len(matrix)
    leading_ones = np.ones((row_count, 1))
    # Join along the column axis so the ones become column 0.
    return np.concatenate((leading_ones, matrix), axis=1)


def predict(x, weights):
    """Return the linear-model output ``x · weights``.

    Args:
        x: Input matrix, one row per sample.
        weights: Weight column with one row per column of ``x``.

    Returns:
        The result of ``np.dot(x, weights)``.
    """
    outputs = np.dot(x, weights)
    return outputs


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()