# Untitled

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle


def get_w(X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Return the ordinary least-squares weights for ``X @ w ~= y``.

    Args:
        X: 2-D design matrix with one row per sample and one column per
            feature (include a column of ones if an intercept is wanted).
        y: Target values, one entry (or row) per sample.

    Returns:
        The weight vector ``w`` minimising ``||X @ w - y||``. For a
        full-rank ``X`` this equals the normal-equation solution
        ``inv(X.T @ X) @ X.T @ y``.
    """
    # Solve the least-squares problem directly rather than inverting
    # X.T @ X: forming the inverse squares the condition number and fails
    # outright when columns are linearly dependent. lstsq returns the
    # minimum-norm solution in that rank-deficient case.
    w, *_ = np.linalg.lstsq(X, y, rcond=None)
    return w

def predict(w, input_data):
    """Apply the linear model: return the matrix product ``input_data @ w``.

    ``input_data`` may be a single feature vector or a matrix with one
    sample per row; the result follows the usual ``@`` shape rules.
    """
    return input_data @ w

# Load the dataset (first CSV column is used as the index) and randomise
# the row order.
df = pd.read_csv("diamonds.csv", index_col=0)
df = shuffle(df)

# Integer codes for the categorical columns. Any value not present in a
# dictionary becomes NaN after Series.map.
cut_class_dict = {"Fair": 1, "Good": 2, "Very Good": 3, "Premium": 4, "Ideal": 5}
clarity_dict = {"I3": 1, "I2": 2, "I1": 3, "SI2": 4, "SI1": 5, "VS2": 6, "VS1": 7, "VVS2": 8, "VVS1": 9, "IF": 10, "FL": 11}
color_dict = {"J": 1,"I": 2,"H": 3,"G": 4,"F": 5,"E": 6,"D": 7}

df["cut"] = df["cut"].map(cut_class_dict)
df["clarity"] = df["clarity"].map(clarity_dict)
df["color"] = df["color"].map(color_dict)

# Features are every column except the target. The keyword form is
# required: passing the axis positionally (``drop(labels, 1)``) raises
# TypeError on pandas >= 2.0.
X = df.drop(columns=["price"]).to_numpy()
y = df["price"].to_numpy()

# Prepend a column of ones so the first weight acts as the intercept.
X = np.hstack((np.ones((X.shape[0], 1)), X))

w = get_w(X, y)

# Show predictions for the first 20 rows (fewer if the dataset is smaller,
# instead of failing with IndexError).
for features, actual in zip(X[:20], y[:20]):
    print(f"Predicted {predict(w, features)} / Real {actual}")