Advertisement
Guest User

Linear Regression

a guest
Apr 19th, 2019
123
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.37 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. from sklearn.utils import shuffle
  4.  
  class LinearRegression:
      """Ordinary least-squares linear regression.

      The model is ``y ~ X @ w``.  No intercept is added automatically: callers
      that want one must include a column of ones in ``X``.

      Attributes set by the methods:
          X, y: training data as float arrays (written by ``fit`` only).
          w: fitted coefficient vector, ``None`` until ``fit`` has run.
          prediction: result of the most recent ``predict`` call.
          sqr_error: residual sum of squares from the most recent ``score``.
          sqr_error_mean: total sum of squares from the most recent ``score``.
      """

      def __init__(self,):
          # Coefficients are unknown until fit() is called.
          self.w = None

      def fit(self, X, y):
          """Estimate the coefficients ``w`` that minimise ``||X @ w - y||^2``.

          Args:
              X: array-like of shape (n_samples, n_features).
              y: array-like of shape (n_samples,) with the target values.
          """
          self.X = np.asarray(X, dtype=float)
          self.y = np.asarray(y, dtype=float)
          # lstsq solves the same normal equations as inv(X.T @ X) @ X.T @ y
          # but without forming an explicit inverse, so it is numerically
          # more stable and still returns a (minimum-norm) solution when the
          # columns of X are collinear.
          self.w, *_ = np.linalg.lstsq(self.X, self.y, rcond=None)

      def predict(self, X):
          """Return the predicted targets ``X @ w`` for the rows of ``X``.

          Raises:
              AttributeError: if the model has not been fitted yet.
          """
          if self.w is None:
              raise AttributeError(
                  "LinearRegression has no coefficients yet; call fit() first"
              )
          # The training data kept by fit() is deliberately left untouched.
          self.prediction = np.asarray(X, dtype=float) @ self.w

          return self.prediction

      def score(self, X, y):
          """Return the coefficient of determination R^2 on ``(X, y)``.

          R^2 = 1 - SS_res / SS_tot, where SS_res is the sum of squared
          residuals and SS_tot is the sum of squared deviations of ``y`` from
          its own mean.  If ``y`` is constant, SS_tot is zero and the result
          is not finite.
          """
          y = np.asarray(y, dtype=float)
          residuals = y - self.predict(X)
          self.sqr_error = np.sum(residuals ** 2, axis=0)
          # The baseline is the mean of the observed targets, not the mean of
          # the predictions.
          self.sqr_error_mean = np.sum((y - np.mean(y, axis=0)) ** 2, axis=0)
          return 1 - self.sqr_error / self.sqr_error_mean
  24.  
  def transformDf(df, array, train_size=0.2):
      """One-hot encode categorical columns and cut out a train and a test slice.

      Each column named in ``array`` is replaced by its dummy (indicator)
      columns, which ``pd.get_dummies`` appends at the end of the frame.  The
      last dummy of every encoded column is dropped so that level becomes the
      all-zeros reference level.

      Args:
          df: input DataFrame; it is not modified.
          array: iterable of column names to encode, processed in order.
          train_size: fraction of the rows that goes into the training slice.

      Returns:
          A ``(train_df, test_df)`` tuple.  ``train_df`` is the first
          ``round(n * train_size)`` rows; ``test_df`` is the slice that follows
          it, up to row ``round(n * train_size * 2)``.  Rows beyond that point
          are not returned.
      """
      encoded = df
      for column in array:
          columns_before = encoded.shape[1]
          encoded = pd.get_dummies(encoded, columns=[column])
          # get_dummies removed `column` itself, so anything beyond
          # columns_before - 1 is a freshly appended dummy column.
          dummies_added = encoded.shape[1] - (columns_before - 1)
          # Only drop the trailing column when it really is a dummy; an all-NaN
          # column yields no dummies and the last column would be unrelated.
          if dummies_added > 0:
              encoded = encoded.drop(encoded.columns[-1], axis=1)

      n_rows = encoded.shape[0]
      train_end = round(n_rows * train_size)
      test_end = round(n_rows * train_size * 2)
      train_df = encoded[:train_end]
      test_df = encoded[train_end:test_end]

      return train_df, test_df
  35.  
  # Load the diamonds data and shuffle the rows so that the positional
  # train/test slices taken by transformDf are random samples.
  df = pd.read_csv("diamonds.csv", index_col=0)
  df = shuffle(df)

  # Dummy encoding produced by transformDf: every categorical column becomes
  # one indicator column per level, and the last level (in the sorted order
  # used by pd.get_dummies) is dropped, so it is encoded as all zeros.
  #
  # NOTE(review): the levels below assume the standard diamonds data set;
  # confirm against diamonds.csv.
  #
  #   CUT      indicators: Fair, Good, Ideal, Premium        reference: Very Good
  #   COLOR    indicators: D, E, F, G, H, I                  reference: J
  #   CLARITY  indicators: I1, IF, SI1, SI2, VS1, VS2, VVS1  reference: VVS2
  #
  # (The earlier version of this note listed COLOR without level H.)

  # Variables
  # Fraction of rows used for training; transformDf returns a test slice of the
  # same size taken right after the training rows.
  TRAIN_SIZE = 0.2
  dummies = np.array(["color", "cut", "clarity"])
  df_train, df_test = transformDf(df, dummies, TRAIN_SIZE)


  def _design_matrix(frame):
      """Return the features of *frame* as floats with a leading column of ones."""
      # dtype=float is required: recent pandas emits boolean dummy columns and a
      # mixed bool/number frame would otherwise convert to an object array.
      features = frame.drop(["price"], axis=1).to_numpy(dtype=float)
      intercept = np.ones((features.shape[0], 1))
      return np.hstack([intercept, features])


  # Features and labels, training slice
  X_train = _design_matrix(df_train)
  y_train = df_train["price"].to_numpy()

  # Features and labels, test slice
  X_test = _design_matrix(df_test)
  y_test = df_test["price"].to_numpy()

  model = LinearRegression()
  model.fit(X_train, y_train)

  predicts = model.predict(X_test)

  # Show the first ten predictions next to the true prices (fewer if the test
  # slice is shorter than ten rows).
  for predicted, actual in zip(predicts[:10], y_test[:10]):
      print(f"Predict {predicted} / Real {actual}")

  print(model.score(X_test, y_test))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement