SHARE
TWEET

Untitled

a guest Feb 21st, 2019 74 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from sklearn.decomposition import NMF
  5. from sklearn.decomposition import PCA
  6. from sklearn.decomposition import TruncatedSVD as SVD
  7. from sklearn.metrics import mean_absolute_error as mae
  8. from sklearn.metrics import explained_variance_score as evs
  9. from sklearn.metrics import mean_squared_error as mse
  10.  
  11.  
  12. V = np.array([[0,1,0,1,2,2],
  13.               [2,3,1,1,2,2],
  14.               [1,1,1,0,1,1],
  15.               [0,2,3,4,1,1],
  16.               [0,0,0,0,1,0]])
  17. V = pd.DataFrame(V, index = ('Овощи', 'Фрукты', 'Сладости', 'Хлеб', 'Кофе'),
  18.                 columns = ('Миша', 'Маша' ,'Рома', 'Дима', 'Витя', 'Вова'))
  19.  
  20. def reconstruct(model, data):
  21. #input: model - sklearn model, data - pandas DataFrame
  22. #returns pandas DataFrame of reconstructed matrix
  23.     model.fit(data)
  24.     W = model.transform(data)
  25.     H = model.components_
  26.     reconstructed = pd.DataFrame(np.round(np.dot(W, H), 2), columns=data.columns, index=data.index)
  27.     # next line use in case of SVD or PCA decomposition to substitute negative elements for zeros
  28.     # reconstructed[reconstructed < 0] = 0
  29.     new_data = []
  30.     # interpolation of reconstructed matrix to range [0,1]
  31.     for i in range(0, reconstructed.shape[0]):
  32.         r = reconstructed.loc[reconstructed.index[i]]
  33.         rd = np.interp(r, (r.min(), r.max()), (0, +1))
  34.         new_data.append(rd)
  35.     interpolated = pd.DataFrame(np.round(new_data, 2), index=reconstructed.index, columns=reconstructed.columns)
  36.     return reconstructed , interpolated
  37.  
  38. def sparsity(data):
  39.     return 1.0 - np.count_nonzero(data) / data.size
  40.  
  41.  
  42. def rel_spars(data):
  43.     return 1 - (sparsity(data) / sparsity(V))
  44.  
  45.  
  46. def evaluate_plot(model, data):
  47.     EVS = []
  48.     MSE = []
  49.     MAE = []
  50.     SPARS = []
  51.     ks = [1, 2, 3, 4, 5]
  52.     for k in ks:
  53.         MAE.append(mae(data, reconstruct(model(n_components=k), data)[0]))
  54.         MSE.append((mse(data, reconstruct(model(n_components=k), data)[0])))
  55.         EVS.append(evs(data, reconstruct(model(n_components=k), data)[0]))
  56.         SPARS.append(rel_spars(reconstruct(model(n_components=k), data)[0]))
  57.     plt.xlabel('N - components')
  58.     plt.ylabel('Value')
  59.     plt.plot(MSE)
  60.     plt.plot(MAE)
  61.     plt.plot(EVS)
  62.     plt.plot(SPARS)
  63.     METRIC = np.round(MSE, 1)
  64.     for i in range(0, len((METRIC))):
  65.         if METRIC[i] == METRIC[i + 1]:
  66.             plt.scatter(i, MSE[i], c='red')
  67.             plt.scatter(i, EVS[i], c='red')
  68.             plt.scatter(i, SPARS[i], c='green')
  69.             plt.text(i, EVS[i] + 0.01, str(np.round(EVS[i], 3)))
  70.             plt.text(i, SPARS[i] + 0.01, str(np.round(SPARS[i], 3)))
  71.             plt.vlines(i, 0, 1, colors='red')
  72.             break
  73.     plt.legend(('RMSE', 'MAE', 'EVS', 'SPARSITY'), loc='best')
  74.     plt.xticks([0, 1, 2, 3, 4], ks)
  75.     plt.show()
  76.    
  77.    
  78. model = NMF
  79. # or use other models to see result
  80. #model = PCA
  81. #model = SVD
  82.  
  83. #usage
  84. evaluate_plot(model,V)
  85. print(reconstruct(model,V)[0]) #reconstructed
  86. print(reconstruct(model,V)[1]) #interpolated
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top