Advertisement
Guest User

Untitled

a guest
Feb 21st, 2019
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.98 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from sklearn.decomposition import NMF
  5. from sklearn.decomposition import PCA
  6. from sklearn.decomposition import TruncatedSVD as SVD
  7. from sklearn.metrics import mean_absolute_error as mae
  8. from sklearn.metrics import explained_variance_score as evs
  9. from sklearn.metrics import mean_squared_error as mse
  10.  
  11.  
  12. V = np.array([[0,1,0,1,2,2],
  13. [2,3,1,1,2,2],
  14. [1,1,1,0,1,1],
  15. [0,2,3,4,1,1],
  16. [0,0,0,0,1,0]])
  17. V = pd.DataFrame(V, index = ('Овощи', 'Фрукты', 'Сладости', 'Хлеб', 'Кофе'),
  18. columns = ('Миша', 'Маша' ,'Рома', 'Дима', 'Витя', 'Вова'))
  19.  
  20. def reconstruct(model, data):
  21. #input: model - sklearn model, data - pandas DataFrame
  22. #returns pandas DataFrame of reconstructed matrix
  23. model.fit(data)
  24. W = model.transform(data)
  25. H = model.components_
  26. reconstructed = pd.DataFrame(np.round(np.dot(W, H), 2), columns=data.columns, index=data.index)
  27. # next line use in case of SVD or PCA decomposition to substitute negative elements for zeros
  28. # reconstructed[reconstructed < 0] = 0
  29. new_data = []
  30. # interpolation of reconstructed matrix to range [0,1]
  31. for i in range(0, reconstructed.shape[0]):
  32. r = reconstructed.loc[reconstructed.index[i]]
  33. rd = np.interp(r, (r.min(), r.max()), (0, +1))
  34. new_data.append(rd)
  35. interpolated = pd.DataFrame(np.round(new_data, 2), index=reconstructed.index, columns=reconstructed.columns)
  36. return reconstructed , interpolated
  37.  
  38. def sparsity(data):
  39. return 1.0 - np.count_nonzero(data) / data.size
  40.  
  41.  
  42. def rel_spars(data):
  43. return 1 - (sparsity(data) / sparsity(V))
  44.  
  45.  
  46. def evaluate_plot(model, data):
  47. EVS = []
  48. MSE = []
  49. MAE = []
  50. SPARS = []
  51. ks = [1, 2, 3, 4, 5]
  52. for k in ks:
  53. MAE.append(mae(data, reconstruct(model(n_components=k), data)[0]))
  54. MSE.append((mse(data, reconstruct(model(n_components=k), data)[0])))
  55. EVS.append(evs(data, reconstruct(model(n_components=k), data)[0]))
  56. SPARS.append(rel_spars(reconstruct(model(n_components=k), data)[0]))
  57. plt.xlabel('N - components')
  58. plt.ylabel('Value')
  59. plt.plot(MSE)
  60. plt.plot(MAE)
  61. plt.plot(EVS)
  62. plt.plot(SPARS)
  63. METRIC = np.round(MSE, 1)
  64. for i in range(0, len((METRIC))):
  65. if METRIC[i] == METRIC[i + 1]:
  66. plt.scatter(i, MSE[i], c='red')
  67. plt.scatter(i, EVS[i], c='red')
  68. plt.scatter(i, SPARS[i], c='green')
  69. plt.text(i, EVS[i] + 0.01, str(np.round(EVS[i], 3)))
  70. plt.text(i, SPARS[i] + 0.01, str(np.round(SPARS[i], 3)))
  71. plt.vlines(i, 0, 1, colors='red')
  72. break
  73. plt.legend(('RMSE', 'MAE', 'EVS', 'SPARSITY'), loc='best')
  74. plt.xticks([0, 1, 2, 3, 4], ks)
  75. plt.show()
  76.  
  77.  
  78. model = NMF
  79. # or use other models to see result
  80. #model = PCA
  81. #model = SVD
  82.  
  83. #usage
  84. evaluate_plot(model,V)
  85. print(reconstruct(model,V)[0]) #reconstructed
  86. print(reconstruct(model,V)[1]) #interpolated
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement