# Untitled

a guest Feb 21st, 2019
1. import pandas as pd
2. import numpy as np
3. import matplotlib.pyplot as plt
4. from sklearn.decomposition import NMF
5. from sklearn.decomposition import PCA
6. from sklearn.decomposition import TruncatedSVD as SVD
7. from sklearn.metrics import mean_absolute_error as mae
8. from sklearn.metrics import explained_variance_score as evs
9. from sklearn.metrics import mean_squared_error as mse
10.
11.
# Toy product-by-person matrix: one row per product category, one column per
# person. The values are presumably purchase counts or ratings (TODO confirm).
V = pd.DataFrame(
    np.array([
        [0, 1, 0, 1, 2, 2],
        [2, 3, 1, 1, 2, 2],
        [1, 1, 1, 0, 1, 1],
        [0, 2, 3, 4, 1, 1],
        [0, 0, 0, 0, 1, 0],
    ]),
    index=('Овощи', 'Фрукты', 'Сладости', 'Хлеб', 'Кофе'),
    columns=('Миша', 'Маша', 'Рома', 'Дима', 'Витя', 'Вова'),
)
19.
def reconstruct(model, data):
    """Fit ``model`` on ``data`` and return its reconstruction of ``data``.

    Args:
        model: estimator *instance* (not the class) exposing ``fit``,
            ``transform`` and either ``inverse_transform`` or
            ``components_`` -- e.g. sklearn NMF, PCA or TruncatedSVD.
            The instance is fitted in place.
        data: pandas DataFrame to decompose.

    Returns:
        Tuple ``(reconstructed, interpolated)`` of DataFrames that share the
        index and columns of ``data``:

        * ``reconstructed`` -- the model's approximation of ``data``, rounded
          to two decimals;
        * ``interpolated`` -- ``reconstructed`` with every row rescaled
          linearly so that its minimum maps to 0 and its maximum to 1, again
          rounded to two decimals. A constant row maps to all zeros.
    """
    model.fit(data)
    W = model.transform(data)

    # Prefer the estimator's own inverse mapping: a model that centers its
    # input (such as PCA) has to add the mean back, which a bare W @ H
    # product silently omits.
    if hasattr(model, 'inverse_transform'):
        approximation = np.asarray(model.inverse_transform(W))
    else:
        approximation = np.dot(W, model.components_)

    reconstructed = pd.DataFrame(
        np.round(approximation, 2), columns=data.columns, index=data.index
    )
    # For SVD or PCA the reconstruction may contain negative elements; enable
    # the next line to substitute zeros for them:
    # reconstructed[reconstructed < 0] = 0

    # Rescale each row of the reconstruction to the range [0, 1].
    scaled_rows = []
    for _, row in reconstructed.iterrows():
        low, high = row.min(), row.max()
        if high > low:
            scaled_rows.append(np.interp(row.values, (low, high), (0, 1)))
        else:
            # np.interp needs strictly increasing x-coordinates; a constant
            # row has no spread to rescale, so map it to 0 explicitly.
            scaled_rows.append(np.zeros(len(row)))

    interpolated = pd.DataFrame(
        np.round(scaled_rows, 2),
        index=reconstructed.index,
        columns=reconstructed.columns,
    )
    return reconstructed, interpolated
37.
def sparsity(data):
    """Return the fraction of entries of ``data`` that are zero.

    ``data`` must expose a ``size`` attribute and be accepted by
    ``np.count_nonzero`` (a NumPy array or a pandas DataFrame, for example).
    """
    nonzero_share = np.count_nonzero(data) / data.size
    return 1.0 - nonzero_share
40.
41.
def rel_spars(data, reference=None):
    """Return the sparsity of ``data`` relative to a reference matrix.

    The result is ``1 - sparsity(data) / sparsity(reference)``.

    Args:
        data: matrix whose sparsity is being evaluated.
        reference: matrix to compare against. Defaults to the module-level
            ``V`` so that existing one-argument calls behave as before.

    Note:
        A reference without any zero entries has sparsity 0, which makes the
        ratio undefined (division by zero).
    """
    if reference is None:
        reference = V
    return 1 - (sparsity(data) / sparsity(reference))
44.
45.
def evaluate_plot(model, data, ks=(1, 2, 3, 4, 5)):
    """Plot reconstruction metrics against the number of components.

    For every ``k`` in ``ks`` a fresh ``model(n_components=k)`` is fitted via
    ``reconstruct`` and its reconstruction is scored with mean squared error,
    mean absolute error, explained variance and ``rel_spars``. The first
    position whose MSE, rounded to one decimal, equals that of the next
    position is highlighted with markers, value labels and a vertical line.

    Args:
        model: estimator *class* (not an instance) whose constructor accepts
            ``n_components``.
        data: pandas DataFrame to factorize.
        ks: component counts to evaluate, in plotting order.
    """
    ks = list(ks)
    mse_scores, mae_scores, evs_scores, spars_scores = [], [], [], []
    for k in ks:
        # Fit once per k and score the same reconstruction with every metric,
        # so the curves stay mutually consistent even for randomized solvers.
        approximation = reconstruct(model(n_components=k), data)[0]
        mae_scores.append(mae(data, approximation))
        mse_scores.append(mse(data, approximation))
        evs_scores.append(evs(data, approximation))
        spars_scores.append(rel_spars(approximation))

    plt.xlabel('N - components')
    plt.ylabel('Value')
    # The curve holds plain mean squared error, so it is labelled 'MSE'.
    plt.plot(mse_scores, label='MSE')
    plt.plot(mae_scores, label='MAE')
    plt.plot(evs_scores, label='EVS')
    plt.plot(spars_scores, label='SPARSITY')

    rounded_mse = np.round(mse_scores, 1)
    # Each point is compared with its successor, so stop one short of the end;
    # otherwise the lookup runs past the last element when no plateau exists.
    for i in range(len(rounded_mse) - 1):
        if rounded_mse[i] == rounded_mse[i + 1]:
            plt.scatter(i, mse_scores[i], c='red')
            plt.scatter(i, evs_scores[i], c='red')
            plt.scatter(i, spars_scores[i], c='green')
            plt.text(i, evs_scores[i] + 0.01, str(np.round(evs_scores[i], 3)))
            plt.text(i, spars_scores[i] + 0.01, str(np.round(spars_scores[i], 3)))
            plt.vlines(i, 0, 1, colors='red')
            break

    plt.legend(loc='best')
    # Curves are drawn against positions 0..len(ks)-1; relabel ticks with k.
    plt.xticks(list(range(len(ks))), ks)
    plt.show()
76.
77.
model = NMF
# or use other models to see result
# model = PCA
# model = SVD

# usage
evaluate_plot(model, V)

# reconstruct() calls fit/transform on its argument, so it needs an estimator
# instance rather than the class itself. n_components=2 is an illustrative
# choice -- TODO confirm the intended rank (e.g. the k highlighted in the plot).
reconstructed, interpolated = reconstruct(model(n_components=2), V)
print(reconstructed)  # reconstructed
print(interpolated)  # interpolated
