• API
• FAQ
• Tools
• Archive
daily pastebin goal
23%
SHARE
TWEET

# Untitled

a guest Feb 21st, 2019 74 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import NMF
from sklearn.decomposition import PCA
from sklearn.decomposition import TruncatedSVD as SVD
from sklearn.metrics import explained_variance_score as evs
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
10.
11.
# Small example matrix to factorize. Row labels are product categories
# (vegetables, fruits, sweets, bread, coffee); column labels are people's names.
V = pd.DataFrame(
    np.array([
        [0, 1, 0, 1, 2, 2],
        [2, 3, 1, 1, 2, 2],
        [1, 1, 1, 0, 1, 1],
        [0, 2, 3, 4, 1, 1],
        [0, 0, 0, 0, 1, 0],
    ]),
    index=['Овощи', 'Фрукты', 'Сладости', 'Хлеб', 'Кофе'],
    columns=['Миша', 'Маша', 'Рома', 'Дима', 'Витя', 'Вова'],
)
19.
def reconstruct(model, data):
    """Fit ``model`` on ``data`` and rebuild ``data`` from the decomposition.

    Parameters:
        model: unfitted estimator *instance* (not the class) exposing
            ``fit``, ``transform`` and ``components_``; ``inverse_transform``
            is used when present.
        data: pandas DataFrame to decompose.

    Returns:
        A tuple ``(reconstructed, interpolated)`` of DataFrames that share the
        index and columns of ``data``. ``reconstructed`` is the approximation
        rounded to 2 decimals; ``interpolated`` is the same matrix with every
        row rescaled linearly to the range [0, 1] and rounded to 2 decimals.
    """
    model.fit(data)
    W = model.transform(data)
    if hasattr(model, 'inverse_transform'):
        # Models that centre the data in transform() (e.g. PCA) need their own
        # inverse mapping; a bare W.H product would leave the mean out.
        approx = np.asarray(model.inverse_transform(W))
    else:
        approx = np.dot(W, model.components_)
    reconstructed = pd.DataFrame(np.round(approx, 2), columns=data.columns, index=data.index)
    # For SVD or PCA decompositions, enable the next line to replace negative
    # elements with zeros:
    # reconstructed[reconstructed < 0] = 0

    # Rescale each row to [0, 1]. Rows are taken positionally so duplicate
    # index labels cannot turn a row lookup into a sub-frame.
    scaled_rows = []
    for row in reconstructed.values.astype(float):
        low, high = row.min(), row.max()
        if high > low:
            scaled_rows.append(np.interp(row, (low, high), (0, +1)))
        else:
            # A constant row has no range to interpolate over; map it to 0.
            scaled_rows.append(np.zeros_like(row))
    interpolated = pd.DataFrame(
        np.round(scaled_rows, 2).reshape(reconstructed.shape),
        index=reconstructed.index,
        columns=reconstructed.columns,
    )
    return reconstructed, interpolated
37.
def sparsity(data):
    """Return the fraction of entries in ``data`` that are exactly zero.

    ``data`` may be any array-like (DataFrame, ndarray, nested list); it is
    viewed as a NumPy array so that ``size`` is always available.
    """
    values = np.asarray(data)
    return 1.0 - np.count_nonzero(values) / values.size
40.
41.
def rel_spars(data, reference=None):
    """Return ``1 - sparsity(data) / sparsity(reference)``.

    ``reference`` defaults to the module-level matrix ``V``, which keeps the
    original single-argument call working. A reference with no zero entries
    has sparsity 0 and makes the ratio undefined (division by zero).
    """
    if reference is None:
        reference = V
    return 1 - (sparsity(data) / sparsity(reference))
44.
45.
def evaluate_plot(model, data):
    """Plot reconstruction metrics against the number of components.

    For every k in 1..5 a fresh ``model(n_components=k)`` is fitted through
    ``reconstruct`` and four values are recorded: mean squared error, mean
    absolute error, explained variance score and the value of ``rel_spars``
    for the reconstructed matrix. The first k whose MSE, rounded to one
    decimal, equals the MSE of the following k is highlighted on the plot.

    Parameters:
        model: estimator class (not an instance) accepting ``n_components``.
        data: pandas DataFrame to decompose.
    """
    ks = [1, 2, 3, 4, 5]
    EVS = []
    MSE = []
    MAE = []
    SPARS = []
    for k in ks:
        # Fit once per k and score that single reconstruction, so all four
        # metrics describe the same fitted model.
        approx = reconstruct(model(n_components=k), data)[0]
        MAE.append(mae(data, approx))
        MSE.append(mse(data, approx))
        EVS.append(evs(data, approx))
        SPARS.append(rel_spars(approx))
    plt.xlabel('N - components')
    plt.ylabel('Value')
    plt.plot(MSE)
    plt.plot(MAE)
    plt.plot(EVS)
    plt.plot(SPARS)
    METRIC = np.round(MSE, 1)
    # Compare each value with its successor; stop one short of the end so the
    # look-ahead never runs past the last element.
    for i in range(len(METRIC) - 1):
        if METRIC[i] == METRIC[i + 1]:
            plt.scatter(i, MSE[i], c='red')
            plt.scatter(i, EVS[i], c='red')
            plt.scatter(i, SPARS[i], c='green')
            plt.text(i, EVS[i] + 0.01, str(np.round(EVS[i], 3)))
            plt.text(i, SPARS[i] + 0.01, str(np.round(SPARS[i], 3)))
            plt.vlines(i, 0, 1, colors='red')
            break
    # The first curve is mean_squared_error, so it is labelled MSE.
    plt.legend(('MSE', 'MAE', 'EVS', 'SPARSITY'), loc='best')
    # Curves are plotted against positions 0..len(ks)-1; label them with k.
    plt.xticks(list(range(len(ks))), ks)
    plt.show()
76.
77.
model = NMF
# or use other models to see result
# model = PCA
# model = SVD

# usage
# evaluate_plot() takes the estimator class and instantiates it per k itself.
evaluate_plot(model, V)
# reconstruct() calls fit/transform on its argument, so it needs an instance;
# model() uses the estimator's default number of components.
reconstructed, interpolated = reconstruct(model(), V)
print(reconstructed)  # reconstructed
print(interpolated)  # interpolated
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top