Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- from sklearn.decomposition import NMF
- from sklearn.decomposition import PCA
- from sklearn.decomposition import TruncatedSVD as SVD
- from sklearn.metrics import mean_absolute_error as mae
- from sklearn.metrics import explained_variance_score as evs
- from sklearn.metrics import mean_squared_error as mse
# Toy input matrix for the decompositions below.
# Rows are product categories (Vegetables, Fruits, Sweets, Bread, Coffee);
# columns are people, identified by first name.
# NOTE(review): the cell values look like per-person counts or ratings -- confirm.
V = pd.DataFrame(
    np.array([
        [0, 1, 0, 1, 2, 2],
        [2, 3, 1, 1, 2, 2],
        [1, 1, 1, 0, 1, 1],
        [0, 2, 3, 4, 1, 1],
        [0, 0, 0, 0, 1, 0],
    ]),
    index=('Овощи', 'Фрукты', 'Сладости', 'Хлеб', 'Кофе'),
    columns=('Миша', 'Маша', 'Рома', 'Дима', 'Витя', 'Вова'),
)
def reconstruct(model, data, *, clip_negative=False):
    """Fit *model* on *data* and rebuild the matrix from its factors.

    Args:
        model: An unfitted estimator *instance* exposing ``fit``,
            ``transform`` and ``components_`` (e.g. ``NMF(n_components=2)``).
            Passing the class itself does not work.
        data: pandas DataFrame holding the matrix to decompose.
        clip_negative: When True, negative entries of the reconstruction are
            replaced by zero before rescaling. Intended for decompositions
            such as SVD or PCA, whose factors may be negative.

    Returns:
        A ``(reconstructed, interpolated)`` tuple of DataFrames sharing the
        index and columns of *data*. ``reconstructed`` is ``W @ H`` rounded
        to two decimals; ``interpolated`` is the same matrix with every row
        linearly rescaled to the range [0, 1] and rounded to two decimals.
    """
    model.fit(data)
    weights = model.transform(data)
    components = model.components_
    reconstructed = pd.DataFrame(
        np.round(np.dot(weights, components), 2),
        columns=data.columns,
        index=data.index,
    )
    if clip_negative:
        reconstructed[reconstructed < 0] = 0

    # Iterate rows by position: a label-based lookup returns several rows at
    # once when index labels repeat, which breaks the per-row rescaling.
    rescaled_rows = [
        np.interp(row, (row.min(), row.max()), (0, 1))
        for row in reconstructed.to_numpy()
    ]
    interpolated = pd.DataFrame(
        np.round(rescaled_rows, 2),
        index=reconstructed.index,
        columns=reconstructed.columns,
    )
    return reconstructed, interpolated
def sparsity(data):
    """Return the fraction of entries in *data* that are exactly zero.

    *data* must expose ``size`` (e.g. a NumPy array or pandas DataFrame).
    """
    nonzero_share = np.count_nonzero(data) / data.size
    return 1.0 - nonzero_share
def rel_spars(data, reference=None):
    """Return the sparsity of *data* relative to a reference matrix.

    Computes ``1 - sparsity(data) / sparsity(reference)``: 0 means both
    matrices have the same share of zeros, 1 means *data* has no zeros at all,
    and negative values mean *data* has a larger share of zeros.

    Args:
        data: Matrix to evaluate (NumPy array or pandas DataFrame).
        reference: Matrix to compare against. Defaults to the module-level
            ``V`` so existing single-argument calls behave as before.

    Raises:
        ZeroDivisionError: If the reference matrix contains no zeros.
    """
    if reference is None:
        reference = V
    return 1 - (sparsity(data) / sparsity(reference))
def evaluate_plot(model, data, ks=(1, 2, 3, 4, 5)):
    """Plot reconstruction metrics against the number of components.

    For every ``k`` in *ks* the model is instantiated as
    ``model(n_components=k)``, *data* is reconstructed, and MSE, MAE,
    explained variance and relative sparsity are recorded. The first ``k``
    whose MSE (rounded to one decimal) equals that of the next ``k`` is
    highlighted on the plot.

    Args:
        model: Decomposition *class* (not an instance) accepting an
            ``n_components`` keyword, e.g. ``NMF``, ``PCA`` or ``TruncatedSVD``.
        data: pandas DataFrame holding the matrix to decompose.
        ks: Component counts to evaluate, in plotting order.

    Note:
        Relative sparsity is computed by ``rel_spars``, which compares against
        the module-level ``V`` rather than against *data*.
    """
    ks = list(ks)
    mse_scores, mae_scores, evs_scores, spars_scores = [], [], [], []
    for k in ks:
        # Fit once per k so that every metric describes the same
        # factorisation (solvers with random initialisation may differ
        # between fits) and no work is repeated.
        reconstructed = reconstruct(model(n_components=k), data)[0]
        mae_scores.append(mae(data, reconstructed))
        mse_scores.append(mse(data, reconstructed))
        evs_scores.append(evs(data, reconstructed))
        spars_scores.append(rel_spars(reconstructed))

    plt.xlabel('N - components')
    plt.ylabel('Value')
    plt.plot(mse_scores)
    plt.plot(mae_scores)
    plt.plot(evs_scores)
    plt.plot(spars_scores)

    # Mark the first position where the rounded MSE stops changing. Only
    # adjacent pairs are compared, so the last entry is never indexed past.
    rounded_mse = np.round(mse_scores, 1)
    for i, (current, following) in enumerate(zip(rounded_mse, rounded_mse[1:])):
        if current == following:
            plt.scatter(i, mse_scores[i], c='red')
            plt.scatter(i, evs_scores[i], c='red')
            plt.scatter(i, spars_scores[i], c='green')
            plt.text(i, evs_scores[i] + 0.01, str(np.round(evs_scores[i], 3)))
            plt.text(i, spars_scores[i] + 0.01, str(np.round(spars_scores[i], 3)))
            plt.vlines(i, 0, 1, colors='red')
            break

    # The first curve is the plain mean squared error, so label it as such.
    plt.legend(('MSE', 'MAE', 'EVS', 'SPARSITY'), loc='best')
    # Curves are plotted against positions 0..len(ks)-1; show k at each one.
    plt.xticks(list(range(len(ks))), ks)
    plt.show()
# Decomposition class to evaluate; evaluate_plot instantiates it for each k.
model = NMF
# or use other models to see result
#model = PCA
#model = SVD

# Component count used for the final reconstruction below; adjust it after
# inspecting the plot.
n_components = 2

#usage
evaluate_plot(model, V)
# reconstruct needs a model instance; passing the bare class makes
# ``model.fit(data)`` fail with a TypeError. Fit once and print both results.
reconstructed, interpolated = reconstruct(model(n_components=n_components), V)
print(reconstructed)  # reconstructed
print(interpolated)  # interpolated
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement