Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import numpy
- import pandas
- from pandas import DataFrame
- from sklearn.externals import joblib
- from sklearn.decomposition import ProjectedGradientNMF
- from sklearn.decomposition import PCA
- from math import sqrt
# ---------------------------------------------------------------------------
# Re-fit an NMF model on the dodge dataset, sized by a PCA variance analysis,
# then project the previously computed k-means centroids into that new NMF
# space.
# ---------------------------------------------------------------------------

# Share of total variance the retained principal components must explain.
VARIANCE_THRESHOLD = 0.95

# NOTE: joblib.load unpickles its input; DATASET_D, MODEL_A and MODEL_B must
# only ever point at trusted files.

# load raw_clean_dedupe_norm into memory
dodge_data = joblib.load(os.environ['DATASET_D'])

# fit a PCA that keeps one component per input column so the complete
# explained-variance curve is available
pca = PCA(n_components=dodge_data.shape[1])
pca.fit(dodge_data)

# cumulative share of variance explained by the first 1, 2, ... components
explained_variance = pca.explained_variance_ratio_.cumsum()

# Smallest number of principal components whose cumulative explained variance
# reaches the threshold. searchsorted returns the index of the first entry
# >= threshold, so +1 turns that index into a count. (Counting the entries
# that are <= threshold instead comes up one component short, and yields 0
# when the first component alone already exceeds the threshold.)
# The clamp covers the case where rounding keeps the total below threshold.
p_comp = min(
    int(numpy.searchsorted(explained_variance, VARIANCE_THRESHOLD)) + 1,
    len(explained_variance),
)

# use the number of principal components to initialize a matrix
# factorization model
model = ProjectedGradientNMF(
    n_components=p_comp,
    init='nndsvd',
    beta=1,
    sparseness=None,
)

# fit NMF model to the raw_clean_dedupe_norm dataset
model.fit(dodge_data)

# load the NMF model that was used to compute the original kmeans centroids
nmf = joblib.load(os.environ['MODEL_A'])

# load the stored kmeans object(s) and keep the entry at index 6
# NOTE(review): presumably MODEL_B holds one fitted KMeans per candidate
# cluster count and index 6 is the chosen one -- confirm against the producer.
kmeans = joblib.load(os.environ['MODEL_B'])
kmeans = kmeans[6]

# map the centroids from the old NMF space back into the original feature
# space (35 columns, per the original author's note)
centroids = numpy.dot(kmeans.cluster_centers_, nmf.components_)

# project the feature-space centroids into the newly fitted NMF space
centroids = model.transform(centroids)
def normalizeZeroOne(vec):
    """Rescale *vec* linearly so its minimum maps to 0 and its maximum to 1.

    Args:
        vec: One-dimensional array-like of numbers (list or numpy array).

    Returns:
        numpy.ndarray of floats with the same length as *vec*. A constant
        (zero-range) input yields all zeros instead of NaN, and an empty
        input yields an empty array.
    """
    values = numpy.asarray(vec, dtype=float)
    if values.size == 0:
        # nothing to scale; min()/max() would raise on an empty sequence
        return values.copy()

    low = values.min()
    span = values.max() - low
    if span == 0:
        # every element is identical: 0/0 would produce NaN, so pin to 0
        return numpy.zeros_like(values)

    return (values - low) / span
# Normalize the transformed centroids to the [0, 1] range, one centroid (row)
# at a time, then export the results.
# NOTE(review): the original script first filled normed_centroids column by
# column and then immediately re-zeroed it and filled it row by row, so only
# the row-wise values were ever written out. The discarded column-wise pass
# has been removed; confirm that row-wise scaling is what is wanted.
normed_centroids = numpy.zeros(centroids.shape)
for row_idx, centroid in enumerate(centroids):
    normed_centroids[row_idx, :] = normalizeZeroOne(centroid)

# write the normalized centroids (row index and integer column labels are
# included, as to_csv is called with its defaults)
pandas.DataFrame(normed_centroids).to_csv('/users/zpuste/digital-personas/data/dodge/mobile/nmf_transformed_centroids.csv')

# write the NMF component loadings, labelled with the dataset's column names
# (relies on dodge_data exposing a .columns attribute)
nmf_comps = pandas.DataFrame(model.components_)
nmf_comps.columns = dodge_data.columns
nmf_comps.to_csv('/users/zpuste/digital-personas/data/dodge/mobile/nmf_components.csv')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement