Advertisement
Guest User

Untitled

a guest
Nov 24th, 2014
179
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import os
  2. import numpy
  3. import pandas
  4. from pandas import DataFrame
  5. from sklearn.externals import joblib
  6. from sklearn.decomposition import ProjectedGradientNMF
  7. from sklearn.decomposition import PCA
  8. from math import sqrt
  9.  
  10. # load raw_clean_dedupe_norm into memory
  11. dodge_data = joblib.load(os.environ['DATASET_D'])
  12.  
  13. # initialize a PCA with arbitrary number of components
  14. pca = PCA(n_components=dodge_data.shape[1])
  15.  
  16. # fit the pca to raw_clean_dedupe_norm
  17. pca.fit(dodge_data)
  18.  
  19. # set the number of principle components equal to the first n components that explain 95% of the variance
  20. explained_variance = pca.explained_variance_ratio_.cumsum()
  21. explained_variance = explained_variance[explained_variance <= .95]
  22. p_comp = len(explained_variance)
  23.  
  24. # use the number of principle components to initialize a matrix factorization model
  25. model = ProjectedGradientNMF(n_components=p_comp,
  26.                              init='nndsvd',
  27.                              beta=1,
  28.                              sparseness=None)
  29.  
  30. # fit NMF model to the raw_clean_dedupe_norm dataset
  31. model.fit(dodge_data)
  32.  
  33. # load the NMF model that was used to compute the original kmeans centroids
  34. nmf = joblib.load(os.environ['MODEL_A'])
  35.  
  36. # load the kmeans object
  37. kmeans = joblib.load(os.environ['MODEL_B'])
  38. kmeans = kmeans[6]
  39.  
  40. # transform the NMF centroids back into the original 35-column feature-space
  41. centroids = numpy.dot(kmeans.cluster_centers_, nmf.components_)
  42.  
  43. # transform the 35-column centroids into the newly computed NMF matrix
  44. centroids = model.transform(centroids)
  45.  
  46. def normalizeZeroOne(vec):
  47.     normed = (vec - min(vec)) / (max(vec) - min(vec))
  48.     return normed
  49.  
  50. #normalize the transformed centroid features
  51. normed_centroids = numpy.zeros(centroids.shape)
  52. for i in range(0, centroids.shape[1]):
  53.     normed_centroids[:, i] = normalizeZeroOne(centroids[:, i])
  54.  
  55.  
  56.  
  57. normed_centroids = numpy.zeros(centroids.shape)
  58. for i in range(0, centroids.shape[0]):
  59.     normed_centroids[i, :] = normalizeZeroOne(centroids[i, :])
  60.  
  61.  
  62. pandas.DataFrame(normed_centroids).to_csv('/users/zpuste/digital-personas/data/dodge/mobile/nmf_transformed_centroids.csv')
  63.  
  64. nmf_comps = pandas.DataFrame(model.components_)
  65.  
  66. nmf_comps.columns = dodge_data.columns
  67. nmf_comps.to_csv('/users/zpuste/digital-personas/data/dodge/mobile/nmf_components.csv')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement