Advertisement
neo01124

PrepData.py

Jun 7th, 2013
450
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/python
  2. #import cgitb
  3. #This script will extract the data from the .arff files and put the features in dataX and the score or the value to be predicted in dataY.
  4. import numpy as np
  5. import scipy
  6. #import arff
  7. import os
  8. #from matplotlib import pyplot as plt
  9. from sklearn.svm import NuSVR
  10. from sklearn.preprocessing import StandardScaler
  11. from scipy import sparse
  12. from sklearn.cross_validation import train_test_split
  13. from sklearn.grid_search import GridSearchCV
  14. from time import time,clock
  15. from time import gmtime, strftime
  16. import shlex
  17.  
  18. #fname = './Vowels_To_UnvoicedFricatives.arff' # Comment this line and remove the comment from the next to change the datafile.
  19. fname = './Vowels_To_Nasals.arff'
  20.  
  21. f = open(fname,'r')
  22. lines = f.readlines()[42:]
  23. f.close()      
  24.  
  25. floats = []
  26. for line in lines:    
  27.     floats.append(shlex.split(line))
  28. array = np.asarray(floats)
  29.  
  30. for (x,y), value in np.ndenumerate(array): # To remove NaNs from the data
  31.     if value == 'NaN':
  32.         array[x][y] = 0;
  33. array = array.astype(np.float)
  34.  
  35.  
  36. #scale = StandardScaler()
  37. #array = scale.fit_transform(array) # A scaling of all the data takes place here
  38. dataY = array[:,38]
  39. dataX = np.delete(array, [36,37,38,39],1)
  40.  
  41. print 'dataX size'
  42. print np.shape(dataX)
  43.  
  44. print '\ndataY size'
  45. print np.shape(dataY)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement