Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python
# Extract data from an .arff file: feature columns go into dataX and the
# score (the value to be predicted) goes into dataY.
import numpy as np
import scipy
import os
from sklearn.svm import NuSVR
from sklearn.preprocessing import StandardScaler
from scipy import sparse
# sklearn.cross_validation / sklearn.grid_search were removed in scikit-learn
# 0.20; both utilities now live in sklearn.model_selection.
from sklearn.model_selection import train_test_split, GridSearchCV
from time import time, perf_counter  # time.clock was removed in Python 3.8
from time import gmtime, strftime
import shlex

# fname = './Vowels_To_UnvoicedFricatives.arff'  # swap with the line below to change the datafile
fname = './Vowels_To_Nasals.arff'

# Skip the 42-line ARFF header; the remainder is whitespace-separated data rows.
with open(fname, 'r') as f:
    lines = f.readlines()[42:]

rows = [shlex.split(line) for line in lines]
array = np.asarray(rows)

# The data is still string-typed here; blank out literal 'NaN' tokens with a
# vectorized mask (equivalent to the original element-by-element loop) before
# the numeric conversion.
array[array == 'NaN'] = '0'
array = array.astype(float)  # np.float was removed in NumPy 1.24; use builtin float

# scale = StandardScaler()
# array = scale.fit_transform(array)  # optional: scale all the data here

dataY = array[:, 38]                           # target column
dataX = np.delete(array, [36, 37, 38, 39], 1)  # drop target + unused columns

print('dataX size')
print(np.shape(dataX))
print('\ndataY size')
print(np.shape(dataY))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement