SHOW:
|
|
- or go back to the newest paste.
1 | #!/usr/bin/python | |
2 | #import cgitb | |
3 | # This script extracts the data from the .arff files: the features go into dataX, and the score (the value to be predicted) goes into dataY. | |
4 | import numpy as np | |
5 | import scipy | |
6 | #import arff | |
7 | import os | |
8 | #from matplotlib import pyplot as plt | |
9 | from sklearn.svm import NuSVR | |
10 | from sklearn.preprocessing import StandardScaler | |
11 | from scipy import sparse | |
12 | from sklearn.cross_validation import train_test_split | |
13 | from sklearn.grid_search import GridSearchCV | |
14 | from time import time,clock | |
15 | from time import gmtime, strftime | |
16 | import shlex | |
17 | - | barr = '' |
17 | + | |
18 | - | fname = './bruno/2_mid_pitch/TestFeatures/Vowels_To_UnvoicedFricatives.arff' |
18 | + | #fname = './Vowels_To_UnvoicedFricatives.arff' # Uncomment this line and comment out the next one to change the data file. |
19 | fname = './Vowels_To_Nasals.arff' | |
20 | ||
21 | f = open(fname,'r') | |
22 | lines = f.readlines()[42:] | |
23 | f.close() | |
24 | ||
25 | floats = [] | |
26 | - | for (x,y), value in np.ndenumerate(array): |
26 | + | |
27 | floats.append(shlex.split(line)) | |
28 | array = np.asarray(floats) | |
29 | ||
30 | - | print 'Data size' |
30 | + | for (x,y), value in np.ndenumerate(array): # To remove NaNs from the data |
31 | - | print np.shape(array) |
31 | + | |
32 | - | scale = StandardScaler() |
32 | + | |
33 | - | array = scale.fit_transform(array) |
33 | + | |
34 | - | traiY = array[:,38] |
34 | + | |
35 | - | traiX = np.delete(array, [36,37,38,39],1) |
35 | + | |
36 | - | trainY, realY, trainX, testX = train_test_split(traiY,traiX,test_size=0.8,random_state=42) |
36 | + | #scale = StandardScaler() |
37 | - | Cost = np.power(2,np.arange(1,12)); |
37 | + | #array = scale.fit_transform(array) # A scaling of all the data takes place here |
38 | - | g = [0.5,0.25,0.125,0.0625,0.03125,0.015625,0.0078125,0.00390625,0.001953125,0.0009765625,0.00048828125,0.00048828125] |
38 | + | dataY = array[:,38] |
39 | - | print '\nCost values' |
39 | + | dataX = np.delete(array, [36,37,38,39],1) |
40 | - | print Cost |
40 | + | |
41 | - | print '\ngamma values' |
41 | + | print 'dataX size' |
42 | - | print g |
42 | + | print np.shape(dataX) |
43 | - | scorebest = 0 |
43 | + | |
44 | - | Cbest = 0 |
44 | + | print '\ndataY size' |
45 | - | gammabest = 0 |
45 | + | print np.shape(dataY) |