View difference between Paste ID: 50AUePkt and 5hrvTeSJ
SHOW: | | - or go back to the newest paste.
1
#!/usr/bin/python
2
#import cgitb
3
# This script extracts the data from the .arff file and puts the features in dataX and the score (the value to be predicted) in dataY.
4
import numpy as np
5
import scipy
6
#import arff
7
import os
8
#from matplotlib import pyplot as plt
9
from sklearn.svm import NuSVR
10
from sklearn.preprocessing import StandardScaler
11
from scipy import sparse
12
from sklearn.cross_validation import train_test_split
13
from sklearn.grid_search import GridSearchCV
14
from time import time,clock
15
from time import gmtime, strftime
16
import shlex
17-
barr = ''
17+
18-
fname = './bruno/2_mid_pitch/TestFeatures/Vowels_To_UnvoicedFricatives.arff'
18+
#fname = './Vowels_To_UnvoicedFricatives.arff' # Uncomment this line and comment out the next one to switch the data file.
19
fname = './Vowels_To_Nasals.arff' 
20
21
f = open(fname,'r')
22
lines = f.readlines()[42:]
23
f.close()       
24
25
floats = []
26-
for (x,y), value in np.ndenumerate(array):
26+
27
    floats.append(shlex.split(line))
28
array = np.asarray(floats)
29
30-
print 'Data size'
30+
for (x,y), value in np.ndenumerate(array): # To remove NaNs from the data
31-
print np.shape(array)
31+
32-
scale = StandardScaler()
32+
33-
array = scale.fit_transform(array)
33+
34-
traiY = array[:,38]
34+
35-
traiX = np.delete(array, [36,37,38,39],1)
35+
36-
trainY, realY, trainX, testX = train_test_split(traiY,traiX,test_size=0.8,random_state=42)
36+
#scale = StandardScaler()
37-
Cost = np.power(2,np.arange(1,12));
37+
#array = scale.fit_transform(array) # A scaling of all the data takes place here
38-
g = [0.5,0.25,0.125,0.0625,0.03125,0.015625,0.0078125,0.00390625,0.001953125,0.0009765625,0.00048828125,0.00048828125]
38+
dataY = array[:,38]
39-
print '\nCost values'
39+
dataX = np.delete(array, [36,37,38,39],1)
40-
print Cost
40+
41-
print '\ngamma values'
41+
print 'dataX size'
42-
print g
42+
print np.shape(dataX)
43-
scorebest = 0
43+
44-
Cbest = 0
44+
print '\ndataY size'
45-
gammabest = 0
45+
print np.shape(dataY)