View difference between Paste ID: <a href="/50AUePkt">50AUePkt</a> and <a href="/5hrvTeSJ">5hrvTeSJ</a>

#!/usr/bin/python
1		#!/usr/bin/python
2		#import cgitb
3		#This script extracts the data from the .arff files, putting the features in dataX and the score (the value to be predicted) in dataY.
4		import numpy as np
5		import scipy
6		#import arff
7		import os
8		#from matplotlib import pyplot as plt
9		from sklearn.svm import NuSVR
10		from sklearn.preprocessing import StandardScaler
11		from scipy import sparse
12		from sklearn.cross_validation import train_test_split
13		from sklearn.grid_search import GridSearchCV
14		from time import time,clock
15		from time import gmtime, strftime
16		import shlex
17	-	barr = ''
17	+
18	-	fname = './bruno/2_mid_pitch/TestFeatures/Vowels_To_UnvoicedFricatives.arff'
18	+	#fname = './Vowels_To_UnvoicedFricatives.arff' # Uncomment this line and comment out the next one to change the datafile.
19		fname = './Vowels_To_Nasals.arff'
20
21		f = open(fname,'r')
22		lines = f.readlines()[42:]
23		f.close()
24
25		floats = []
26	-	for (x,y), value in np.ndenumerate(array):
26	+
27		floats.append(shlex.split(line))
28		array = np.asarray(floats)
29
30	-	print 'Data size'
30	+	for (x,y), value in np.ndenumerate(array): # To remove NaNs from the data
31	-	print np.shape(array)
31	+
32	-	scale = StandardScaler()
32	+
33	-	array = scale.fit_transform(array)
33	+
34	-	traiY = array[:,38]
34	+
35	-	traiX = np.delete(array, [36,37,38,39],1)
35	+
36	-	trainY, realY, trainX, testX = train_test_split(traiY,traiX,test_size=0.8,random_state=42)
36	+	#scale = StandardScaler()
37	-	Cost = np.power(2,np.arange(1,12));
37	+	#array = scale.fit_transform(array) # A scaling of all the data takes place here
38	-	g = [0.5,0.25,0.125,0.0625,0.03125,0.015625,0.0078125,0.00390625,0.001953125,0.0009765625,0.00048828125,0.00048828125]
38	+	dataY = array[:,38]
39	-	print '\nCost values'
39	+	dataX = np.delete(array, [36,37,38,39],1)
40	-	print Cost
40	+
41	-	print '\ngamma values'
41	+	print 'dataX size'
42	-	print g
42	+	print np.shape(dataX)
43	-	scorebest = 0
43	+
44	-	Cbest = 0
44	+	print '\ndataY size'
45	-	gammabest = 0
45	+	print np.shape(dataY)