Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import time

import numpy as np
import pandas as pd
import scipy.io as sio  # used to save .mat files
from sklearn.ensemble import RandomForestClassifier

try:
    # The copy of joblib vendored under sklearn.externals was removed in
    # scikit-learn 0.23; prefer the standalone package when it is available.
    import joblib
except ImportError:
    from sklearn.externals import joblib
# Train a random forest on one pickled data set and score another.
#
# Steps: load the pickled arrays, fit the classifier on (x_6, y_6[:, 1]),
# predict class probabilities for x_7, threshold them at 0.5, and rank the
# test rows by the probability in column 1.

# Start timing. time.clock() was removed in Python 3.8; time.perf_counter()
# is the documented replacement for measuring elapsed time.
start = time.perf_counter()

# Read the data.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load files
# that come from a trusted source.
# x_4 = joblib.load('reduced_x_4.pkl')
# x_5 = joblib.load('reduced_x_5.pkl')
x_4 = joblib.load('x_4.pkl')
x_5 = joblib.load('x_5.pkl')
x_6 = joblib.load('x_6.pkl')
x_7 = joblib.load('x_7.pkl')
y_4 = joblib.load('y_4.pkl')
y_5 = joblib.load('y_5.pkl')
y_6 = joblib.load('y_6.pkl')

# x_4, x_5, y_4 and y_5 are only referenced by the commented-out evaluation
# and export code below; they are still loaded so the script's behaviour
# (including failing when a file is missing) is unchanged.
xtrain = x_6
ytrain = y_6[:, 1]  # presumably column 1 holds the class label -- TODO confirm
xtest = x_7
# ytest = y_5[:, 1]

# Random forest.
clf = RandomForestClassifier(n_estimators=50, max_depth=25)
# Original author's note: .astype('float') forces the dtype to float;
# otherwise dtype=object raises an error.
clf.fit(xtrain, ytrain)
prediction1 = clf.predict_proba(xtest)

# Hard 0/1 decision as an (n, 1) float column: 1 where the probability in
# column 1 is strictly greater than 0.5, else 0.
# NOTE(review): indexing column 1 assumes the model saw at least two classes
# during training -- confirm for the data in use.
prediction = (prediction1[:, 1] > 0.5).astype(float).reshape(-1, 1)

# Precision P and recall R. True positives TP, false positives FP,
# true negatives TN, false negatives FN.
# TP, FP, FN = 0, 0, 0
# for i in range(len(prediction)):
#     if ytest[i] == 1 and prediction[i] == 1:
#         TP += 1
#     if ytest[i] == 0 and prediction[i] == 1:
#         FP += 1
#     if ytest[i] == 1 and prediction[i] == 0:
#         FN += 1
# P = TP / (TP + FP)
# R = TP / (TP + FN)

# Sort the rows by the second column in descending order (negating the key
# turns lexsort's ascending order into descending).
index = np.lexsort([-1 * prediction1[:, 1]])
prediction2 = prediction1[index, :]

# Report the elapsed time.
print("Time used:", time.perf_counter() - start, 's')
# print('P=', P)
# print('R=', R)

# Saving to .mat: sio.savemat('file_name.mat', {'name_inside_file': variable})
# sio.savemat('x_4.mat', {'x_4': x_4})
# sio.savemat('x_5.mat', {'x_5': x_5})
# sio.savemat('y_4.mat', {'y_4': y_4})
# sio.savemat('y_5.mat', {'y_5': y_5})
# np.savetxt("prediction_7.csv", prediction, delimiter = ',')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement