# Untitled

import numpy as np
import pandas as pd
import time
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib
import scipy.io as sio #存mat

# Train a random forest on one pickled data set, score another one, and
# report how long the whole run took.

# time.clock() was removed in Python 3.8; perf_counter() is the documented
# replacement for measuring elapsed time.
start = time.perf_counter()  # start timing

# NOTE(review): `joblib` comes from the file-level
# `from sklearn.externals import joblib`; recent scikit-learn releases dropped
# that shim, so the import may need to become a plain `import joblib`.
# NOTE(review): joblib.load unpickles the files -- only load trusted .pkl files.

# x_4 = joblib.load('reduced_x_4.pkl')  # load data
# x_5 = joblib.load('reduced_x_5.pkl')
x_4 = joblib.load('x_4.pkl')  # load data
x_5 = joblib.load('x_5.pkl')
x_6 = joblib.load('x_6.pkl')
x_7 = joblib.load('x_7.pkl')
y_4 = joblib.load('y_4.pkl')
y_5 = joblib.load('y_5.pkl')
y_6 = joblib.load('y_6.pkl')

# Train on data set 6 and predict on data set 7. The other loaded sets are
# not used by the active code below.
xtrain = x_6
ytrain = y_6[:, 1]  # presumably the label is the second column of y -- TODO confirm
xtest = x_7
# ytest = y_5[:, 1]

# RF
clf = RandomForestClassifier(n_estimators=50, max_depth=25)
# If the arrays have dtype=object, cast them with .astype('float') first,
# otherwise fit() raises an error.
clf.fit(xtrain, ytrain)
prediction1 = clf.predict_proba(xtest)

# Hard 0/1 decision as an (n_samples, 1) float column: 1 where the second
# probability column is strictly greater than 0.5, else 0.
prediction = (prediction1[:, 1] > 0.5).astype(float).reshape(-1, 1)

# Precision P and recall R.
# True positives TP, false positives FP, true negatives TN, false negatives FN.
# TP, FP, FN = 0, 0, 0
# for i in range(len(prediction)):
#     if ytest[i] == 1 and prediction[i] == 1:
#         TP += 1
#     if ytest[i] == 0 and prediction[i] == 1:
#         FP += 1
#     if ytest[i] == 1 and prediction[i] == 0:
#         FN += 1
# P = TP / (TP + FP)
# R = TP / (TP + FN)

# Sort rows by the second probability column in descending order. A stable
# sort keeps tied rows in their original relative order.
index = np.argsort(-prediction1[:, 1], kind='mergesort')
prediction2 = prediction1[index, :]
print("Time used:", time.perf_counter() - start, 's')  # elapsed time
# print('P=', P)
# print('R=', R)

# Save to .mat: sio.savemat('file_name.mat', {'variable name in file': variable_to_save})
# sio.savemat('x_4.mat', {'x_4': x_4})
# sio.savemat('x_5.mat', {'x_5': x_5})
# sio.savemat('y_4.mat', {'y_4': y_4})
# sio.savemat('y_5.mat', {'y_5': y_5})

# np.savetxt("prediction_7.csv", prediction, delimiter = ',')