Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import time
- import numpy as np
- import random
- import os
- from scipy.spatial import distance
- from sklearn.model_selection import train_test_split
- from skopt import gp_minimize
- from scipy.optimize import minimize
- from sklearn.metrics import mean_squared_error
- from math import sqrt
- from scipy.stats.distributions import norm
- from sklearn.metrics.pairwise import euclidean_distances
- from scipy.spatial import distance
- from numpy import inf
- from sklearn.preprocessing import StandardScaler
- from sklearn.gaussian_process import GaussianProcessRegressor
- from sklearn.gaussian_process.kernels import (RBF, Matern, RationalQuadratic,
- ExpSineSquared, DotProduct,
- ConstantKernel,WhiteKernel)
- from sklearn import metrics
- from sklearn.cluster import KMeans
- from multiprocessing import Process
- from multiprocessing import Pool
- if __name__ == '__main__':
- with Pool(processes=4) as pool:
- np.random.seed(1)
- np.set_printoptions(suppress=True)
- ################################ Here we load dataset ###################################################################################################
- rmtest=np.loadtxt("test.dat",usecols=range(7)) # Load the test dataset
- X=rmtest
- kmeans = KMeans(n_clusters=4)
- kmeans.fit(X)
- y_kmeans = kmeans.predict(X)
- labels = kmeans.labels_
- a = X[np.where(labels==0)]
- b = X[np.where(labels==1)]
- c = X[np.where(labels==2)]
- d = X[np.where(labels==3)]
- print(a.shape)
- print(b.shape)
- print(c.shape)
- print(d.shape)
- armpredx=np.delete(a,6,1) # xvalues, in our case inverse distances
- armpredy=np.delete(a,np.s_[0:6],1) # energy values corresponding to the distances
- brmpredx=np.delete(b,6,1) # xvalues, in our case inverse distances
- brmpredy=np.delete(b,np.s_[0:6],1) # energy values corresponding to the distances
- crmpredx=np.delete(c,6,1) # xvalues, in our case inverse distances
- crmpredy=np.delete(c,np.s_[0:6],1) # energy values corresponding to the distances
- drmpredx=np.delete(d,6,1) # xvalues, in our case inverse distances
- drmpredy=np.delete(d,np.s_[0:6],1) # energy values corresponding to the distances
- sample=input("Enter number of training set points\n") # Number of points in our training set. You will be prompted to choose this.
- sample=int(sample)
- np.random.seed(24)
- apoints=a[np.random.choice(a.shape[0], sample, replace=False), :] # Training set based on number of points generated here.
- bpoints=b[np.random.choice(b.shape[0], sample, replace=False), :]
- cpoints=c[np.random.choice(c.shape[0], sample, replace=False), :]
- dpoints=d[np.random.choice(d.shape[0], sample, replace=False), :]
- print("Total number of sample points for training is ",apoints.shape)
- if os.path.exists('train.dat'): # we save the training set in train.dat
- os.remove('train.dat')
- with open('train.dat', 'a') as l:
- np.savetxt(l, apoints,fmt='%1.6f',delimiter=' ')
- ainpx=np.delete(apoints,6,1) #inpx is the x-part of the training dataset
- ainpy=np.delete(apoints,np.s_[0:6],1) #inpy is the y-part of the training dataset
- binpx=np.delete(bpoints,6,1) #inpx is the x-part of the training dataset
- binpy=np.delete(bpoints,np.s_[0:6],1) #inpy is the y-part of the training dataset
- cinpx=np.delete(cpoints,6,1) #inpx is the x-part of the training dataset
- cinpy=np.delete(cpoints,np.s_[0:6],1) #inpy is the y-part of the training dataset
- dinpx=np.delete(dpoints,6,1) #inpx is the x-part of the training dataset
- dinpy=np.delete(dpoints,np.s_[0:6],1) #inpy is the y-part of the training dataset
- ########################### GPR done here#############################################################################################
- ## Instantiate a Gaussian Process model
- ##6 types of kernels represented.
- k1 = 1**2* RBF(length_scale=0.01)
- k2=11**2*RationalQuadratic(length_scale=0.1, alpha=1.0, length_scale_bounds=(1e-05, 100000.0), alpha_bounds=(1e-05, 100000.0))
- k4=1**2*ExpSineSquared(length_scale=1.0, periodicity=1.0, length_scale_bounds=(1e-05, 100000.0), periodicity_bounds=(1e-05, 100000.0))
- k3 =Matern(length_scale=[0.1, 0.1, 0.1,0.1, 0.1, 0.1], length_scale_bounds=(1e-5, 1.0),nu=2.5)
- k5=ConstantKernel(constant_value=1.0, constant_value_bounds=(1e-05, 1000.0))
- k6=DotProduct(sigma_0=10, sigma_0_bounds=(1e-05, 100))
- k7=WhiteKernel(noise_level=0.1)
- kernel= k5*k3*k6+k7 # we choose our kernel here.
- kernel1= k5*k3*k6+k7
- kernel2= k5*k3*k6+k7
- kernel3= k5*k3*k6+k7
- seconds = time.time()
- #optimisation step.
- gp = (GaussianProcessRegressor(kernel=kernel,n_restarts_optimizer=25))
- gp2 = (GaussianProcessRegressor(kernel=kernel1,n_restarts_optimizer=25))
- gp3 = (GaussianProcessRegressor(kernel=kernel2,n_restarts_optimizer=25))
- gp4 = (GaussianProcessRegressor(kernel=kernel3,n_restarts_optimizer=25))
- input_list=[[ainpx,ainpy.reshape(-1,1)],[binpx,binpy.reshape(-1,1)]]
- f_list=[gp.fit,gp2.fit,gp3.fit,gp4.fit]
- pool.starmap(gp.fit, input_list)
- pool.close()
- # agp=pool.apply(gp.fit,args=(ainpx,ainpy.reshape(-1,1)))
- print(os.getpid())
- # bgp=pool.apply(gp2.fit,args=(binpx,binpy.reshape(-1,1)))
- # print(os.getpid())
- # #cgp=pool.apply(gp.fit,args=(cinpx,cinpy.reshape(-1,1)))
- #dgp=pool.apply(gp.fit,args=(dinpx,dinpy.reshape(-1,1)))
- # #fitting step.
- # agp=pool.apply(gp.fit(ainpx, ainpy.reshape(-1,1))) # Here the GPR finds an optimal value of the kernel based on input x,y
- # bgp=pool.apply(gp2.fit(binpx, binpy.reshape(-1,1))) # Here the GPR finds an optimal value of the kernel based on input x,y
- # cgp=pool.apply(gp3.fit(cinpx, cinpy.reshape(-1,1))) # Here the GPR finds an optimal value of the kernel based on input x,y
- # dgp=pool.apply(gp4.fit(dinpx, dinpy.reshape(-1,1))) # Here the GPR finds an optimal value of the kernel based on input x,y
- secs2=time.time()
- print("Fit done")
- print(secs2-seconds)
- print("GPML kernel: %s" % agp.kernel_) # We print the kernel and the likelihood
- print("Log-marginal-likelihood: %.3f"
- % agp.log_marginal_likelihood(agp.kernel_.theta))
- ######################################################### Generate RMSE values ###############################################################################################3
- # name=str(len(ainpx)) #Generating a filename to save our final observations
- # filename = name+'_rmse'
- # if os.path.exists(filename):
- # os.remove(filename)
- # f = open(filename , 'a')
- # arm_pred, sigma = pool.apply(gp.predict,args=(armpredx, return_std=True)) #Generating prediction values for our test database
- # brm_pred, sigma1 = pool.apply(gp2.predict,args=(brmpredx, return_std=True)) #Generating prediction values for our test database
- # crm_pred, sigma2 = pool.apply(gp3.predict,args=(crmpredx, return_std=True)) #Generating prediction values for our test database
- # drm_pred, sigma3 = pool.apply(gp4.predict,args=(drmpredx, return_std=True)) #Generating prediction values for our test database
- # brm_pred, sigma1 = gp.predict(brmpredx, return_std=True) #Generating prediction values for our test database
- # crm_pred, sigma2 = gp.predict(crmpredx, return_std=True) #Generating prediction values for our test database
- # drm_pred, sigma3 = gp.predict(drmpredx, return_std=True) #Generating prediction values for our test database
- # # print("Observed value --- Predicted value---Absolute difference\n")
- # armde=np.c_[armpredy, arm_pred,abs(armpredy-arm_pred)] #Generating an array to compare the original and predictedvalues
- # brmde=np.c_[brmpredy, brm_pred,abs(brmpredy-brm_pred)]
- # crmde=np.c_[crmpredy, crm_pred,abs(crmpredy-crm_pred)]
- # drmde=np.c_[drmpredy, drm_pred,abs(drmpredy-drm_pred)]
- # # print (rmde)
- # aerr=sqrt(mean_squared_error(armpredy, arm_pred)) #Generating an RMSE value for our testset
- # print("RMSE-A ",aerr)
- # berr=sqrt(mean_squared_error(brmpredy, brm_pred)) #Generating an RMSE value for our testset
- # print("RMSE-B ",berr)
- # cerr=sqrt(mean_squared_error(crmpredy, crm_pred)) #Generating an RMSE value for our testset
- # print("RMSE-C ",cerr)
- # derr=sqrt(mean_squared_error(drmpredy, drm_pred)) #Generating an RMSE value for our testset
- # print("RMSE-D ",derr)
- # # f.write("RMSE value is ")
- # # f.write(str(err))
- # # f.write("\n") # Saving the values to the file. We save the RMSE followed by the comparision array
- # # f.write("Observed value --- Predicted value---Absolute difference\n")
- # # np.savetxt(f, rmde,fmt='%1.6f',delimiter=' ')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement