Untitled

import time
import numpy as np
import random
import os
from scipy.spatial import distance
from sklearn.model_selection import train_test_split
from skopt import gp_minimize
from scipy.optimize import minimize
from sklearn.metrics import mean_squared_error
from math import sqrt
from scipy.stats.distributions import norm
from sklearn.metrics.pairwise import euclidean_distances
from scipy.spatial import distance
from numpy import inf
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (RBF, Matern, RationalQuadratic,
													  ExpSineSquared, DotProduct,
													  ConstantKernel,WhiteKernel)

from sklearn import metrics
from sklearn.cluster import KMeans
from multiprocessing import Process
from multiprocessing import Pool

if __name__ == '__main__':
	with Pool(processes=4) as pool:

		np.random.seed(1)
		np.set_printoptions(suppress=True)


		################################ Here we load dataset ###################################################################################################

		rmtest=np.loadtxt("test.dat",usecols=range(7))                                        # Load the test dataset
		X=rmtest
		kmeans = KMeans(n_clusters=4)
		kmeans.fit(X)
		y_kmeans = kmeans.predict(X)
		labels = kmeans.labels_

		a = X[np.where(labels==0)]
		b = X[np.where(labels==1)]
		c = X[np.where(labels==2)]
		d = X[np.where(labels==3)]

		print(a.shape)
		print(b.shape)
		print(c.shape)
		print(d.shape)


		armpredx=np.delete(a,6,1)														  # xvalues, in our case inverse distances
		armpredy=np.delete(a,np.s_[0:6],1)                                                # energy values corresponding to the distances

		brmpredx=np.delete(b,6,1)														  # xvalues, in our case inverse distances
		brmpredy=np.delete(b,np.s_[0:6],1)                                                # energy values corresponding to the distances

		crmpredx=np.delete(c,6,1)														  # xvalues, in our case inverse distances
		crmpredy=np.delete(c,np.s_[0:6],1)                                                # energy values corresponding to the distances

		drmpredx=np.delete(d,6,1)														  # xvalues, in our case inverse distances
		drmpredy=np.delete(d,np.s_[0:6],1)                                                # energy values corresponding to the distances


		sample=input("Enter number of training set points\n")                                 # Number of points in our training set. You will be prompted to choose this.
		sample=int(sample)

		np.random.seed(24)

		apoints=a[np.random.choice(a.shape[0], sample, replace=False), :]            # Training set based on number of points generated here.
		bpoints=b[np.random.choice(b.shape[0], sample, replace=False), :]
		cpoints=c[np.random.choice(c.shape[0], sample, replace=False), :]
		dpoints=d[np.random.choice(d.shape[0], sample, replace=False), :]

		print("Total number of sample points for training is ",apoints.shape)


		if os.path.exists('train.dat'):														  # we save the training set in train.dat
		  os.remove('train.dat')

		with open('train.dat', 'a') as l:
			np.savetxt(l, apoints,fmt='%1.6f',delimiter=' ')


		ainpx=np.delete(apoints,6,1)															  #inpx is the x-part of the training dataset
		ainpy=np.delete(apoints,np.s_[0:6],1)													  #inpy is the y-part of the training dataset

		binpx=np.delete(bpoints,6,1)															  #inpx is the x-part of the training dataset
		binpy=np.delete(bpoints,np.s_[0:6],1)													  #inpy is the y-part of the training dataset

		cinpx=np.delete(cpoints,6,1)															  #inpx is the x-part of the training dataset
		cinpy=np.delete(cpoints,np.s_[0:6],1)													  #inpy is the y-part of the training dataset

		dinpx=np.delete(dpoints,6,1)															  #inpx is the x-part of the training dataset
		dinpy=np.delete(dpoints,np.s_[0:6],1)													  #inpy is the y-part of the training dataset

		########################### GPR  done here#############################################################################################
		## Instantiate a Gaussian Process model
		##6 types of kernels represented.

		k1 = 1**2* RBF(length_scale=0.01)
		k2=11**2*RationalQuadratic(length_scale=0.1, alpha=1.0, length_scale_bounds=(1e-05, 100000.0), alpha_bounds=(1e-05, 100000.0))
		k4=1**2*ExpSineSquared(length_scale=1.0, periodicity=1.0, length_scale_bounds=(1e-05, 100000.0), periodicity_bounds=(1e-05, 100000.0))
		k3 =Matern(length_scale=[0.1, 0.1, 0.1,0.1, 0.1, 0.1],  length_scale_bounds=(1e-5, 1.0),nu=2.5)
		k5=ConstantKernel(constant_value=1.0, constant_value_bounds=(1e-05, 1000.0))
		k6=DotProduct(sigma_0=10, sigma_0_bounds=(1e-05, 100))
		k7=WhiteKernel(noise_level=0.1)

		kernel= k5*k3*k6+k7																	# we choose our kernel here.
		kernel1= k5*k3*k6+k7
		kernel2= k5*k3*k6+k7
		kernel3= k5*k3*k6+k7

		seconds = time.time()

		#optimisation step.
		gp = (GaussianProcessRegressor(kernel=kernel,n_restarts_optimizer=25))
		gp2 = (GaussianProcessRegressor(kernel=kernel1,n_restarts_optimizer=25))
		gp3 = (GaussianProcessRegressor(kernel=kernel2,n_restarts_optimizer=25))
		gp4 = (GaussianProcessRegressor(kernel=kernel3,n_restarts_optimizer=25))
		input_list=[[ainpx,ainpy.reshape(-1,1)],[binpx,binpy.reshape(-1,1)]]
		f_list=[gp.fit,gp2.fit,gp3.fit,gp4.fit]
		pool.starmap(gp.fit, input_list)
		pool.close()
		# agp=pool.apply(gp.fit,args=(ainpx,ainpy.reshape(-1,1)))
		 print(os.getpid())
		# bgp=pool.apply(gp2.fit,args=(binpx,binpy.reshape(-1,1)))
		# print(os.getpid())
		# #cgp=pool.apply(gp.fit,args=(cinpx,cinpy.reshape(-1,1)))
		#dgp=pool.apply(gp.fit,args=(dinpx,dinpy.reshape(-1,1)))

		# #fitting step.
		# agp=pool.apply(gp.fit(ainpx, ainpy.reshape(-1,1)))													# Here the GPR finds an optimal value of the kernel based on input x,y
		# bgp=pool.apply(gp2.fit(binpx, binpy.reshape(-1,1)))													# Here the GPR finds an optimal value of the kernel based on input x,y
		# cgp=pool.apply(gp3.fit(cinpx, cinpy.reshape(-1,1)))													# Here the GPR finds an optimal value of the kernel based on input x,y
		# dgp=pool.apply(gp4.fit(dinpx, dinpy.reshape(-1,1)))													# Here the GPR finds an optimal value of the kernel based on input x,y
		secs2=time.time()
		print("Fit done")
		print(secs2-seconds)
		print("GPML kernel: %s" % agp.kernel_)												# We print the kernel and the likelihood
		print("Log-marginal-likelihood: %.3f"
				% agp.log_marginal_likelihood(agp.kernel_.theta))

		######################################################### Generate RMSE values ###############################################################################################3

		# name=str(len(ainpx))																	#Generating a filename to save our final observations
		# filename = name+'_rmse'

		# if os.path.exists(filename):
		  # os.remove(filename)
		# f = open(filename , 'a')

		# arm_pred, sigma = pool.apply(gp.predict,args=(armpredx, return_std=True))							   #Generating prediction values for our test database
		# brm_pred, sigma1 = pool.apply(gp2.predict,args=(brmpredx, return_std=True))							   #Generating prediction values for our test database
		# crm_pred, sigma2 = pool.apply(gp3.predict,args=(crmpredx, return_std=True))							   #Generating prediction values for our test database
		# drm_pred, sigma3 = pool.apply(gp4.predict,args=(drmpredx, return_std=True))							   #Generating prediction values for our test database
		# brm_pred, sigma1 = gp.predict(brmpredx, return_std=True)							   #Generating prediction values for our test database
		# crm_pred, sigma2 = gp.predict(crmpredx, return_std=True)							   #Generating prediction values for our test database
		# drm_pred, sigma3 = gp.predict(drmpredx, return_std=True)							   #Generating prediction values for our test database


		# # print("Observed value --- Predicted value---Absolute difference\n")

		# armde=np.c_[armpredy, arm_pred,abs(armpredy-arm_pred)]                                  #Generating an array to compare the original and predictedvalues
		# brmde=np.c_[brmpredy, brm_pred,abs(brmpredy-brm_pred)]
		# crmde=np.c_[crmpredy, crm_pred,abs(crmpredy-crm_pred)]
		# drmde=np.c_[drmpredy, drm_pred,abs(drmpredy-drm_pred)]

		# # print (rmde)

		# aerr=sqrt(mean_squared_error(armpredy, arm_pred))                                     #Generating an RMSE value for our testset
		# print("RMSE-A ",aerr)

		# berr=sqrt(mean_squared_error(brmpredy, brm_pred))                                     #Generating an RMSE value for our testset
		# print("RMSE-B ",berr)

		# cerr=sqrt(mean_squared_error(crmpredy, crm_pred))                                     #Generating an RMSE value for our testset
		# print("RMSE-C ",cerr)

		# derr=sqrt(mean_squared_error(drmpredy, drm_pred))                                     #Generating an RMSE value for our testset
		# print("RMSE-D ",derr)

		# # f.write("RMSE value is ")
		# # f.write(str(err))
		# # f.write("\n")                                     								   # Saving the values to the file. We save the RMSE followed by the comparision array
		# # f.write("Observed value --- Predicted value---Absolute difference\n")
		# # np.savetxt(f, rmde,fmt='%1.6f',delimiter=' ')