Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding: utf-8
- # In[41]:
- import numpy as np
- import pycuda.driver as cuda
- import pycuda.autoinit
- from pycuda.compiler import SourceModule
- import numpy
- from pycuda import gpuarray
- # In[42]:
# CUDA source for the random-number kernels. The NGENERATORS placeholder is
# filled in with Python string formatting before compilation, so no other
# percent sign may appear anywhere inside this string.
code = """
#include <curand_kernel.h>

const int nstates = %(NGENERATORS)s;

// One generator state per thread, held in a statically sized device array.
// Static storage avoids device-side heap allocation, which was never freed
// and could hand back a null pointer that the fill kernel then dereferenced.
__device__ curandState_t states[nstates];

// Initialises generator tidx from the given seed, using tidx as the
// sequence number so that every thread draws from a different sequence.
__global__ void initkernel(int seed)
{
    int tidx = threadIdx.x + blockIdx.x * blockDim.x;
    if (tidx < nstates)
    {
        curand_init(seed, tidx, 0, &states[tidx]);
    }
}

// Writes N uniform random floats into values. Only threads that own a
// generator (tidx < nstates) take part; each one starts at its own index
// and advances by the total number of launched threads.
__global__ void randfillkernel(float *values, int N)
{
    int tidx = threadIdx.x + blockIdx.x * blockDim.x;
    if (tidx < nstates)
    {
        // Work on a local copy and store it back once, so the generator
        // continues from where it stopped on the next launch.
        curandState_t s = states[tidx];
        for (int i = tidx; i < N; i += blockDim.x * gridDim.x)
        {
            values[i] = curand_uniform(&s);
        }
        states[tidx] = s;
    }
}
"""
- # In[43]:
# CUDA source: evaluates every particle and maintains its personal best.
code1 = """
#include<math.h>

// Sphere function: the sum of the squares of the d coordinates in x.
__device__ float fitness_function(int d, float *x)
{
    float sum = 0;
    for (int i = 0; i < d; i++)
    {
        sum += x[i] * x[i];
    }
    return sum;
}

// One thread per particle. x and pBestPos hold n particles of d floats each,
// stored particle after particle; pValue and pBestValue hold n floats.
__global__ void evaluate_particles(int d, int n, float *x, float *pValue, float *pBestValue, float *pBestPos)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i >= n)
        return;

    // Keep the fitness in a register instead of re-reading global memory.
    float value = fitness_function(d, &x[d * i]);
    pValue[i] = value;

    // Strict '<' means an equal fitness does not replace the stored best.
    if (value < pBestValue[i])
    {
        pBestValue[i] = value;
        for (int j = 0; j < d; j++)
        {
            pBestPos[(d * i) + j] = x[(d * i) + j];
        }
    }
}
"""
- # In[44]:
# CUDA source: picks, for every particle, the best of itself and its two
# neighbours on a ring.
code2 = """
#include<stdio.h>
#include<cuda_runtime.h>
__global__ void calculate_localBest(int n, float *pBestValue, int *lBestIdx)
{
    const int self = blockDim.x * blockIdx.x + threadIdx.x;
    if (self < n)
    {
        // Ring neighbours: the index wraps around at both ends.
        const int next = (self + 1 == n) ? 0 : self + 1;
        const int prev = (self > 0) ? self - 1 : n - 1;

        // Candidates are compared in the order self, next, prev with a
        // strict '<', so on a tie the earlier candidate is kept.
        int winner = self;
        if (pBestValue[next] < pBestValue[winner])
        {
            winner = next;
        }
        if (pBestValue[prev] < pBestValue[winner])
        {
            winner = prev;
        }

        lBestIdx[self] = winner;
    }
}
"""
- # In[45]:
# CUDA source: constriction-factor velocity and position update.
code3 = """
#include<stdio.h>

// One thread per coordinate. x, velocity and pBestPos hold n particles of d
// floats each, stored particle after particle; lBestIdx holds, per particle,
// the index of the best particle in its neighbourhood; data[0] and data[1]
// are the two random factors used for this step.
__global__ void updatePositionVelocity(int n, int d, float *x, float *velocity, float *pBestPos, int *lBestIdx,float *data,float min,float max)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i >= (n * d))
        return;

    const float X = 0.72984f;
    const float c1 = 2.05f, c2 = 2.05f;
    const float r1 = data[0], r2 = data[1];

    // Flat index i belongs to coordinate `dim` of particle `particle`.
    const int particle = i / d;
    const int dim = i % d;

    // The social term pulls towards the POSITION of the neighbourhood best.
    // The index must therefore be looked up per particle and dereferenced
    // through pBestPos; using the raw index value as if it were a coordinate
    // mixes an integer id into the position arithmetic.
    const float lBestPos = pBestPos[lBestIdx[particle] * d + dim];

    float pos = x[i];
    float vel = X * (velocity[i]
                     + (c1 * r1 * (pBestPos[i] - pos))
                     + (c2 * r2 * (lBestPos - pos)));
    pos += vel;

    // Clamp to the search box and reverse the velocity at the walls.
    if (pos < min)
    {
        pos = min;
        vel *= -1;
    }
    if (pos > max)
    {
        pos = max;
        vel *= -1;
    }

    velocity[i] = vel;
    x[i] = pos;
}
"""
- # In[46]:
def pso(h_n=3, h_d=3, iterations=5, mini=-5.12, maxi=5.12):
    """Run a ring-topology particle swarm on the GPU and print its progress.

    The swarm minimises the fitness function compiled from ``code1``. After
    every iteration the current positions and the best fitness found so far
    are printed; a final summary is printed once the loop ends.

    Args:
        h_n: Number of particles (at least 1).
        h_d: Number of dimensions per particle (at least 1).
        iterations: Number of update steps to run.
        mini: Lower bound of the search box, also used for initial positions.
        maxi: Upper bound of the search box, also used for initial positions.

    Raises:
        ValueError: If ``h_n`` or ``h_d`` is smaller than 1.
    """
    if h_n < 1 or h_d < 1:
        raise ValueError("h_n and h_d must both be at least 1")

    threads = 32
    # Floor division keeps the grid sizes integral on Python 2 and 3 alike.
    particle_blocks = (h_n + threads - 1) // threads
    coord_blocks = (h_n * h_d + threads - 1) // threads
    n_rand = 2  # one generator each for the r1 and r2 factors

    # Compile every kernel once, outside the iteration loop.
    rng_mod = SourceModule(code % {"NGENERATORS": n_rand}, no_extern_c=True)
    init_rng = rng_mod.get_function("_Z10initkerneli")
    fill_rand = rng_mod.get_function("_Z14randfillkernelPfi")
    eval_mod = SourceModule(code1)
    evaluate = eval_mod.get_function("evaluate_particles")
    lbest_mod = SourceModule(code2)
    local_best = lbest_mod.get_function("calculate_localBest")
    update_mod = SourceModule(code3)
    update = update_mod.get_function("updatePositionVelocity")

    # Per-coordinate state: h_n * h_d values, stored particle after particle.
    # Start inside the requested search box rather than a fixed range.
    h_x = np.random.uniform(low=mini, high=maxi, size=h_n * h_d)
    h_x = h_x.astype(np.float32)
    h_velocity = np.zeros(h_n * h_d, dtype=np.float32)
    h_pBestPos = h_x.copy()

    # Per-particle state. Best values start at +inf so that the very first
    # evaluation always records a personal best, whatever the fitness scale.
    h_pValue = np.full(h_n, np.inf, dtype=np.float32)
    h_pBestValue = np.full(h_n, np.inf, dtype=np.float32)
    # int32 matches the ``int *`` parameter of the kernels.
    h_lBestIdx = np.arange(h_n, dtype=np.int32)

    def upload(host_array):
        # Allocate a device buffer of matching size and copy the data in.
        device_buffer = cuda.mem_alloc(host_array.nbytes)
        cuda.memcpy_htod(device_buffer, host_array)
        return device_buffer

    d_x = upload(h_x)
    d_velocity = upload(h_velocity)
    d_pBestPos = upload(h_pBestPos)
    d_pValue = upload(h_pValue)
    d_pBestValue = upload(h_pBestValue)
    d_lBestIdx = upload(h_lBestIdx)

    # Seed the generators once; each fill launch continues their sequences,
    # so generator state is not re-created on every iteration.
    init_rng(np.int32(0), block=(n_rand, 1, 1), grid=(1, 1, 1))
    d_data = gpuarray.zeros(n_rand, dtype=np.float32)

    for i in range(iterations):
        # Fresh r1 / r2 factors for this step.
        fill_rand(d_data, np.int32(n_rand),
                  block=(n_rand, 1, 1), grid=(1, 1, 1))

        # Fitness and personal bests.
        evaluate(np.int32(h_d), np.int32(h_n), d_x, d_pValue, d_pBestValue,
                 d_pBestPos, block=(threads, 1, 1), grid=(particle_blocks, 1))

        # Best particle in each ring neighbourhood.
        local_best(np.int32(h_n), d_pBestValue, d_lBestIdx,
                   block=(threads, 1, 1), grid=(particle_blocks, 1))

        # One thread per coordinate; the kernel bounds-checks against n * d,
        # so a flat launch works for any number of dimensions.
        update(np.int32(h_n), np.int32(h_d), d_x, d_velocity, d_pBestPos,
               d_lBestIdx, d_data, np.float32(mini), np.float32(maxi),
               block=(threads, 1, 1), grid=(coord_blocks, 1))

        cuda.memcpy_dtoh(h_x, d_x)
        cuda.memcpy_dtoh(h_pBestValue, d_pBestValue)
        gbest = np.amin(h_pBestValue)
        # Single-argument print calls behave the same on Python 2 and 3.
        print("%s  " % (i,))
        print("h_x:  %s" % (h_x,))
        print("gbest: %s" % (gbest,))

    # Copy once more so the summary is also valid when iterations == 0.
    cuda.memcpy_dtoh(h_pBestValue, d_pBestValue)
    cuda.memcpy_dtoh(h_x, d_x)
    gbest = np.amin(h_pBestValue)
    print("****************")
    print(gbest)
    print("n %s" % (h_x,))
- # In[48]:
# Script entry: 400 particles in 50 dimensions, 2000 steps, box [-100, 100].
pso(
    h_n=400,
    h_d=50,
    iterations=2000,
    mini=-100,
    maxi=100,
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement