# Untitled

# coding: utf-8

# In[41]:


import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy
from pycuda import gpuarray


# In[42]:


# CUDA source template for the device-side random number generators.
# The text is run through Python %-formatting (the NGENERATORS field) before
# it is compiled, so the CUDA code below must not contain any other '%'.
code = """
    #include <curand_kernel.h>

    // Number of generator states; the value is filled in from Python
    // before compilation.
    const int nstates = %(NGENERATORS)s;

    // One generator state per thread, statically allocated in device memory.
    // Nothing is taken from the device heap, so calling initkernel again
    // (for example to reseed) cannot leak memory or leave a null state.
    __device__ curandState_t states[nstates];

    // Seed generator number tidx. Every generator receives the same seed and
    // uses its own global thread index as the cuRAND sequence number.
    // Threads with tidx >= nstates do nothing.
    __global__ void initkernel(int seed)
    {
        int tidx = threadIdx.x + blockIdx.x * blockDim.x;

        if (tidx < nstates)
        {
            curand_init(seed, tidx, 0, &states[tidx]);
        }
    }

    // Write curand_uniform draws into values[0..N).
    // Thread tidx writes elements tidx, tidx + T, tidx + 2T, ... where T is
    // the total number of launched threads. Only threads with
    // tidx < nstates take part, so the launch must not use more than
    // nstates threads or some elements are left untouched.
    __global__ void randfillkernel(float *values, int N)
    {
        int tidx = threadIdx.x + blockIdx.x * blockDim.x;

        if (tidx < nstates) {
            // Work on a local copy of the state and store it back once, so
            // the next launch continues the same random sequence.
            curandState_t s = states[tidx];
            for (int i = tidx; i < N; i += blockDim.x * gridDim.x) {
                values[i] = curand_uniform(&s);
            }
            states[tidx] = s;
        }
    }
"""


# In[43]:


# CUDA source for the fitness evaluation step: one thread per particle.
code1="""

#include<stdio.h>
#include<math.h>

// Objective function: sum of the squares of the d coordinates in x.
__device__ float fitness_function(int d, float *x)
{
    float sum = 0.0f;
    for (int i = 0; i < d; i++)
    {
        sum += x[i] * x[i];
    }
    return sum;
}


// Evaluate every particle and refresh its personal best.
//   d          - number of coordinates per particle
//   n          - number of particles
//   x          - positions, particle i occupies x[d*i .. d*i + d)
//   pValue     - out: current fitness of each particle
//   pBestValue - in/out: lowest fitness seen so far by each particle
//   pBestPos   - in/out: position at which pBestValue was reached
// Expects a 1-D launch with at least n threads; surplus threads return
// immediately.
__global__ void evaluate_particles(int d, int n, float *x, float *pValue, float *pBestValue, float *pBestPos)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i >= n)
        return;

    float *particle = &x[d * i];

    // Keep the fitness in a local so it is not read back from global memory.
    const float fitness = fitness_function(d, particle);
    pValue[i] = fitness;

    if (fitness < pBestValue[i])
    {
        pBestValue[i] = fitness;

        // Record the improved position as the new personal best.
        float *best = &pBestPos[d * i];
        for (int j = 0; j < d; j++)
        {
            best[j] = particle[j];
        }
    }
}"""


# In[44]:


# CUDA source for the neighbourhood step: one thread per particle.
code2="""
#include<stdio.h>
#include<cuda_runtime.h>

// For each particle, choose the index with the lowest personal-best value
// among the particle itself and its two neighbours. Neighbours wrap around:
// particle 0 is adjacent to particle n-1.
// The particle's own index is kept on ties; the right-hand neighbour is
// compared before the left-hand one.
// Expects a 1-D launch with at least n threads.
__global__ void calculate_localBest(int n, float *pBestValue, int *lBestIdx)
{
    const int self = blockDim.x * blockIdx.x + threadIdx.x;

    if (self < n)
    {
        const int next = (self == (n - 1)) ? 0 : self + 1;
        const int prev = (self == 0) ? (n - 1) : self - 1;

        int best = self;

        if (pBestValue[next] < pBestValue[best])
        {
            best = next;
        }

        if (pBestValue[prev] < pBestValue[best])
        {
            best = prev;
        }

        lBestIdx[self] = best;
    }
}
"""


# In[45]:


# CUDA source for the movement step: one thread per (particle, dimension).
code3="""
#include<stdio.h>

// Update one coordinate of one particle.
//   n, d      - number of particles / coordinates per particle
//   x         - in/out: positions, flat index = particle * d + dimension
//   velocity  - in/out: velocities, same layout as x
//   pBestPos  - personal-best positions, same layout as x
//   lBestIdx  - for each particle, the index of its best neighbour
//   data      - data[0] and data[1] are the two random factors r1 and r2
//   min, max  - bounds; a coordinate that leaves them is placed on the
//               bound and its velocity is reversed
// Expects a 1-D launch with at least n*d threads.
__global__ void updatePositionVelocity(int n, int d, float *x, float *velocity, float *pBestPos, int *lBestIdx,float *data,float min,float max)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i >= (n * d))
        return;

    const float X = 0.72984f;
    const float c1 = 2.05f, c2 = 2.05f;
    const float r1 = data[0], r2 = data[1];

    // Split the flat index into particle number and coordinate number.
    const int particle = i / d;
    const int dim = i - particle * d;

    // The social term needs the matching coordinate of the best neighbour's
    // personal-best position, not the neighbour's integer index.
    const float lBestPos = pBestPos[lBestIdx[particle] * d + dim];

    float pos = x[i];
    float vel = X * (velocity[i] + (c1 * r1 * (pBestPos[i] - pos)) + (c2 * r2 * (lBestPos - pos)));
    pos += vel;

    if (pos < min)
    {
        pos = min;
        vel = -vel;
    }

    if (pos > max)
    {
        pos = max;
        vel = -vel;
    }

    x[i] = pos;
    velocity[i] = vel;
}

"""


# In[46]:


def pso(h_n=3,h_d=3,iterations=5,mini=-5.12, maxi=5.12):
    """Run a particle swarm optimisation on the GPU and print the result.

    Each iteration reseeds the device generators with the iteration number,
    draws two uniform random numbers, evaluates every particle
    (``evaluate_particles``), selects each particle's best ring neighbour
    (``calculate_localBest``) and moves the swarm
    (``updatePositionVelocity``).  After the loop the final positions and the
    smallest personal-best fitness are copied back to the host and printed.

    Parameters
    ----------
    h_n : int
        Number of particles (at least 1).
    h_d : int
        Number of coordinates per particle (at least 1).
    iterations : int
        Number of swarm updates to run.
    mini, maxi : float
        Lower and upper bound of the search interval.  They are used to draw
        the initial positions and are passed to the update kernel, which
        keeps positions inside them.

    Raises
    ------
    ValueError
        If ``h_n`` or ``h_d`` is smaller than 1.
    """
    if h_n < 1 or h_d < 1:
        raise ValueError("h_n and h_d must both be at least 1")

    n_coords = h_n * h_d

    # Host arrays with one entry per (particle, coordinate).
    h_x = np.random.uniform(low=mini, high=maxi, size=n_coords).astype(np.float32)
    h_velocity = np.zeros(n_coords, dtype=np.float32)
    h_pBestPos = h_x.copy()

    # Host arrays with one entry per particle.  999 is the initial
    # "worse than anything seen" fitness.
    h_pValue = np.full(h_n, 999, dtype=np.float32)
    h_pBestValue = np.full(h_n, 999, dtype=np.float32)
    # The kernels declare this buffer as int*, so it has to be int32.
    # Every particle starts as its own best neighbour; the values are
    # recomputed on the device before they are first read.
    h_lBestIdx = np.arange(h_n, dtype=np.int32)

    def _upload(host_array):
        """Allocate a device buffer of matching size and copy host_array in."""
        device_buffer = cuda.mem_alloc(host_array.nbytes)
        cuda.memcpy_htod(device_buffer, host_array)
        return device_buffer

    d_x = _upload(h_x)
    d_pValue = _upload(h_pValue)
    d_pBestValue = _upload(h_pBestValue)
    d_pBestPos = _upload(h_pBestPos)
    d_lBestIdx = _upload(h_lBestIdx)
    d_velocity = _upload(h_velocity)

    # Build every module once; the kernels do not change between iterations.
    N = 2          # number of device generators
    nvalues = 2    # random numbers needed per iteration (r1 and r2)
    mod = SourceModule(code % { "NGENERATORS" : N }, no_extern_c=True)
    # no_extern_c=True means the kernels carry their C++ mangled names.
    init_func = mod.get_function("_Z10initkerneli")
    fill_func = mod.get_function("_Z14randfillkernelPfi")
    mod1 = SourceModule(code1)
    func1 = mod1.get_function("evaluate_particles")
    mod2 = SourceModule(code2)
    func2 = mod2.get_function("calculate_localBest")
    mod3 = SourceModule(code3)
    func3 = mod3.get_function("updatePositionVelocity")

    # Buffer receiving the two random numbers; overwritten every iteration.
    d_data = gpuarray.zeros(nvalues, dtype=np.float32)

    # Launch geometry.  Floor division keeps the grid sizes integral under
    # both Python 2 and Python 3.
    threads = 32
    B = (h_n + threads - 1) // threads                 # one thread per particle
    coord_blocks = (n_coords + threads - 1) // threads  # one thread per coordinate

    for i in range(iterations):
        # Reseed with the iteration number, then draw r1 and r2.
        init_func(np.int32(i), block=(N,1,1), grid=(1,1,1))
        fill_func(d_data, np.int32(nvalues), block=(N,1,1), grid=(1,1,1))

        # Calculate fitness values and personal bests.
        func1(np.int32(h_d), np.int32(h_n), d_x, d_pValue, d_pBestValue, d_pBestPos,
              block=(threads,1,1), grid=(B,1))

        # Calculate the best neighbour of every particle.
        func2(np.int32(h_n), d_pBestValue, d_lBestIdx,
              block=(threads,1,1), grid=(B,1))

        # Calculate updated velocity and position.  The kernel works on a
        # flat index with its own bounds check, so a ceil-div 1-D launch is
        # valid for any h_d (a block of h_d threads would fail once h_d
        # exceeds the device's threads-per-block limit).
        func3(np.int32(h_n), np.int32(h_d), d_x, d_velocity, d_pBestPos, d_lBestIdx,
              d_data, np.float32(mini), np.float32(maxi),
              block=(threads,1,1), grid=(coord_blocks,1))

    # Fetch the final state once; these copies block until the kernels finish.
    cuda.memcpy_dtoh(h_x, d_x)
    cuda.memcpy_dtoh(h_pBestValue, d_pBestValue)
    gbest = np.amin(h_pBestValue)

    # Single-argument print calls produce the same text on Python 2 and 3.
    if iterations > 0:
        # Index of the last iteration; skipped when no iteration ran.
        print("%s  " % (iterations - 1))
    print("h_x:  %s" % (h_x,))
    print("gbest: %s" % gbest)

    print("****************")
    print("%s" % gbest)
    print("n %s" % (h_x,))


# In[48]:


# Run the optimiser with 400 particles of 50 coordinates each for 2000
# iterations, using -100 and 100 as the position bounds.
pso(h_n=400,h_d=50,iterations=2000,mini=-100, maxi=100)