Advertisement
Guest User

Untitled

a guest
Sep 21st, 2017
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.16 KB | None | 0 0
  1. # coding: utf-8
  2.  
  3. # In[41]:
  4.  
  5.  
  6. import numpy as np
  7. import pycuda.driver as cuda
  8. import pycuda.autoinit
  9. from pycuda.compiler import SourceModule
  10. import numpy
  11. from pycuda import gpuarray
  12.  
  13.  
  14. # In[42]:
  15.  
  16.  
  # CUDA source for the random-number helper module.
  #
  # The text is passed through Python %-formatting before compilation, so
  # %(NGENERATORS)s is replaced by the number of generator states and no other
  # percent sign may appear inside the string.
  #
  # The generator states are stored by value in a fixed-size __device__ array.
  # Allocating each state with device-side `new` inside initkernel leaked one
  # allocation per thread on every call (nothing ever deleted them) and left a
  # null pointer behind for randfillkernel to dereference when the device heap
  # was exhausted.
  #
  # Kernel signatures are unchanged, so the C++-mangled names used for lookup
  # (_Z10initkerneli and _Z14randfillkernelPfi) stay valid.
  code = """
  #include <curand_kernel.h>

  const int nstates = %(NGENERATORS)s;

  // One generator state per thread, owned by the module (no heap allocation).
  __device__ curandState_t states[nstates];

  // Seeds generator `tidx` with (seed, subsequence = tidx, offset = 0).
  // Threads whose global index is >= nstates do nothing.
  __global__ void initkernel(int seed)
  {
      int tidx = threadIdx.x + blockIdx.x * blockDim.x;

      if (tidx < nstates)
      {
          curand_init(seed, tidx, 0, &states[tidx]);
      }
  }

  // Writes curand_uniform samples into values[0..N).  Thread tidx handles
  // elements tidx, tidx + stride, ... where stride is the total number of
  // launched threads; only threads with tidx < nstates write anything, so the
  // launch should not use more threads than nstates.
  __global__ void randfillkernel(float *values, int N)
  {
      int tidx = threadIdx.x + blockIdx.x * blockDim.x;

      if (tidx < nstates)
      {
          // Work on a register copy and store it back once at the end.
          curandState_t s = states[tidx];
          int stride = blockDim.x * gridDim.x;
          for (int i = tidx; i < N; i += stride)
          {
              values[i] = curand_uniform(&s);
          }
          states[tidx] = s;
      }
  }
  """
  51.  
  52.  
  53. # In[43]:
  54.  
  55.  
  # CUDA source for the fitness-evaluation module.
  #
  # evaluate_particles expects a 1-D launch with at least n threads; thread i
  # evaluates particle i, whose d coordinates are stored contiguously at
  # x[d*i .. d*i + d).  The debug printf that used to sit inside the
  # per-dimension copy loop has been removed: it emitted one character per
  # copied coordinate and serialised the kernel.
  code1 = """
  // Sum of squares of the d coordinates starting at x.
  __device__ float fitness_function(int d, const float *x)
  {
      float sum = 0.0f;
      for (int i = 0; i < d; i++)
      {
          sum += x[i] * x[i];
      }
      return sum;
  }

  // Stores the fitness of particle i in pValue[i]; when it is strictly better
  // (smaller) than pBestValue[i], records it and copies the particle's current
  // position into pBestPos.
  __global__ void evaluate_particles(int d, int n, float *x, float *pValue, float *pBestValue, float *pBestPos)
  {
      int i = blockDim.x * blockIdx.x + threadIdx.x;

      if (i >= n)
          return;

      const float *pos = &x[d * i];
      // Keep the fitness in a register instead of re-reading pValue[i].
      float value = fitness_function(d, pos);
      pValue[i] = value;

      if (value < pBestValue[i])
      {
          pBestValue[i] = value;

          for (int j = 0; j < d; j++)
          {
              pBestPos[(d * i) + j] = pos[j];
          }
      }
  }
  """
  93.  
  94.  
  95. # In[44]:
  96.  
  97.  
  # CUDA source for the neighbourhood-best module.
  #
  # calculate_localBest expects a 1-D launch with at least n threads.  Particle
  # `self` compares its personal best with those of its two ring neighbours
  # (indices wrap around at 0 and n-1) and stores the index of the smallest
  # value in lBestIdx[self].  The next neighbour is tested first and every
  # comparison is strict, so on ties the earlier candidate is kept.
  code2 = """
  #include<stdio.h>
  #include<cuda_runtime.h>

  __global__ void calculate_localBest(int n, float *pBestValue, int *lBestIdx)
  {
      const int self = blockDim.x * blockIdx.x + threadIdx.x;

      if (self < n)
      {
          // Ring neighbours with wrap-around.
          const int next = (self == (n - 1)) ? 0 : self + 1;
          const int prev = (self == 0) ? (n - 1) : self - 1;

          int best = (pBestValue[next] < pBestValue[self]) ? next : self;
          best = (pBestValue[prev] < pBestValue[best]) ? prev : best;

          lBestIdx[self] = best;
      }
  }
  """
  127.  
  128.  
  129. # In[45]:
  130.  
  131.  
  # CUDA source for the velocity/position update module.
  #
  # updatePositionVelocity expects a 1-D launch with at least n*d threads;
  # thread i owns flat coordinate i, i.e. dimension (i mod d) of particle
  # (i / d).  data[0] and data[1] supply the two random factors r1 and r2.
  #
  # The social term now pulls towards the neighbourhood-best *position*
  # pBestPos[lBestIdx[particle]*d + dim].  It previously used
  # lBestIdx[i mod d] directly, which treated a particle index as a coordinate
  # and indexed the per-particle array by dimension.
  code3 = """
  __global__ void updatePositionVelocity(int n, int d, float *x, float *velocity, float *pBestPos, int *lBestIdx, float *data, float min, float max)
  {
      int i = blockDim.x * blockIdx.x + threadIdx.x;

      if (i >= (n * d))
          return;

      // Constriction factor and acceleration coefficients.
      const float X = 0.72984f;
      const float c1 = 2.05f, c2 = 2.05f;
      const float r1 = data[0], r2 = data[1];

      // Split the flat index into (particle, dimension).
      const int particle = i / d;
      const int dim = i - particle * d;

      // Coordinate `dim` of the best position found in this particle's
      // neighbourhood.
      const float lBestPos = pBestPos[lBestIdx[particle] * d + dim];

      float pos = x[i];
      float vel = X * (velocity[i] + (c1 * r1 * (pBestPos[i] - pos)) + (c2 * r2 * (lBestPos - pos)));
      pos += vel;

      // Clamp to [min, max] and reverse the velocity when a bound is hit.
      if (pos < min)
      {
          pos = min;
          vel = -vel;
      }

      if (pos > max)
      {
          pos = max;
          vel = -vel;
      }

      x[i] = pos;
      velocity[i] = vel;
  }
  """
  165.  
  166.  
  167. # In[46]:
  168.  
  169.  
  def pso(h_n=3, h_d=3, iterations=5, mini=-5.12, maxi=5.12):
      """Run a ring-neighbourhood particle swarm optimisation on the GPU.

      The kernels are compiled from the module-level source strings ``code``,
      ``code1``, ``code2`` and ``code3``.  Every iteration draws two uniform
      random factors, evaluates all particles, picks each particle's
      neighbourhood best and updates velocities and positions.  After each
      iteration the positions and the smallest personal-best value are copied
      back to the host and printed; a final summary is printed at the end.

      Parameters:
          h_n: number of particles (must be >= 1).
          h_d: number of dimensions per particle (must be >= 1).
          iterations: number of update steps to run.
          mini: lower bound used for the initial positions and for clamping.
          maxi: upper bound used for the initial positions and for clamping.

      Returns:
          None.  Results are reported on standard output only.

      Raises:
          ValueError: if ``h_n`` or ``h_d`` is smaller than 1.
      """
      if h_n < 1 or h_d < 1:
          raise ValueError("h_n and h_d must both be at least 1")

      n_coords = h_n * h_d

      # --- host state: one float32 per particle coordinate -----------------
      # Start inside the search box given by the caller rather than a fixed
      # +/-5.12 range.
      h_x = np.random.uniform(low=mini, high=maxi, size=n_coords).astype(np.float32)
      h_velocity = np.zeros(n_coords, dtype=np.float32)
      h_pBestPos = h_x.copy()

      # --- host state: one entry per particle ------------------------------
      # +inf guarantees the first evaluation always becomes the personal
      # best; a finite sentinel such as 999 is smaller than many real fitness
      # values and would never be replaced.
      h_pValue = np.full(h_n, np.inf, dtype=np.float32)
      h_pBestValue = np.full(h_n, np.inf, dtype=np.float32)
      # The kernels read and write this buffer as int*, so it must be int32.
      h_lBestIdx = np.arange(h_n, dtype=np.int32)

      # --- device buffers ---------------------------------------------------
      d_x = cuda.mem_alloc(h_x.nbytes)
      d_velocity = cuda.mem_alloc(h_velocity.nbytes)
      d_pBestPos = cuda.mem_alloc(h_pBestPos.nbytes)
      d_pValue = cuda.mem_alloc(h_pValue.nbytes)
      d_pBestValue = cuda.mem_alloc(h_pBestValue.nbytes)
      d_lBestIdx = cuda.mem_alloc(h_lBestIdx.nbytes)

      cuda.memcpy_htod(d_x, h_x)
      cuda.memcpy_htod(d_velocity, h_velocity)
      cuda.memcpy_htod(d_pBestPos, h_pBestPos)
      cuda.memcpy_htod(d_pValue, h_pValue)
      cuda.memcpy_htod(d_pBestValue, h_pBestValue)
      cuda.memcpy_htod(d_lBestIdx, h_lBestIdx)

      # --- compile every module once, outside the iteration loop -----------
      n_generators = 2
      nvalues = 2  # r1 and r2 consumed by the update kernel
      rng_mod = SourceModule(code % {"NGENERATORS": n_generators}, no_extern_c=True)
      # Compiled without extern "C", hence the C++-mangled kernel names.
      init_func = rng_mod.get_function("_Z10initkerneli")
      fill_func = rng_mod.get_function("_Z14randfillkernelPfi")

      mod1 = SourceModule(code1)
      func1 = mod1.get_function("evaluate_particles")
      mod2 = SourceModule(code2)
      func2 = mod2.get_function("calculate_localBest")
      mod3 = SourceModule(code3)
      func3 = mod3.get_function("updatePositionVelocity")

      # Seed the generators once; each fill below advances their state.
      init_func(np.int32(0), block=(n_generators, 1, 1), grid=(1, 1, 1))
      d_data = gpuarray.zeros(nvalues, dtype=np.float32)

      # --- launch geometry (ceil-division, integer on Python 2 and 3) ------
      particle_threads = 32
      particle_blocks = int((h_n + particle_threads - 1) // particle_threads)
      # The update kernel indexes coordinates with a flat global index, so a
      # plain 1-D launch over n*d works for any h_d (one block per particle
      # would fail once h_d exceeded the per-block thread limit).
      coord_threads = 256
      coord_blocks = int((n_coords + coord_threads - 1) // coord_threads)

      n_arg = np.int32(h_n)
      d_arg = np.int32(h_d)
      lo_arg = np.float32(mini)
      hi_arg = np.float32(maxi)
      nvalues_arg = np.int32(nvalues)

      for i in range(iterations):
          # Fresh random factors r1, r2 for this step.
          fill_func(d_data, nvalues_arg,
                    block=(n_generators, 1, 1), grid=(1, 1, 1))

          # Fitness and personal bests.
          func1(d_arg, n_arg, d_x, d_pValue, d_pBestValue, d_pBestPos,
                block=(particle_threads, 1, 1), grid=(particle_blocks, 1))

          # Neighbourhood bests.
          func2(n_arg, d_pBestValue, d_lBestIdx,
                block=(particle_threads, 1, 1), grid=(particle_blocks, 1))

          # Velocities and positions.
          func3(n_arg, d_arg, d_x, d_velocity, d_pBestPos, d_lBestIdx, d_data,
                lo_arg, hi_arg,
                block=(coord_threads, 1, 1), grid=(coord_blocks, 1))

          # Progress report; the blocking copies also synchronise with the
          # kernels launched above.
          cuda.memcpy_dtoh(h_x, d_x)
          cuda.memcpy_dtoh(h_pBestValue, d_pBestValue)
          gbest = np.amin(h_pBestValue)
          # Single-argument print calls behave the same on Python 2 and 3.
          print("%s  " % (i,))
          print("h_x:  %s" % (h_x,))
          print("gbest: %s" % (gbest,))

      # Final summary (also covers iterations == 0).
      cuda.memcpy_dtoh(h_pBestValue, d_pBestValue)
      cuda.memcpy_dtoh(h_x, d_x)
      gbest = np.amin(h_pBestValue)
      print("****************")
      print("%s" % (gbest,))
      print("n %s" % (h_x,))
  269.  
  270.  
  271.  
  272.  
  273. # In[48]:
  274.  
  275.  
  # Driver cell: 400 particles in 50 dimensions, 2000 iterations, with the
  # search box set to [-100, 100] in every dimension.
  pso(
      h_n=400,
      h_d=50,
      iterations=2000,
      mini=-100,
      maxi=100,
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement