/**
 * For every element `first` in [0, N), computes a vector of length 0.1 that
 * points from (cuPOSX[first], cuPOSY[first]) towards another element `next`,
 * stores its components in cuAX_M[first] / cuAY_M[first], and then adds those
 * components to cuPOSR[first] (x part) and cuPOSR[first + N] (y part).
 *
 * Launch layout: 1D grid of 1D blocks; one thread per element. Any grid that
 * covers N threads works, including the single-block <<<1, N>>> form, because
 * surplus threads exit immediately.
 *
 * @param cuPOSX  device array of N x-coordinates (read only here)
 * @param cuPOSY  device array of N y-coordinates (read only here)
 * @param cuPOSR  device array of 2*N floats, updated in place
 * @param cuAX_M  device array of N floats, x component (read and written)
 * @param cuAY_M  device array of N floats, y component (read and written)
 *
 * NOTE(review): the loop assigns (does not accumulate) the result, so only the
 * last `next != first` survives. This matches the previous behaviour; if a sum
 * over all other elements was intended, change the two assignments to `+=` and
 * start `ax` / `ay` at zero -- confirm with the author.
 * NOTE(review): two elements at the same position give 0/0 and therefore NaN;
 * behaviour kept as before -- confirm whether a guard is wanted.
 */
__global__ void addKernel(float* cuPOSX, float* cuPOSY, float* cuPOSR, float *cuAX_M, float *cuAY_M)
{
    // Flat global index; equals threadIdx.x for a single-block launch.
    const int first = blockIdx.x * blockDim.x + threadIdx.x;
    if (first >= N) {
        return;  // grid tail: more threads than elements
    }

    // Load this element's position once instead of on every use.
    const float x0 = cuPOSX[first];
    const float y0 = cuPOSY[first];

    // Start from the incoming values: when N == 1 the loop writes nothing and
    // the values already stored in cuAX_M / cuAY_M are the ones applied below.
    float ax = cuAX_M[first];
    float ay = cuAY_M[first];

    for (int next = 0; next < N; ++next) {
        if (next == first) {
            continue;
        }
        const float dx = cuPOSX[next] - x0;
        const float dy = cuPOSY[next] - y0;
        // Single-precision throughout; a bare 0.1 would promote to double.
        const float dist = sqrtf(dx * dx + dy * dy);
        ax = 0.1f * dx / dist;
        ay = 0.1f * dy / dist;
    }

    cuAX_M[first] = ax;
    cuAY_M[first] = ay;
    cuPOSR[first] += ax;
    cuPOSR[first + N] += ay;
}

/**
 * Host driver for addKernel: copies POSX, POSY, POSR, AX_M and AY_M to the
 * device, runs the kernel over N elements, and copies POSR, AX_M and AY_M back.
 *
 * Sizes passed to cudaMalloc / cudaMemcpy are in BYTES, i.e. N * sizeof(float)
 * (twice that for POSR, which the kernel indexes up to 2*N - 1).
 * Assumes the host arrays are float arrays of at least N elements (2*N for
 * POSR) -- TODO confirm against their declarations.
 *
 * The function returns void, so a CUDA failure cannot be reported to the
 * caller; on the first failing call the remaining steps are skipped and the
 * device buffers are released.
 */
void GPU_Math()
{
    const int count = N;
    if (count <= 0) {
        return;  // nothing to do; a zero-sized grid would be a launch error
    }

    const size_t bytes = static_cast<size_t>(count) * sizeof(float);
    const size_t posrBytes = 2 * bytes;

    float *CU_POSX = nullptr;
    float *CU_POSY = nullptr;
    float *CU_POSR = nullptr;
    float *CU_AX_M = nullptr;
    float *CU_AY_M = nullptr;

    cudaError_t status = cudaSuccess;
    do {
        // Device allocations.
        if ((status = cudaMalloc((void**)&CU_POSX, bytes)) != cudaSuccess) break;
        if ((status = cudaMalloc((void**)&CU_POSY, bytes)) != cudaSuccess) break;
        if ((status = cudaMalloc((void**)&CU_POSR, posrBytes)) != cudaSuccess) break;
        if ((status = cudaMalloc((void**)&CU_AX_M, bytes)) != cudaSuccess) break;
        if ((status = cudaMalloc((void**)&CU_AY_M, bytes)) != cudaSuccess) break;

        // Host -> device.
        if ((status = cudaMemcpy(CU_POSX, POSX, bytes, cudaMemcpyHostToDevice)) != cudaSuccess) break;
        if ((status = cudaMemcpy(CU_POSY, POSY, bytes, cudaMemcpyHostToDevice)) != cudaSuccess) break;
        if ((status = cudaMemcpy(CU_POSR, POSR, posrBytes, cudaMemcpyHostToDevice)) != cudaSuccess) break;
        if ((status = cudaMemcpy(CU_AX_M, AX_M, bytes, cudaMemcpyHostToDevice)) != cudaSuccess) break;
        if ((status = cudaMemcpy(CU_AY_M, AY_M, bytes, cudaMemcpyHostToDevice)) != cudaSuccess) break;

        // Ceil-divide so N is not limited to the per-block thread maximum.
        const int threadsPerBlock = 256;
        const int blocks = (count + threadsPerBlock - 1) / threadsPerBlock;
        addKernel<<<blocks, threadsPerBlock>>>(CU_POSX, CU_POSY, CU_POSR, CU_AX_M, CU_AY_M);
        // A launch does not return an error itself; query it explicitly.
        if ((status = cudaGetLastError()) != cudaSuccess) break;

        // Device -> host. cudaMemcpy blocks until the kernel has finished, so
        // in-kernel faults surface here.
        if ((status = cudaMemcpy(POSR, CU_POSR, posrBytes, cudaMemcpyDeviceToHost)) != cudaSuccess) break;
        if ((status = cudaMemcpy(AX_M, CU_AX_M, bytes, cudaMemcpyDeviceToHost)) != cudaSuccess) break;
        if ((status = cudaMemcpy(AY_M, CU_AY_M, bytes, cudaMemcpyDeviceToHost)) != cudaSuccess) break;
    } while (false);

    // Always release whatever was allocated; cudaFree accepts a null pointer.
    cudaFree(CU_POSX);
    cudaFree(CU_POSY);
    cudaFree(CU_POSR);
    cudaFree(CU_AX_M);
    cudaFree(CU_AY_M);
}