// Per-element update kernel: one thread handles one element index `first`.
//
// For its element, the thread walks every other element `next`, computes the
// offset (dx, dy) from `first` to `next`, divides it by ten times the
// Euclidean distance (truncated to an integer, see note below) and stores the
// result in AX_M[first] / AY_M[first]. It then adds those two values onto
// POSR[first] and POSR[first + N].
//
// Launch layout: 1-D grid of 1-D blocks, any block size; threads whose global
// index is >= N do nothing. No shared memory is used.
//
// Parameters (all device pointers):
//   POSX, POSY  - read only; at least N floats each.
//   POSR        - read/write; at least 2*N floats (indices first and first+N).
//   AX_M, AY_M  - read/write; at least N floats each.
//
// NOTE(review): `den` is an int, so the scaled distance is truncated toward
// zero. For distances below 0.1 it becomes 0 and the division yields inf/NaN.
// This looks unintended, but it is preserved here - confirm before changing.
// NOTE(review): each loop iteration overwrites (does not accumulate) the
// AX_M/AY_M value, so only the last `next != first` contributes - confirm
// whether a sum over all neighbours was intended.
__global__ void addKernel(float *POSX, float *POSY, float *POSR, float *AX_M, float *AY_M) {
    // Global index so the kernel also works when launched with several blocks.
    const int first = blockIdx.x * blockDim.x + threadIdx.x;
    if (first >= N) {
        return;  // tail threads of the last block have no element
    }

    const float x0 = POSX[first];
    const float y0 = POSY[first];

    // Start from the current contents so that, when the loop body never runs
    // (N == 1), the values added to POSR are the ones passed in.
    float ax = AX_M[first];
    float ay = AY_M[first];

    for (int next = 0; next < N; next++) {
        if (next == first) {
            continue;
        }
        const float dx = POSX[next] - x0;
        const float dy = POSY[next] - y0;
        // Integer truncation kept on purpose; see NOTE(review) in the header.
        const int den = 10 * sqrtf(dx * dx + dy * dy);
        ax = dx / den;
        ay = dy / den;
    }

    AX_M[first] = ax;
    AY_M[first] = ay;
    POSR[first] += ax;
    POSR[first + N] += ay;
}

// Host driver: copies the host arrays POSX, POSY, POSR, AX_M and AY_M to
// freshly allocated device buffers, runs addKernel over N elements, and
// copies POSR, AX_M and AY_M back to the host.
//
// All sizes passed to the CUDA runtime are byte counts (N * sizeof(float),
// and twice that for POSR, which the kernel indexes up to 2*N - 1).
//
// Error handling: every runtime call is checked; once one fails, the
// remaining steps are skipped and the device buffers are still released.
// The function returns void, so a failure is not reported to the caller;
// in that case the host arrays may be left unmodified or partially updated.
void GPU_Math() {
    const int count = N;
    if (count <= 0) {
        return;  // nothing to do; a zero-sized grid would be an invalid launch
    }

    const size_t vecBytes = static_cast<size_t>(count) * sizeof(float);
    const size_t posrBytes = 2 * vecBytes;

    // Null-initialised so the unconditional cudaFree calls below are safe
    // even when an allocation was never attempted.
    float *CU_POSX = nullptr;
    float *CU_POSY = nullptr;
    float *CU_POSR = nullptr;
    float *CU_AX_M = nullptr;
    float *CU_AY_M = nullptr;

    // Short-circuit chains: a step only runs if all previous steps succeeded.
    bool ok = cudaMalloc((void**)&CU_POSX, vecBytes) == cudaSuccess
           && cudaMalloc((void**)&CU_POSY, vecBytes) == cudaSuccess
           && cudaMalloc((void**)&CU_POSR, posrBytes) == cudaSuccess
           && cudaMalloc((void**)&CU_AX_M, vecBytes) == cudaSuccess
           && cudaMalloc((void**)&CU_AY_M, vecBytes) == cudaSuccess;

    ok = ok
      && cudaMemcpy(CU_POSX, POSX, vecBytes, cudaMemcpyKind::cudaMemcpyHostToDevice) == cudaSuccess
      && cudaMemcpy(CU_POSY, POSY, vecBytes, cudaMemcpyKind::cudaMemcpyHostToDevice) == cudaSuccess
      && cudaMemcpy(CU_POSR, POSR, posrBytes, cudaMemcpyKind::cudaMemcpyHostToDevice) == cudaSuccess
      && cudaMemcpy(CU_AX_M, AX_M, vecBytes, cudaMemcpyKind::cudaMemcpyHostToDevice) == cudaSuccess
      && cudaMemcpy(CU_AY_M, AY_M, vecBytes, cudaMemcpyKind::cudaMemcpyHostToDevice) == cudaSuccess;

    if (ok) {
        // Ceil-divide so N is not limited by the per-block thread maximum.
        const int threadsPerBlock = 256;
        const int blocks = (count + threadsPerBlock - 1) / threadsPerBlock;
        addKernel<<<blocks, threadsPerBlock>>>(CU_POSX, CU_POSY, CU_POSR, CU_AX_M, CU_AY_M);
        // Launch-configuration errors are only visible through cudaGetLastError.
        ok = cudaGetLastError() == cudaSuccess;
    }

    // The blocking copies below also surface any fault raised while the
    // kernel was executing.
    ok = ok
      && cudaMemcpy(POSR, CU_POSR, posrBytes, cudaMemcpyKind::cudaMemcpyDeviceToHost) == cudaSuccess
      && cudaMemcpy(AX_M, CU_AX_M, vecBytes, cudaMemcpyKind::cudaMemcpyDeviceToHost) == cudaSuccess
      && cudaMemcpy(AY_M, CU_AY_M, vecBytes, cudaMemcpyKind::cudaMemcpyDeviceToHost) == cudaSuccess;
    (void)ok;  // no channel to report failure through a void interface

    cudaFree(CU_POSX);
    cudaFree(CU_POSY);
    cudaFree(CU_POSR);
    cudaFree(CU_AX_M);
    cudaFree(CU_AY_M);
}