Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include <omp.h>
- #include <GL/glut.h>
- #include <cmath>
- #include <iostream>
- #include <ctime>
- #include <cstring>
- #include <cstdio>
// Number of simulated bodies.
const int N = 8000;
// Window size constants; both are handed to glutInitWindowSize() and halved
// by the mouse handler to find the pointer's rest position.
const int h = 1000;
const int w = 1000;

// For CUDA
const int blocksize = 500; // threads per block, 1024 max
int nblocks = N / blocksize;
// Active backend: 1 runs the step on the GPU, anything else on the CPU.
int button = 1;
// Device-side mirrors of the host arrays below.
float *CU_POSM, *CU_POSR, *CU_POSX, *CU_POSY, *CU_POSZ;

// For rotate
float rot_x = 0, rot_y = 0, x_angle = 0, y_angle = 0;

// Point coord
float POSX[N], POSY[N], POSZ[N];
// Per-body velocity, stored as three consecutive runs of N floats:
// x in [0, N), y in [N, 2N), z in [2N, 3N).
// (This used to share one declaration with the coordinates, repeating the
// type in the middle of the declarator list, which does not compile.)
float POSR[N * 3];

// Point mass
float POSM[N];
/*
 * One-time OpenGL state setup: black clear colour and an orthographic view
 * volume spanning [-1000, 1000] on every axis.
 * Must run after the GLUT window (and therefore the GL context) exists.
 */
void initialization()
{
    glClearColor(0, 0, 0, 1);

    // The view volume belongs on the projection stack, not on modelview.
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    glOrtho(-1000, 1000, -1000, 1000, -1000, 1000);

    // Leave a clean modelview matrix current: the per-frame rotations are
    // pushed onto whichever stack is current when drawing starts.
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();
}
/*
 * GLUT display callback.
 * Draws every body as a white point and three short coloured axis segments
 * (X red, Y green, Z blue) under the current x_angle / y_angle rotation,
 * presents the frame, and refreshes the FPS figure in the window title about
 * once per second.
 */
void display()
{
    // FPS meter state, kept across calls. GLUT reports elapsed time as an
    // integer number of milliseconds, so it is kept as int to avoid the
    // precision loss a float suffers on long runs.
    static int timebase = 0;
    static int frames = 0;

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glEnable(GL_DEPTH_TEST);
    glPushMatrix();
    glRotatef(x_angle, 1.0f, 0.0f, 0.0f); // Rot axis X
    glRotatef(y_angle, 0.0f, 1.0f, 0.0f); // Rot axis Y

    glBegin(GL_POINTS);
    glColor3f(1, 1, 1); // same colour for every point, so set it once
    for (int draw = 0; draw < N; draw++) {
        glVertex3f(POSX[draw], POSY[draw], POSZ[draw]);
    }
    glEnd();

    glBegin(GL_LINES);
    // Axis X
    glColor3f(1, 0, 0);
    glVertex3f(-500.0f, -500.0f, -500.0f);
    glVertex3f(-250.0f, -500.0f, -500.0f);
    // Axis Y
    glColor3f(0, 1, 0);
    glVertex3f(-500.0f, -500.0f, -500.0f);
    glVertex3f(-500.0f, -250.0f, -500.0f);
    // Axis Z
    glColor3f(0, 0, 1);
    glVertex3f(-500.0f, -500.0f, -500.0f);
    glVertex3f(-500.0f, -500.0f, -250.0f);
    glEnd();

    glPopMatrix();
    glDisable(GL_DEPTH_TEST);
    glutSwapBuffers();

    // FPS calc + name & display on window
    frames++;
    const int now = glutGet(GLUT_ELAPSED_TIME);
    const int elapsed = now - timebase;
    if (elapsed >= 1000) {
        const double fps = frames * 1000.0 / elapsed;
        // Bounded formatting: the previous 36-byte buffer written with
        // sprintf overflowed as soon as the rate needed five integer digits.
        char title[64];
        snprintf(title, sizeof(title), "Nbody karasiki V2.5 | FPS: %.3f", fps);
        glutSetWindowTitle(title);
        timebase = now;
        frames = 0;
    }
}
// CPU_OpenMP
/*
 * Advances the whole system by one step on the CPU.
 *
 * Pass 1 sums, for every body, the pull of all other bodies
 *     det * m_next * m_first / (100000 * |det|^3)
 * and adds it to that body's velocity in POSR. Pass 2 adds the velocities to
 * the positions.
 *
 * Differences from the earlier single-loop version:
 *  - the exponent was written as `3 / 2`, which is integer division and
 *    evaluates to 1, so the distance was raised to the wrong power; the cube
 *    of the distance is now formed explicitly;
 *  - det_X/det_Y/det_Z were shared between OpenMP threads; every temporary
 *    is now declared inside the loop body and is therefore per-iteration;
 *  - positions were moved while other iterations were still reading them;
 *    they are now read-only until every velocity has been updated.
 */
void OMP_Math()
{
    // Pass 1: velocities. Each iteration writes only its own POSR slots.
#pragma omp parallel for
    for (int first = 0; first < N; first++) {
        const float px = POSX[first];
        const float py = POSY[first];
        const float pz = POSZ[first];
        const float mass = POSM[first];
        float AX_M = 0.0f;
        float AY_M = 0.0f;
        float AZ_M = 0.0f;

        for (int next = 0; next < N; next++) {
            const float det_X = POSX[next] - px;
            const float det_Y = POSY[next] - py;
            const float det_Z = POSZ[next] - pz;
            const float dist2 = det_X * det_X + det_Y * det_Y + det_Z * det_Z;
            const float dist3 = dist2 * sqrtf(dist2);
            // A zero distance covers both the body itself and any coincident
            // body; skipping it avoids dividing by zero.
            if (dist3 > 0.0f) {
                const float den = (POSM[next] * mass) / (100000.0f * dist3);
                AX_M += det_X * den;
                AY_M += det_Y * den;
                AZ_M += det_Z * den;
            }
        }

        POSR[first] += AX_M;
        POSR[first + N] += AY_M;
        POSR[first + N + N] += AZ_M;
    }

    // Pass 2: positions, once all velocities are final for this step.
#pragma omp parallel for
    for (int first = 0; first < N; first++) {
        POSX[first] += POSR[first];
        POSY[first] += POSR[first + N];
        POSZ[first] += POSR[first + N + N];
    }
}
// CUDA_Device
/*
 * Velocity pass, one thread per body.
 * Sums det * m_next * m_first / (100000 * |det|^3) over all other bodies and
 * adds the result to this body's velocity in POSR (x at [i], y at [i + N],
 * z at [i + 2N]). Positions are only read here; moveKernel applies them.
 *
 * Launch layout: 1-D grid of 1-D blocks with at least N threads in total.
 * Threads whose index is >= N exit immediately. No shared memory is used.
 */
__global__ void addKernel(float *POSM, float *POSX, float *POSY, float *POSZ, float *POSR)
{
    const int first = threadIdx.x + blockIdx.x * blockDim.x;
    if (first >= N)
        return;

    const float px = POSX[first];
    const float py = POSY[first];
    const float pz = POSZ[first];
    const float mass = POSM[first];
    float AX_M = 0.0f;
    float AY_M = 0.0f;
    float AZ_M = 0.0f;

    for (int next = 0; next < N; next++) {
        const float det_X = POSX[next] - px;
        const float det_Y = POSY[next] - py;
        const float det_Z = POSZ[next] - pz;
        const float dist2 = det_X * det_X + det_Y * det_Y + det_Z * det_Z;
        // The exponent used to be written `3 / 2`, which is integer division
        // and evaluates to 1; form the cube of the distance explicitly and
        // stay in single precision.
        const float dist3 = dist2 * sqrtf(dist2);
        // Zero distance means the body itself or a coincident body.
        if (dist3 > 0.0f) {
            const float den = (POSM[next] * mass) / (100000.0f * dist3);
            AX_M += det_X * den;
            AY_M += det_Y * den;
            AZ_M += det_Z * den;
        }
    }

    POSR[first] += AX_M;
    POSR[first + N] += AY_M;
    POSR[first + N + N] += AZ_M;
}

/*
 * Position pass, one thread per body: adds each body's velocity to its
 * position. Launched after addKernel so that no thread moves a body while
 * another thread is still reading its old position.
 * Same launch layout and bounds rule as addKernel.
 */
__global__ void moveKernel(float *POSX, float *POSY, float *POSZ, const float *POSR)
{
    const int first = threadIdx.x + blockIdx.x * blockDim.x;
    if (first >= N)
        return;

    POSX[first] += POSR[first];
    POSY[first] += POSR[first + N];
    POSZ[first] += POSR[first + N + N];
}

// Reports a failed CUDA call on stderr; returns true when the call succeeded.
static bool cudaStepOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "GPU_Math: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Frees every device buffer and clears the pointers (cudaFree accepts NULL).
static void releaseDeviceBuffers()
{
    cudaFree(CU_POSM);
    cudaFree(CU_POSX);
    cudaFree(CU_POSY);
    cudaFree(CU_POSZ);
    cudaFree(CU_POSR);
    CU_POSM = CU_POSX = CU_POSY = CU_POSZ = CU_POSR = NULL;
}

// Allocates the device buffers on first use; later calls reuse them.
static bool ensureDeviceBuffers()
{
    if (CU_POSM && CU_POSX && CU_POSY && CU_POSZ && CU_POSR)
        return true;

    releaseDeviceBuffers();
    const size_t bytes = N * sizeof(float);
    if (cudaStepOk(cudaMalloc((void**)&CU_POSM, bytes), "allocating masses") &&
        cudaStepOk(cudaMalloc((void**)&CU_POSX, bytes), "allocating X") &&
        cudaStepOk(cudaMalloc((void**)&CU_POSY, bytes), "allocating Y") &&
        cudaStepOk(cudaMalloc((void**)&CU_POSZ, bytes), "allocating Z") &&
        cudaStepOk(cudaMalloc((void**)&CU_POSR, 3 * bytes), "allocating velocities"))
        return true;

    releaseDeviceBuffers();
    return false;
}

// Uploads the host state, runs both kernels and downloads the result.
static bool runGpuStep()
{
    const size_t bytes = N * sizeof(float);
    // Round up so every body gets a thread even if N is not a multiple of
    // the block size; the kernels discard the surplus threads.
    const int grid = (N + blocksize - 1) / blocksize;

    if (!ensureDeviceBuffers())
        return false;

    // The host arrays are uploaded on every step because the CPU backend may
    // have advanced them since the previous GPU step.
    if (!(cudaStepOk(cudaMemcpy(CU_POSM, POSM, bytes, cudaMemcpyHostToDevice), "uploading masses") &&
          cudaStepOk(cudaMemcpy(CU_POSX, POSX, bytes, cudaMemcpyHostToDevice), "uploading X") &&
          cudaStepOk(cudaMemcpy(CU_POSY, POSY, bytes, cudaMemcpyHostToDevice), "uploading Y") &&
          cudaStepOk(cudaMemcpy(CU_POSZ, POSZ, bytes, cudaMemcpyHostToDevice), "uploading Z") &&
          cudaStepOk(cudaMemcpy(CU_POSR, POSR, 3 * bytes, cudaMemcpyHostToDevice), "uploading velocities")))
        return false;

    addKernel<<<grid, blocksize>>>(CU_POSM, CU_POSX, CU_POSY, CU_POSZ, CU_POSR);
    if (!cudaStepOk(cudaGetLastError(), "launching addKernel"))
        return false;
    moveKernel<<<grid, blocksize>>>(CU_POSX, CU_POSY, CU_POSZ, CU_POSR);
    if (!cudaStepOk(cudaGetLastError(), "launching moveKernel"))
        return false;

    // Blocking copies: they wait for both kernels and surface any fault that
    // happened while the kernels were running.
    return cudaStepOk(cudaMemcpy(POSX, CU_POSX, bytes, cudaMemcpyDeviceToHost), "downloading X") &&
           cudaStepOk(cudaMemcpy(POSY, CU_POSY, bytes, cudaMemcpyDeviceToHost), "downloading Y") &&
           cudaStepOk(cudaMemcpy(POSZ, CU_POSZ, bytes, cudaMemcpyDeviceToHost), "downloading Z") &&
           cudaStepOk(cudaMemcpy(POSR, CU_POSR, 3 * bytes, cudaMemcpyDeviceToHost), "downloading velocities");
}

// CUDA_Host
/*
 * Advances the whole system by one step on the GPU and leaves the new
 * positions and velocities in the host arrays.
 *
 * Device buffers are allocated on the first call and kept for later calls
 * instead of being allocated and freed every frame. Every CUDA call is
 * checked; on failure the error is printed, the device buffers are released
 * and `button` is set to 2 so that following frames use the CPU backend
 * rather than repeating the failure each frame.
 */
void GPU_Math()
{
    if (runGpuStep())
        return;

    releaseDeviceBuffers();
    fprintf(stderr, "GPU_Math: switching to the CPU backend\n");
    button = 2;
}
/*
 * GLUT timer callback: runs one simulation step on the selected backend
 * (GPU when button == 1, CPU otherwise), draws the frame and immediately
 * schedules itself again with a zero delay.
 */
void Timer(int)
{
    // Pick the step routine for the currently selected backend.
    void (*step)() = (button == 1) ? GPU_Math : OMP_Math;
    step();

    // Draw the new state, then queue the next tick.
    display();
    glutTimerFunc(0, Timer, 0);
}
// Point generator
/*
 * Fills the host arrays with N bodies: integer coordinates in [-500, 499] on
 * each axis and an integer mass in [1, 100]. A body whose coordinates match
 * an already placed one is drawn again (coordinates and mass) until it is
 * unique.
 */
void characters()
{
    int placed = 0;
    while (placed < N) {
        POSX[placed] = rand() % 1000 - 500;
        POSY[placed] = rand() % 1000 - 500;
        POSZ[placed] = rand() % 1000 - 500;
        POSM[placed] = rand() % 100 + 1;

        // Look for an earlier body at exactly the same spot.
        bool clash = false;
        for (int prev = 0; prev < placed && !clash; prev++) {
            clash = (POSX[placed] == POSX[prev]) &&
                    (POSY[placed] == POSY[prev]) &&
                    (POSZ[placed] == POSZ[prev]);
        }

        // Only move on once the slot holds a unique position.
        if (!clash)
            placed++;
    }
}
// Select GPU(1) or CPU(2)
/*
 * GLUT keyboard callback: '1' selects the GPU backend, '2' the CPU backend.
 * Every other key is ignored; the pointer position (x, y) is not used.
 */
void keyboard(unsigned char key, int x, int y)
{
    if (key == '1')
        button = 1;
    else if (key == '2')
        button = 2;
}
/*
 * Motion callback for the pointer moving with no button held: shows the
 * arrow cursor again (the drag handler hides it).
 */
void MouseMove(int a, int b)
{
    // The pointer position itself is not needed here.
    (void)a;
    (void)b;
    glutSetCursor(GLUT_CURSOR_RIGHT_ARROW);
}
// Mouse rotation
/*
 * Motion callback while a button is held: hides the cursor, turns the scene
 * by 6 degrees per event in the direction the pointer left the window
 * centre, then warps the pointer back to the centre.
 *
 * GLUT passes (x, y) in that order, so `ay` is the horizontal and `ax` the
 * vertical window coordinate. Vertical motion changes x_angle, horizontal
 * motion changes y_angle.
 *
 * The centre is taken from the live window size. The previous fixed h/2 and
 * w/2 compared the vertical coordinate against half of the value used as the
 * window width (and vice versa) and went stale once the window was resized.
 */
void MousePressedMove(int ay, int ax)
{
    glutSetCursor(GLUT_CURSOR_NONE);

    const int center_x = glutGet(GLUT_WINDOW_WIDTH) / 2;
    const int center_y = glutGet(GLUT_WINDOW_HEIGHT) / 2;

    // Sitting exactly on the centre is the event produced by our own warp.
    if (ay == center_x && ax == center_y)
        return;

    if (ax > center_y)
        x_angle += 6;
    else if (ax < center_y)
        x_angle -= 6;

    if (ay > center_x)
        y_angle += 6;
    else if (ay < center_x)
        y_angle -= 6;

    glutWarpPointer(center_x, center_y);
}
/*
 * Program entry point: seeds the generator, creates the bodies, lists the
 * CUDA devices, prints the key help, then creates the GLUT window, registers
 * the callbacks and enters the GLUT main loop.
 */
int main(int argc, char **argv)
{
    srand((unsigned int)time(0));
    setlocale(LC_ALL, "russian");

    // Call generator
    characters();

    // CUDA device init. The count stays 0 if the query itself fails, so the
    // loop below never reads an indeterminate value.
    int nDevices = 0;
    if (cudaGetDeviceCount(&nDevices) != cudaSuccess)
        nDevices = 0;
    for (int i = 0; i < nDevices; i++) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, i) != cudaSuccess)
            continue;
        std::cout << "GPU # " << i;
        std::cout << " - " << prop.name;
    }
    if (nDevices == 0) {
        // Without a device the GPU backend cannot advance the simulation,
        // so start on the CPU backend instead.
        std::cout << "No CUDA device found, starting on the CPU";
        button = 2;
    }

    std::cout << "\n\nN: " << N << std::endl << std::endl;
    std::cout << "[1] -- Select GPU\n";
    std::cout << "[2] -- Select CPU\n";
    std::cout << "[Move + click] -- Rotate" << std::endl << std::endl;

    // For freeglut
    glutInit(&argc, argv);
    // Double buffering because every frame ends with glutSwapBuffers(), and
    // a depth buffer because drawing enables GL_DEPTH_TEST.
    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
    glutInitWindowPosition(100, 100);
    glutInitWindowSize(h, w); // GLUT takes (width, height); both are 1000
    glutCreateWindow("");
    glutDisplayFunc(display);
    glutTimerFunc(0, Timer, 0);
    glutKeyboardFunc(keyboard);
    glutPassiveMotionFunc(MouseMove);
    glutMotionFunc(MousePressedMove);
    initialization();
    glutMainLoop();
    return 0;
}
- /*
- N = 8000
- i5-3450 ~1.66 fps
- GTX 1050Ti ~79 fps
- */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement