Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
 * Processor Architecture (ACH 2014)
 * Project no. 2 (CUDA)
 * Login: xlogin00
 */
#include <sys/time.h>

#include <cmath>
#include <cstdio>
#include <cstdlib>

#include "nbody.h"
/* Report a failed CUDA runtime call and terminate the program. */
static void check_cuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(1);
    }
}

/* Allocate one host array of `bytes` bytes; terminates on failure. */
static float *host_alloc_array(size_t bytes)
{
    float *ptr = (float *)malloc(bytes);
    if (ptr == NULL)
    {
        fprintf(stderr, "Can't allocate %lu bytes of host memory!\n", (unsigned long)bytes);
        exit(1);
    }
    return ptr;
}

/* Allocate all seven per-particle host arrays of `p`. */
static void host_alloc_particles(t_particles &p, size_t bytes)
{
    p.pos_x  = host_alloc_array(bytes);
    p.pos_y  = host_alloc_array(bytes);
    p.pos_z  = host_alloc_array(bytes);
    p.vel_x  = host_alloc_array(bytes);
    p.vel_y  = host_alloc_array(bytes);
    p.vel_z  = host_alloc_array(bytes);
    p.weight = host_alloc_array(bytes);
}

/* Release the host arrays obtained from host_alloc_particles(). */
static void host_free_particles(t_particles &p)
{
    free(p.pos_x);
    free(p.pos_y);
    free(p.pos_z);
    free(p.vel_x);
    free(p.vel_y);
    free(p.vel_z);
    free(p.weight);
}

/* Allocate all seven per-particle device arrays of `p`. */
static void device_alloc_particles(t_particles &p, size_t bytes)
{
    check_cuda(cudaMalloc((void **)&p.pos_x,  bytes), "cudaMalloc(pos_x)");
    check_cuda(cudaMalloc((void **)&p.pos_y,  bytes), "cudaMalloc(pos_y)");
    check_cuda(cudaMalloc((void **)&p.pos_z,  bytes), "cudaMalloc(pos_z)");
    check_cuda(cudaMalloc((void **)&p.vel_x,  bytes), "cudaMalloc(vel_x)");
    check_cuda(cudaMalloc((void **)&p.vel_y,  bytes), "cudaMalloc(vel_y)");
    check_cuda(cudaMalloc((void **)&p.vel_z,  bytes), "cudaMalloc(vel_z)");
    check_cuda(cudaMalloc((void **)&p.weight, bytes), "cudaMalloc(weight)");
}

/* Release the device arrays obtained from device_alloc_particles(). */
static void device_free_particles(t_particles &p)
{
    check_cuda(cudaFree(p.pos_x),  "cudaFree(pos_x)");
    check_cuda(cudaFree(p.pos_y),  "cudaFree(pos_y)");
    check_cuda(cudaFree(p.pos_z),  "cudaFree(pos_z)");
    check_cuda(cudaFree(p.vel_x),  "cudaFree(vel_x)");
    check_cuda(cudaFree(p.vel_y),  "cudaFree(vel_y)");
    check_cuda(cudaFree(p.vel_z),  "cudaFree(vel_z)");
    check_cuda(cudaFree(p.weight), "cudaFree(weight)");
}

/* Copy every per-particle array from `src` to `dst` in the direction `kind`. */
static void copy_particles(const t_particles &dst, const t_particles &src,
                           size_t bytes, cudaMemcpyKind kind)
{
    check_cuda(cudaMemcpy(dst.pos_x,  src.pos_x,  bytes, kind), "cudaMemcpy(pos_x)");
    check_cuda(cudaMemcpy(dst.pos_y,  src.pos_y,  bytes, kind), "cudaMemcpy(pos_y)");
    check_cuda(cudaMemcpy(dst.pos_z,  src.pos_z,  bytes, kind), "cudaMemcpy(pos_z)");
    check_cuda(cudaMemcpy(dst.vel_x,  src.vel_x,  bytes, kind), "cudaMemcpy(vel_x)");
    check_cuda(cudaMemcpy(dst.vel_y,  src.vel_y,  bytes, kind), "cudaMemcpy(vel_y)");
    check_cuda(cudaMemcpy(dst.vel_z,  src.vel_z,  bytes, kind), "cudaMemcpy(vel_z)");
    check_cuda(cudaMemcpy(dst.weight, src.weight, bytes, kind), "cudaMemcpy(weight)");
}

/*
 * Entry point: nbody <N> <dt> <steps> <thr/blc> <input> <output>
 *
 * Reads N particles from <input>, uploads them into two device buffers,
 * launches particles_simulate <steps> times while alternating the two
 * buffers, prints the elapsed wall-clock time, and writes the resulting
 * particles to <output>.
 *
 * Returns 0 on success; exits with status 1 on bad arguments, file errors,
 * allocation failures or any CUDA runtime error.
 */
int main(int argc, char **argv)
{
    FILE *fp;
    struct timeval t1, t2;

    // Command-line parameters.
    if (argc != 7)
    {
        printf("Usage: nbody <N> <dt> <steps> <thr/blc> <input> <output>\n");
        exit(1);
    }

    const int   N       = atoi(argv[1]);
    const float dt      = (float)atof(argv[2]);
    const int   steps   = atoi(argv[3]);
    const int   thr_blc = atoi(argv[4]);

    printf("N: %d\n", N);
    printf("dt: %f\n", dt);
    printf("steps: %d\n", steps);
    printf("threads/block: %d\n", thr_blc);

    // Reject values that would give zero-sized buffers, an empty launch
    // configuration, or a negative buffer index further below.
    if (N <= 0 || steps < 0 || thr_blc <= 0)
    {
        fprintf(stderr, "Invalid arguments: N and thr/blc must be positive, steps must be non-negative.\n");
        exit(1);
    }

    const size_t bytes = (size_t)N * sizeof(float);

    // Host-side particle storage.
    t_particles particles_cpu;
    host_alloc_particles(particles_cpu, bytes);

    // Load particles from the input file.
    fp = fopen(argv[5], "r");
    if (fp == NULL)
    {
        printf("Can't open file %s!\n", argv[5]);
        exit(1);
    }
    particles_read(fp, particles_cpu, N);
    fclose(fp);

    // Two device-side copies so each step can read one and write the other.
    t_particles particles_gpu[2];
    for (int i = 0; i < 2; i++)
    {
        device_alloc_particles(particles_gpu[i], bytes);
        copy_particles(particles_gpu[i], particles_cpu, bytes, cudaMemcpyHostToDevice);
    }

    // Enough blocks of thr_blc threads to cover all N particles (ceil-div).
    // NOTE(review): assumes particles_simulate guards its global index
    // against N -- confirm against the kernel definition.
    const int n_blocks = N / thr_blc + (N % thr_blc != 0 ? 1 : 0);

    // Simulation.
    gettimeofday(&t1, 0);
    for (int s = 0; s < steps; s++)
    {
        // NOTE(review): assumes the kernel signature is (in, out, N, dt) --
        // confirm against nbody.h.
        const int src = s % 2;
        const int dst = (s + 1) % 2;
        particles_simulate<<<n_blocks, thr_blc>>>(particles_gpu[src], particles_gpu[dst], N, dt);
        // Kernel launches do not return a status; fetch launch errors explicitly.
        check_cuda(cudaGetLastError(), "particles_simulate launch");
    }
    // Wait for all queued kernels so the measured time covers their execution.
    check_cuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
    gettimeofday(&t2, 0);

    // Elapsed time in seconds.
    double t = (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000000.0;
    printf("Time: %f s\n", t);

    // The last step (s = steps - 1) wrote into buffer steps % 2; with
    // steps == 0 this is buffer 0, which still holds the initial data.
    const int result = steps % 2;
    copy_particles(particles_cpu, particles_gpu[result], bytes, cudaMemcpyDeviceToHost);

    // Store particles in the output file.
    fp = fopen(argv[6], "w");
    if (fp == NULL)
    {
        printf("Can't open file %s!\n", argv[6]);
        exit(1);
    }
    particles_write(fp, particles_cpu, N);
    fclose(fp);

    // Release device and host memory.
    for (int i = 0; i < 2; i++)
    {
        device_free_particles(particles_gpu[i]);
    }
    host_free_particles(particles_cpu);

    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement