Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>

#include <chrono>
#include <climits>
#include <iostream>

using namespace std::chrono;
using namespace std;
/**
 * Returns true when `candidate` is a non-trivial divisor of `value`, i.e.
 * 2 <= candidate < value and value % candidate == 0.
 *
 * Callable from host and device so the predicate can be checked without a GPU.
 */
__host__ __device__ inline bool is_nontrivial_divisor(unsigned long long int value,
                                                      unsigned long long int candidate) {
    // The range tests come first so a zero candidate is never used as a modulus,
    // and so candidate 1 (which divides everything) is never reported.
    return candidate >= 2 && candidate < value && value % candidate == 0;
}

/**
 * Trial-division kernel: every thread tests exactly one candidate divisor.
 *
 * The candidate is the thread's flat 1D global index plus `counter`. When that
 * candidate is a non-trivial divisor of *number, the thread stores `false` to
 * *prime_device; otherwise it leaves the flag untouched. The only value ever
 * written is `false`, so concurrent writers all store the same value.
 *
 * Launch layout: 1D grid of 1D blocks (only the .x components are read);
 * no shared memory is used.
 *
 * @param number        pointer to the value under test; dereferenced on the device
 * @param prime_device  pointer to the result flag; dereferenced on the device
 * @param counter       offset added to the thread index to form the candidate
 */
__global__
void check_kernel(unsigned long long int* number, bool* prime_device, int counter) {
    // Widen before adding: the original sum was evaluated in 32-bit unsigned
    // arithmetic and only then converted to 64 bits.
    const unsigned long long int candidate =
        static_cast<unsigned long long int>(blockIdx.x) * blockDim.x
        + threadIdx.x
        + static_cast<unsigned long long int>(counter);

    if (is_nontrivial_divisor(*number, candidate)) {
        *prime_device = false;
    }
}
/**
 * Writes a prompt to standard output and extracts one unsigned long long
 * value from standard input.
 *
 * @return the value extracted from std::cin
 */
unsigned long long int get_number() {
    unsigned long long int value;
    std::cout << "Give the program a number to check: ";
    std::cin >> value;
    return value;
}
/**
 * Sequential (CPU) primality test by trial division.
 *
 * Tries every divisor i with i * i <= number and stops at the first hit.
 * The elapsed time of the loop is printed to standard output in microseconds.
 *
 * @param number  value to test
 * @return true when `number` is prime; false otherwise (including 0 and 1)
 */
bool check_non_parallel(unsigned long long int number) {
    // 0 and 1 are not prime; the loop below never runs for them.
    bool prime = number >= 2;

    const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
    // `i <= number / i` is the integer form of `i * i <= number`: it cannot
    // overflow and avoids the rounding of a floating-point square root.
    for (unsigned long long int i = 2; i <= number / i; ++i) {
        if (number % i == 0) {
            prime = false;
            break;
        }
    }
    const std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();

    const auto diff = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    std::cout << "NON-Parallel (CPU) checking took: " << diff.count() << " microseconds." << std::endl;
    return prime;
}
/**
 * GPU primality test by trial division, timed with CUDA events.
 *
 * Copies `number` and the initial flag to device memory, launches check_kernel
 * repeatedly over consecutive windows of candidate divisors (starting at 2, up
 * to the integer square root of `number`), copies the flag back and prints the
 * elapsed GPU time in milliseconds.
 *
 * @param number  value to test
 * @param prime   initial value of the result flag (pass true for a normal check)
 * @return true only when the GPU run completed and no divisor was found;
 *         false when a divisor was found, when number < 2, or when any CUDA
 *         call failed (the failure is reported on stderr)
 */
bool check_parallel(unsigned long long int number, bool prime) {
    const int threadsperblock = 256;
    const int N = 2048;
    const int blockspergrid = (N + threadsperblock - 1) / threadsperblock;
    const unsigned long long int stride =
        static_cast<unsigned long long int>(threadsperblock) * blockspergrid;

    unsigned long long int* number_device = nullptr;
    bool* prime_device = nullptr;
    cudaEvent_t start = nullptr;
    cudaEvent_t stop = nullptr;
    float time = 0.0f;
    bool result = prime && number >= 2;  // 0 and 1 are never prime
    bool completed = false;
    cudaError_t status = cudaSuccess;

    // Records and reports a failing CUDA status; returns true on failure.
    auto failed = [&status](cudaError_t code, const char* what) -> bool {
        if (code == cudaSuccess) {
            return false;
        }
        status = code;
        fprintf(stderr, "check_parallel: %s failed: %s\n", what, cudaGetErrorString(code));
        return true;
    };

    do {
        if (failed(cudaEventCreate(&start), "cudaEventCreate(start)")) break;
        if (failed(cudaEventCreate(&stop), "cudaEventCreate(stop)")) break;

        // The kernel dereferences both pointers on the device, so they must be
        // device allocations rather than addresses of host variables.
        if (failed(cudaMalloc(reinterpret_cast<void**>(&number_device), sizeof(number)),
                   "cudaMalloc(number)")) break;
        if (failed(cudaMalloc(reinterpret_cast<void**>(&prime_device), sizeof(result)),
                   "cudaMalloc(flag)")) break;
        if (failed(cudaMemcpy(number_device, &number, sizeof(number), cudaMemcpyHostToDevice),
                   "cudaMemcpy(number)")) break;
        if (failed(cudaMemcpy(prime_device, &result, sizeof(result), cudaMemcpyHostToDevice),
                   "cudaMemcpy(flag)")) break;

        if (failed(cudaEventRecord(start, 0), "cudaEventRecord(start)")) break;

        // Start at 2: candidates 0 and 1 are not meaningful divisors.
        // `base <= number / base` is the overflow-free form of base^2 <= number.
        for (unsigned long long int base = 2; base <= number / base; base += stride) {
            if (base > static_cast<unsigned long long int>(INT_MAX)) {
                // The kernel takes its offset as int; refuse rather than overflow.
                failed(cudaErrorInvalidValue, "candidate offset (exceeds int range)");
                break;
            }
            check_kernel<<<blockspergrid, threadsperblock>>>(number_device, prime_device,
                                                             static_cast<int>(base));
            // Launch errors are not returned by the launch itself.
            if (failed(cudaGetLastError(), "check_kernel launch")) break;
        }
        if (status != cudaSuccess) break;

        if (failed(cudaEventRecord(stop, 0), "cudaEventRecord(stop)")) break;
        if (failed(cudaEventSynchronize(stop), "cudaEventSynchronize")) break;
        if (failed(cudaEventElapsedTime(&time, start, stop), "cudaEventElapsedTime")) break;
        printf("Parallel (GPU) checking took: %f ms\n", time);

        if (failed(cudaMemcpy(&result, prime_device, sizeof(result), cudaMemcpyDeviceToHost),
                   "cudaMemcpy(result)")) break;
        completed = true;
    } while (false);

    // Release everything that was acquired, on success and on failure alike.
    cudaFree(prime_device);
    cudaFree(number_device);
    if (stop != nullptr) cudaEventDestroy(stop);
    if (start != nullptr) cudaEventDestroy(start);

    return completed && result;
}
/**
 * GPU primality test by trial division, timed with a host clock.
 *
 * Allocates device copies of `number` and of the result flag, launches
 * check_kernel over consecutive windows of candidate divisors (starting at 2,
 * up to the integer square root of `number`), waits for the device to finish,
 * prints the elapsed time in microseconds and copies the flag back.
 *
 * @param number  value to test
 * @param prime   in/out result flag: its incoming value seeds the device flag
 *                (pass true for a normal check); on success it is overwritten
 *                with the result (false when a divisor was found or number < 2).
 *                It is left unchanged when an error is returned.
 * @return cudaSuccess, or the first CUDA error encountered (also reported on
 *         stderr); cudaErrorInvalidValue when `prime` is null or when the
 *         candidate range does not fit the kernel's int offset parameter
 */
cudaError_t addWithCuda(unsigned long long int number, bool *prime)
{
    if (prime == nullptr) {
        return cudaErrorInvalidValue;
    }

    const int threadsPerBlock = 128;
    const int N = 2048;
    const int blocksInGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
    const unsigned long long int candidatesPerLaunch =
        static_cast<unsigned long long int>(threadsPerBlock) * blocksInGrid;

    unsigned long long int *dev_number = nullptr;
    bool *dev_prime = nullptr;
    bool host_flag = *prime && number >= 2;  // 0 and 1 are never prime
    cudaError_t cudaStatus = cudaSuccess;

    // Records and reports a failing CUDA status; returns true on failure.
    auto failed = [&cudaStatus](cudaError_t code, const char *what) -> bool {
        if (code == cudaSuccess) {
            return false;
        }
        cudaStatus = code;
        fprintf(stderr, "addWithCuda: %s failed: %s\n", what, cudaGetErrorString(code));
        return true;
    };

    do {
        // The kernel dereferences both pointers on the device, so they must be
        // allocated and initialised before the first launch.
        if (failed(cudaMalloc(reinterpret_cast<void **>(&dev_number), sizeof(number)),
                   "cudaMalloc(number)")) break;
        if (failed(cudaMalloc(reinterpret_cast<void **>(&dev_prime), sizeof(host_flag)),
                   "cudaMalloc(flag)")) break;
        if (failed(cudaMemcpy(dev_number, &number, sizeof(number), cudaMemcpyHostToDevice),
                   "cudaMemcpy(number)")) break;
        if (failed(cudaMemcpy(dev_prime, &host_flag, sizeof(host_flag), cudaMemcpyHostToDevice),
                   "cudaMemcpy(flag)")) break;

        const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

        // Start at 2: candidates 0 and 1 are not meaningful divisors.
        // `base <= number / base` is the overflow-free form of base^2 <= number.
        for (unsigned long long int base = 2; base <= number / base; base += candidatesPerLaunch) {
            if (base > static_cast<unsigned long long int>(INT_MAX)) {
                // The kernel takes its offset as int; refuse rather than overflow.
                failed(cudaErrorInvalidValue, "candidate offset (exceeds int range)");
                break;
            }
            check_kernel<<<blocksInGrid, threadsPerBlock>>>(dev_number, dev_prime,
                                                            static_cast<int>(base));
            // Launch errors are not returned by the launch itself.
            if (failed(cudaGetLastError(), "check_kernel launch")) break;
        }
        if (cudaStatus != cudaSuccess) break;

        // Launches are asynchronous: wait for the device before stopping the
        // clock, otherwise only the launch overhead would be measured.
        if (failed(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) break;

        const std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
        const auto diff = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
        std::cout << "Parallel (GPU) checking took: " << diff.count() << " microseconds. " << std::endl;

        if (failed(cudaMemcpy(&host_flag, dev_prime, sizeof(host_flag), cudaMemcpyDeviceToHost),
                   "cudaMemcpy(result)")) break;
        *prime = host_flag;
    } while (false);

    // cudaFree accepts null pointers, so this is safe on every exit path.
    cudaFree(dev_prime);
    cudaFree(dev_number);
    return cudaStatus;
}
/**
 * Interactive driver: repeatedly reads a number, checks it on the CPU and on
 * the GPU, prints the GPU flag (when the GPU run succeeded) and the verdict
 * based on the CPU result.
 *
 * The loop ends when standard input reaches end-of-file or yields something
 * that is not a number.
 *
 * @return 0 on normal termination
 */
int main(){
    while (true) {
        const unsigned long long int number = get_number();
        // A failed extraction leaves std::cin in a failed state; without this
        // check the loop would spin forever re-reading nothing.
        if (!std::cin) {
            break;
        }

        const bool prime_host = check_non_parallel(number);

        bool prime_device = true;
        const cudaError_t cudaStatus = addWithCuda(number, &prime_device);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "GPU check failed: %s\n", cudaGetErrorString(cudaStatus));
        }
        else {
            std::cout << prime_device << std::endl;
        }

        if (prime_host) {
            std::cout << "-------- THE NUMBER IS PRIME --------" << std::endl << std::endl;
        }
        else {
            std::cout << "-------- THE NUMBER IS NOT PRIME --------" << std::endl << std::endl;
        }
    }
    // The original returned `true`, i.e. exit status 1, which signals failure.
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement