Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include <stdio.h>
- #include <stdlib.h>
- #include <cstdlib>
- #include <time.h>
- #include <curand_kernel.h>
- #define pnt 100
- #define SEED 9876543
- #define THREADS 80
- #define BLOCKS 10
// Placeholder kernel: takes no arguments and performs no work.
// It can be launched with any grid/block configuration.
__global__ void mykernel()
{
}
// Element-wise integer addition: each thread computes c[i] = a[i] + b[i]
// for its own global index i.
//
// Launch layout: 1D grid of 1D blocks, exactly one thread per element.
// There is no length parameter, so the caller must not launch more threads
// than the arrays have elements.
//
//   c - output array
//   a - first input array
//   b - second input array
__global__ void addKernel(int* c, const int* a, const int* b) {
    // Use the global index so multi-block launches cover distinct elements;
    // for a single-block launch this equals threadIdx.x.
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    c[i] = a[i] + b[i];
}
// Integrand: the identity function f(x) = x.
// Marked __host__ __device__ so the same definition can also be evaluated on
// the CPU (e.g. for a host-side reference check); device callers are unaffected.
__host__ __device__ float f(float x) {
    return x;
}
// Generator initialization.
//
// Initializes all THREADS * BLOCKS Philox states in d_gen with the common
// seed SEED, giving state number `id` the sequence number `id` and offset 0.
//
// Launch layout: any 1D grid of 1D blocks. The loop advances by the total
// number of launched threads, so every state is initialized even when fewer
// than THREADS * BLOCKS threads are launched.
//
//   d_gen - array with room for at least THREADS * BLOCKS states
__global__ void initfGENS(curandStatePhilox4_32_10_t* d_gen) {
    const int total = THREADS * BLOCKS;
    const int stride = gridDim.x * blockDim.x;
    for (int id = threadIdx.x + blockIdx.x * blockDim.x; id < total; id += stride) {
        curand_init(SEED, id, 0, d_gen + id);
    }
}
// Stratified hit-or-miss Monte Carlo estimate of the integral of f over [0, 1].
//
// The interval [0, 1] is split into R strata of width 1/R. For every stratum,
// `pnt` points (x, y) are drawn with x uniform inside the stratum and y drawn
// by curand_uniform; a point is a hit when it lies under the curve
// (y < f(x)). The estimate assumes 0 <= f(x) <= 1 on [0, 1].
//
// Launch layout: any 1D grid of 1D blocks. Strata are distributed over the
// launched threads with a stride equal to the total thread count; threads
// whose index is >= R have no stratum and exit immediately.
//
//   d_gen  - initialized generator states; the state at index `id` is used by
//            the thread with global index `id` and is written back on exit
//   R      - number of strata; nothing is accumulated when R <= 0
//   result - accumulator for the estimate; MUST be zero before the launch,
//            because each working thread adds its share with atomicAdd
__global__ void parallel_monte_carlo_integrate(curandStatePhilox4_32_10_t* d_gen, int R, float* result) {
    const int id = threadIdx.x + blockIdx.x * blockDim.x;
    if (id >= R) {
        return;  // no stratum for this thread (also covers R <= 0)
    }

    const int stride = gridDim.x * blockDim.x;
    const float w = 1.0f / (float)R;  // stratum width

    // Work on a local copy of the generator state and store it back at the end.
    curandStatePhilox4_32_10_t rng = d_gen[id];

    int success = 0;
    for (int s = id; s < R; s += stride) {
        for (int j = 0; j < pnt; j++) {
            const float x = curand_uniform(&rng) * w + w * (float)s;
            const float y = curand_uniform(&rng);
            if (y < f(x)) {
                success++;
            }
        }
    }
    d_gen[id] = rng;

    // Each stratum contributes w * hits / pnt; summing over this thread's
    // strata gives w * success / pnt. atomicAdd avoids the race of many
    // threads writing the single output value.
    atomicAdd(result, w * (float)success / (float)pnt);
}

// Aborts the program with a diagnostic when a CUDA call did not succeed.
static void checkCuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Allocates the generator states and the result accumulator, initializes the
// generators, runs the integration kernel with R = 10 strata and prints the
// estimate.
int main() {
    curandStatePhilox4_32_10_t* d_gen = NULL;
    checkCuda(cudaMalloc((void**)&d_gen, THREADS * BLOCKS * sizeof(curandStatePhilox4_32_10_t)),
              "cudaMalloc(d_gen)");

    // One thread per generator state: BLOCKS blocks of THREADS threads.
    initfGENS<<<BLOCKS, THREADS>>>(d_gen);
    checkCuda(cudaGetLastError(), "initfGENS launch");

    float result = 0.0f;
    int R = 10;
    float* dev_result = NULL;
    checkCuda(cudaMalloc((void**)&dev_result, sizeof(float)), "cudaMalloc(dev_result)");
    // The kernel accumulates into dev_result, so it has to start at zero.
    checkCuda(cudaMemset(dev_result, 0, sizeof(float)), "cudaMemset(dev_result)");

    parallel_monte_carlo_integrate<<<BLOCKS, THREADS>>>(d_gen, R, dev_result);
    checkCuda(cudaGetLastError(), "parallel_monte_carlo_integrate launch");

    // Blocking copy: waits for the kernels above and reports their execution errors.
    checkCuda(cudaMemcpy(&result, dev_result, sizeof(float), cudaMemcpyDeviceToHost),
              "cudaMemcpy(result)");
    printf("%lf", result);

    // Free device memory.
    checkCuda(cudaFree(d_gen), "cudaFree(d_gen)");
    checkCuda(cudaFree(dev_result), "cudaFree(dev_result)");
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement