Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include <stdio.h>
- #include <stdlib.h>
- #include <cstdlib>
- #include <time.h>
- #include <curand_kernel.h>
- #define pnt 100
/**
 * Placeholder kernel with an empty body.
 *
 * It takes no arguments and touches no memory, so any valid launch
 * configuration is acceptable.
 */
__global__ void mykernel(void) {
}
/**
 * Element-wise integer addition: c[i] = a[i] + b[i].
 *
 * Each thread processes the single element at its global 1-D index.
 * There is no length parameter and therefore no bounds check, so the
 * launch must supply exactly one thread per element of the arrays.
 *
 * @param c  output array (device memory)
 * @param a  first input array (device memory)
 * @param b  second input array (device memory)
 */
__global__ void addKernel(int* c, const int* a, const int* b) {
    // Use the global index so that multi-block launches address distinct
    // elements; for a single-block launch this equals threadIdx.x.
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    c[i] = a[i] + b[i];
}
/**
 * Integrand evaluated by the Monte Carlo kernel: the identity f(x) = x.
 *
 * Marked __host__ __device__ so the same definition can be called from
 * kernels and from host code (e.g. to compute a CPU reference value).
 *
 * @param x  point at which to evaluate the integrand
 * @return   x unchanged
 */
__host__ __device__ float f(float x) {
    return x;
}
/**
 * Hit-or-miss Monte Carlo estimate of the integral of f over [0, 1],
 * assuming 0 <= f(x) <= 1 on that interval.
 *
 * The interval is split into R strips of width 1/R. For every strip, `pnt`
 * points (x, y) are drawn uniformly from strip x [0, 1] and a point counts
 * as a hit when y < f(x). The estimate is hits / (R * pnt).
 *
 * Launch layout: one 1-D block. Thread i handles strips i, i + blockDim.x,
 * ... so any block size works; with blockDim.x == R each thread owns
 * exactly one strip. Launching several blocks makes every block compute
 * and store the same value.
 *
 * Shared memory: one statically allocated int.
 *
 * @param state   device array of generator states with at least
 *                min(R, blockDim.x) elements. Each working thread seeds its
 *                own element (fixed seed, subsequence = thread index, so
 *                runs are reproducible) and stores the advanced state back.
 * @param R       number of strips; for R <= 0 the result is 0.
 * @param result  device pointer to a single float receiving the estimate.
 */
__global__ void parallel_monte_carlo_integrate(curandStatePhilox4_32_10_t* state, int R, float* result) {
    // Block-wide hit counter; all threads accumulate here so that exactly
    // one thread writes *result instead of every thread racing on it.
    __shared__ int block_hits;

    const int i = threadIdx.x;
    if (i == 0) {
        block_hits = 0;
    }
    __syncthreads();  // counter must be zeroed before anyone adds to it

    if (i < R) {
        const unsigned long long seed = 1234ULL;
        const float w = 1.0f / R;  // strip width

        // Work on a register copy of the generator; write it back once.
        curandStatePhilox4_32_10_t rng;
        curand_init(seed, (unsigned long long)i, 0ULL, &rng);

        int hits = 0;
        for (int strip = i; strip < R; strip += blockDim.x) {
            for (int j = 0; j < pnt; j++) {
                // curand_uniform returns a float in (0, 1].
                const float x = (strip + curand_uniform(&rng)) * w;
                const float y = curand_uniform(&rng);
                // A hit is a point below the curve.
                hits += (y < f(x)) ? 1 : 0;
            }
        }

        state[i] = rng;
        atomicAdd(&block_hits, hits);
    }
    __syncthreads();  // reached by every thread: all partial counts are in

    if (i == 0) {
        *result = (R > 0) ? (float)block_hits / ((float)R * (float)pnt) : 0.0f;
    }
}
/**
 * Print a diagnostic and terminate the program if a CUDA runtime call
 * did not return cudaSuccess.
 *
 * @param status  value returned by the CUDA call
 * @param what    short description of the call, used in the message
 */
static void check_cuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Launches parallel_monte_carlo_integrate on one block of R threads,
 * copies the single float it produces back to the host and prints it.
 *
 * @return 0 on success; the process exits with EXIT_FAILURE if any CUDA
 *         call or the kernel itself reports an error.
 */
int main(void) {
    const int R = 2;  // threads in the single block; also passed to the kernel as R
    float result = 0.0f;
    float* dev_result = NULL;
    curandStatePhilox4_32_10_t* dev_state = NULL;

    check_cuda(cudaMalloc((void**)&dev_result, sizeof(float)), "cudaMalloc(dev_result)");
    // All-zero bytes are 0.0f, so the output slot starts from a defined value.
    check_cuda(cudaMemset(dev_result, 0, sizeof(float)), "cudaMemset(dev_result)");

    // Storage for one generator state per launched thread.
    check_cuda(cudaMalloc((void**)&dev_state, R * sizeof(curandStatePhilox4_32_10_t)),
               "cudaMalloc(dev_state)");

    parallel_monte_carlo_integrate<<<1, R>>>(dev_state, R, dev_result);
    check_cuda(cudaGetLastError(), "kernel launch");          // bad launch configuration
    check_cuda(cudaDeviceSynchronize(), "kernel execution");  // faults raised while running

    check_cuda(cudaMemcpy(&result, dev_result, sizeof(float), cudaMemcpyDeviceToHost),
               "cudaMemcpy(result)");

    check_cuda(cudaFree(dev_state), "cudaFree(dev_state)");
    check_cuda(cudaFree(dev_result), "cudaFree(dev_result)");

    printf("%lf", result);
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement