Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <cstring>
- #include <time.h>
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include <curand_kernel.h>
- #include <curand.h>
- #include <stack>
// Launch configuration: the kernel is started with GRIDSIZE blocks of
// BLOCKSIZE threads each. The macros deliberately carry no trailing ';' so
// they can be used inside larger expressions.
// Alternative (larger) configuration:
// #define GRIDSIZE 2048
// #define BLOCKSIZE 128
#define GRIDSIZE 1024
#define BLOCKSIZE 64
// Build and run: nvcc -O3 main.cu --run
//
// Board cell encoding:
//   -1   : mine
//   -10  : numbered cell (1~8; the exact number is never computed)
//   >= 0 : empty cell (0 = not yet labelled, positive = opening label
//          assigned while counting 3BV)
// Debug helper: prints the 16x30 board as a bracketed list of rows, one row
// per output line, every cell formatted with "%3d,".
// Device-side printf is slow; intended for debugging only.
__device__ void print_board(signed short board[][30]) {
    printf("[");
    for (int row = 0; row < 16; ++row) {
        const signed short* cells = board[row];
        printf("[");
        for (int col = 0; col < 30; ++col) {
            printf("%3d,", cells[col]);
        }
        printf("],\n");
    }
    printf("]\n");
}
// Draws an integer in the closed range [0, n - 1] from the given generator.
//
// n     : number of possible outcomes; n == 0 returns 0 without drawing.
// state : per-thread cuRAND state, advanced by one or more draws.
//
// Uses rejection sampling: `limit` is a multiple of n, and only raw draws in
// [0, limit) are accepted, so each residue class has exactly limit / n
// accepted values (unbiased provided curand() itself is uniform over its
// 32-bit output). The comparison must be `>=`: accepting t == limit would
// give residue 0 one extra chance.
__device__ unsigned int curand_int(unsigned int n, curandStateMRG32k3a_t* state) {
    if (n == 0) {
        return 0;
    }
    const unsigned int limit = 0xffffffffu / n * n;
    unsigned int t = curand(state);
    while (t >= limit) {
        t = curand(state);
    }
    return t % n;
}
// Lays out a random 16x30 board with 99 mines and marks the numbered cells.
//
// board : 16x30 grid; must be all zeros on entry (the kernel memsets it before
//         each call). On return every cell is -1 (mine), -10 (non-mine cell
//         with at least one adjacent mine) or 0 (non-mine cell with none).
// state : per-thread cuRAND state used for the shuffle.
__device__ void lay_mine_exp(signed short board[][30], curandStateMRG32k3a_t* state) {
    // Step 1: park the 99 mines in a fixed spot: rows 0..2 completely
    // (3 * 30 = 90) plus the first 9 cells of row 3.
    for (unsigned short i = 0; i < 3; i++) {
        for (unsigned short j = 0; j < 30; j++) {
            board[i][j] = -1;
        }
    }
    for (unsigned short j = 0; j < 9; j++) {
        board[3][j] = -1;
    }

    // Step 2: Fisher-Yates shuffle over the 480 cells. A linear index k in
    // [0, 479] addresses row (k & 15), column (k >> 4), which covers every
    // cell of the 16x30 grid exactly once.
    for (unsigned short m = 479; m > 0; m--) {
        const unsigned int e = curand_int(m + 1, state);  // uniform in [0, m]
        const unsigned int i = e & 0x0000000f;
        const unsigned int j = e >> 4;
        const unsigned int x = m & 0x0000000f;
        const unsigned int y = m >> 4;
        if (board[i][j] != board[x][y]) {
            // Cells only hold 0 or -1 here, so "swap" means flipping both.
            if (board[i][j] == 0) {
                board[i][j] = -1;
                board[x][y] = 0;
            } else {
                board[i][j] = 0;
                board[x][y] = -1;
            }
        }
    }

    // Step 3: mark every non-mine cell that touches a mine with -10. Only the
    // "is a number" fact is recorded, not the number itself, so the scan of a
    // cell's neighbourhood stops at the first mine found.
    for (int i = 0; i < 16; i++) {
        const int row_lo = (i > 0) ? i - 1 : 0;
        const int row_hi = (i < 15) ? i + 1 : 15;
        for (int j = 0; j < 30; j++) {
            if (board[i][j] == -1) {
                continue;  // mines keep their value
            }
            const int col_lo = (j > 0) ? j - 1 : 0;
            const int col_hi = (j < 29) ? j + 1 : 29;
            bool touches_mine = false;
            for (int m = row_lo; m <= row_hi && !touches_mine; m++) {
                for (int n = col_lo; n <= col_hi; n++) {
                    if (board[m][n] == -1) {
                        touches_mine = true;
                        break;
                    }
                }
            }
            if (touches_mine) {
                board[i][j] = -10;
            }
        }
    }
}
// Union-find lookup: returns the representative of `label`, shortening the
// chain on the way (path halving).
static __device__ unsigned short bbbv_find_root(unsigned short* parent, unsigned short label) {
    while (parent[label] != label) {
        parent[label] = parent[parent[label]];
        label = parent[label];
    }
    return label;
}

// Folds the value of an already-visited neighbour cell into the label being
// chosen for the current cell. Non-positive neighbours (mines, numbers) are
// ignored. When the neighbour belongs to a different opening than the one
// picked so far, the two openings are united (smaller label wins) and the
// opening count drops by one.
static __device__ void bbbv_merge_neighbour(signed short neighbour,
                                            unsigned short* parent,
                                            unsigned short* label,
                                            unsigned short* op_num) {
    if (neighbour <= 0) {
        return;
    }
    const unsigned short root = bbbv_find_root(parent, (unsigned short)neighbour);
    if (*label == 0) {
        *label = root;
    } else if (root != *label) {
        const unsigned short lo = (root < *label) ? root : *label;
        const unsigned short hi = (root < *label) ? *label : root;
        parent[hi] = lo;
        *label = lo;
        *op_num -= 1;
    }
}

// Computes the 3BV of a 16x30 board: the number of openings (8-connected
// groups of empty cells) plus the number of numbered cells that touch no
// empty cell.
//
// board : cells are -1 (mine), -10 (numbered) or 0 (empty) on entry. Empty
//         cells are overwritten with a positive opening label as a side
//         effect; mines and numbered cells are left untouched.
// Returns the 3BV value.
//
// Openings are counted in a single raster pass: each empty cell looks at its
// up-left, up, up-right and left neighbours (the ones already visited) and
// joins their openings through a union-find, so two openings that meet later
// are merged even if one of their labels was already retired.
__device__ unsigned short cal_bbbv_exp(signed short board[][30]) {
    unsigned short op_num = 0;       // number of distinct openings so far
    unsigned short op_head = 0;      // last provisional label handed out
    unsigned short bbbv_is_num = 0;  // numbered cells not adjacent to any empty cell
    // A new label is only created when the left neighbour is not an empty
    // cell, so two adjacent columns of one row can never both create one:
    // at most 15 labels per row, 16 * 15 = 240 in total. Index 0 is unused.
    // Entries are initialised when their label is created.
    unsigned short parent[16 * 15 + 1];

    for (int i = 0; i < 16; i++) {
        const int row_lo = (i > 0) ? i - 1 : 0;
        const int row_hi = (i < 15) ? i + 1 : 15;
        for (int j = 0; j < 30; j++) {
            const int col_lo = (j > 0) ? j - 1 : 0;
            const int col_hi = (j < 29) ? j + 1 : 29;
            const signed short cell = board[i][j];

            if (cell == -10) {
                // A numbered cell costs one click of its own only when no
                // opening would reveal it, i.e. it has no empty neighbour.
                bool is_bbbv = true;
                for (int m = row_lo; m <= row_hi && is_bbbv; m++) {
                    for (int n = col_lo; n <= col_hi; n++) {
                        if (board[m][n] >= 0) {
                            is_bbbv = false;
                            break;
                        }
                    }
                }
                if (is_bbbv) {
                    bbbv_is_num += 1;
                }
            } else if (cell >= 0) {
                unsigned short label = 0;  // 0 = no labelled neighbour seen yet
                if (i > 0) {
                    for (int n = col_lo; n <= col_hi; n++) {
                        bbbv_merge_neighbour(board[i - 1][n], parent, &label, &op_num);
                    }
                }
                if (j > 0) {
                    bbbv_merge_neighbour(board[i][j - 1], parent, &label, &op_num);
                }
                if (label == 0) {
                    // No visited neighbour is empty: start a new opening.
                    op_head += 1;
                    parent[op_head] = op_head;
                    op_num += 1;
                    label = op_head;
                }
                board[i][j] = (signed short)label;
            }
        }
    }
    return op_num + bbbv_is_num;
}
// Monte-Carlo kernel: every thread generates 100 random 16x30 / 99-mine
// boards, computes each board's 3BV and increments the matching histogram bin.
//
// bbbv_ : device histogram indexed by 3BV value; bins are updated with
//         atomicAdd. It must be large enough for every 3BV value that can
//         occur (the host allocates 381 ints and zeroes them before launch).
//
// Launch layout: 1-D grid of 1-D blocks; any grid/block size works because
// the thread id is derived from blockDim.x.
__global__ void hello_world_from_gpu(int* bbbv_) {
    const unsigned long long tid =
        threadIdx.x + (unsigned long long)blockIdx.x * blockDim.x;

    // One common seed with a distinct subsequence per thread: this is the
    // arrangement for which cuRAND guarantees statistically uncorrelated
    // streams (different seeds with the same subsequence carry no such
    // guarantee).
    const unsigned long long seed = 1234ULL;
    const unsigned long long offset = 0;
    curandStateMRG32k3a_t state;
    curand_init(seed, tid, offset, &state);

    signed short board[16][30];
    for (int t = 0; t < 100; t++) {
        // lay_mine_exp expects an all-zero board.
        memset(board, 0, sizeof(board));
        lay_mine_exp(board, &state);
        unsigned short bbbv = cal_bbbv_exp(board);
        atomicAdd(&bbbv_[bbbv], 1);
    }
    return;
}
// Aborts the program with a diagnostic when a CUDA runtime call has failed.
static void check_cuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Host driver: launches the 3BV Monte-Carlo kernel with GRIDSIZE blocks of
// BLOCKSIZE threads, copies the histogram back, and prints every bin followed
// by the total number of boards, the elapsed time measured with clock(), and
// the resulting boards-per-second rate.
int main(void) {
    printf("Hello World from CPU\n");
    int nx = GRIDSIZE;
    int ny = BLOCKSIZE;

    const int N = 381;              // number of histogram bins
    const int M = sizeof(int) * N;  // histogram size in bytes
    int bbbv[N] = {0};
    int *cuda_bbbv = NULL;
    check_cuda(cudaMalloc(&cuda_bbbv, M), "cudaMalloc");
    check_cuda(cudaMemcpy(cuda_bbbv, bbbv, M, cudaMemcpyHostToDevice),
               "cudaMemcpy (host to device)");

    clock_t start, finish;
    float costtime;
    start = clock();
    hello_world_from_gpu <<< nx, ny >>> (cuda_bbbv);
    // A bad launch configuration is only reported through cudaGetLastError;
    // faults inside the kernel surface at the synchronize call.
    check_cuda(cudaGetLastError(), "kernel launch");
    check_cuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
    finish = clock();
    // Elapsed time between the two clock() samples, in seconds.
    costtime = (float)(finish - start) / CLOCKS_PER_SEC;

    check_cuda(cudaMemcpy(bbbv, cuda_bbbv, M, cudaMemcpyDeviceToHost),
               "cudaMemcpy (device to host)");

    int aaa = 0;  // total number of boards counted across all bins
    for (int n = 0; n < N; ++n)
    {
        aaa += bbbv[n];
        printf("%d: %d\n", n, bbbv[n]);
    }
    printf("一共: %d\n", aaa);
    printf("耗时:%f \n", costtime);
    printf("速度:%f \n", aaa/costtime);

    // Release the buffer while the context still exists, then reset.
    check_cuda(cudaFree(cuda_bbbv), "cudaFree");
    check_cuda(cudaDeviceReset(), "cudaDeviceReset");
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement