Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <cstring>
- #include <time.h>
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include <curand_kernel.h>
- #include <curand.h>
- #include <stack>
- // #define GRIDSIZE 2048;
- // #define BLOCKSIZE 128;
- // nvcc main.cu --run
- // Connected-Component-Labelling
- // 同时计算的局面数,即线程块的数量
- const int N = 1024;
- // -1:雷
- // -10:1~8
- // 0~60:空及其标记
- __device__ unsigned int curand_int(unsigned int n, curandStateMRG32k3a_t* state) {
- // 0到n-1之间的随机数,闭区间
- if (n == 0) {
- return 0;
- }
- unsigned int t = curand(state);
- while (t > (0xffffffff / n * n)) {
- t = curand(state);
- }
- return t % n;
- return t;
- }
- __global__ void init_all_curand(curandStateMRG32k3a_t *states) {
- // 对所有随机数种子初始化
- unsigned long long tid = threadIdx.x + blockIdx.x * 99;
- unsigned long long subsequence = threadIdx.x;
- unsigned long long offset = 0;
- curand_init(tid, subsequence, offset, &states[tid]);
- // printf("%3d, ", tid);
- // printf("%u, ", curand(&states[tid]));
- }
- __device__ void cal_num_for_cell(int *cuda_board, unsigned int tid, unsigned int x, unsigned int y) {
- if (cuda_board[tid] == -1) {
- return;
- }
- if (x == 0) {
- // 正下
- if (cuda_board[tid + 1] == -1) {
- cuda_board[tid] = -10;
- return;
- }
- if (y == 29) {
- // 正左、左下角
- if (cuda_board[tid - 16] == -1 || cuda_board[tid - 15] == -1) {
- cuda_board[tid] = -10;
- return;
- }
- } else if (y == 0) {
- // 正右、右下角
- if (cuda_board[tid + 16] == -1 || cuda_board[tid + 17] == -1) {
- cuda_board[tid] = -10;
- return;
- }
- } else {
- if (cuda_board[tid - 16] == -1 || cuda_board[tid - 15] == -1 || cuda_board[tid + 16] == -1 || cuda_board[tid + 17] == -1) {
- cuda_board[tid] = -10;
- return;
- }
- }
- } else if (x == 15) {
- if (cuda_board[tid - 1] == -1) {
- cuda_board[tid] = -10;
- return;
- }
- if (y == 29) {
- // 正左、左下角
- if (cuda_board[tid - 16] == -1 || cuda_board[tid - 17] == -1) {
- cuda_board[tid] = -10;
- return;
- }
- } else if (y == 0) {
- // 正右、右下角
- if (cuda_board[tid + 16] == -1 || cuda_board[tid + 15] == -1) {
- cuda_board[tid] = -10;
- return;
- }
- } else {
- if (cuda_board[tid - 16] == -1 || cuda_board[tid - 17] == -1 || cuda_board[tid + 16] == -1 || cuda_board[tid + 15] == -1) {
- cuda_board[tid] = -10;
- return;
- }
- }
- } else {
- if (cuda_board[tid - 1] == -1 || cuda_board[tid + 1] == -1) {
- cuda_board[tid] = -10;
- return;
- }
- if (y == 29) {
- // 左
- if (cuda_board[tid - 16] == -1 || cuda_board[tid - 17] == -1 || cuda_board[tid - 15] == -1) {
- cuda_board[tid] = -10;
- return;
- }
- } else if (y == 0) {
- // 正右、右下角
- if (cuda_board[tid + 16] == -1 || cuda_board[tid + 15] == -1 || cuda_board[tid + 17] == -1) {
- cuda_board[tid] = -10;
- return;
- }
- } else {
- if (cuda_board[tid - 16] == -1 || cuda_board[tid - 17] == -1 || cuda_board[tid + 16] == -1 || cuda_board[tid + 15] == -1 ||
- cuda_board[tid + 17] == -1 || cuda_board[tid - 15] == -1) {
- cuda_board[tid] = -10;
- return;
- }
- }
- }
- }
- __device__ bool is_3BV_on_island (int *cuda_board, unsigned int tid, unsigned int x, unsigned int y) {
- // cuda_board[tid]是数字的情况下,判断是不是一个3BV
- if (x == 0) {
- // 正下
- if (cuda_board[tid + 1] >= 0) {
- return false;
- }
- if (y == 29) {
- // 正左、左下角
- if (cuda_board[tid - 16] >= 0 || cuda_board[tid - 15] >= 0) {
- return false;
- }
- } else if (y == 0) {
- // 正右、右下角
- if (cuda_board[tid + 16] >= 0 || cuda_board[tid + 17] >= 0) {
- return false;
- }
- } else {
- if (cuda_board[tid - 16] >= 0 || cuda_board[tid - 15] >= 0 || cuda_board[tid + 16] >= 0 || cuda_board[tid + 17] >= 0) {
- return false;
- }
- }
- } else if (x == 15) {
- if (cuda_board[tid - 1] >= 0) {
- return false;
- }
- if (y == 29) {
- // 正左、左下角
- if (cuda_board[tid - 16] >= 0 || cuda_board[tid - 17] >= 0) {
- return false;
- }
- } else if (y == 0) {
- // 正右、右下角
- if (cuda_board[tid + 16] >= 0 || cuda_board[tid + 15] >= 0) {
- return false;
- }
- } else {
- if (cuda_board[tid - 16] >= 0 || cuda_board[tid - 17] >= 0 || cuda_board[tid + 16] >= 0 || cuda_board[tid + 15] >= 0) {
- return false;
- }
- }
- } else {
- if (cuda_board[tid - 1] >= 0 || cuda_board[tid + 1] >= 0) {
- return false;
- }
- if (y == 29) {
- // 左
- if (cuda_board[tid - 16] >= 0 || cuda_board[tid - 17] >= 0 || cuda_board[tid - 15] >= 0) {
- return false;
- }
- } else if (y == 0) {
- // 正右、右下角
- if (cuda_board[tid + 16] >= 0 || cuda_board[tid + 15] >= 0 || cuda_board[tid + 17] >= 0) {
- return false;
- }
- } else {
- if (cuda_board[tid - 16] >= 0 || cuda_board[tid - 17] >= 0 || cuda_board[tid + 16] >= 0 || cuda_board[tid + 15] >= 0 ||
- cuda_board[tid + 17] >= 0 || cuda_board[tid - 15] >= 0) {
- return false;
- }
- }
- }
- return true;
- }
- __device__ void check_surrounding_cells(int *board, bool *board_is_3BV, int *bbbv_current, bool *keep_running, unsigned int tid, unsigned int x, unsigned int y, unsigned int idN, unsigned int cell) {
- // cuda_board[tid]是空(>=0)的情况下,处理八方
- if (board[tid] == 0) {
- // printf("666");
- keep_running[idN] = true;
- board[tid] = cell + 1;
- board_is_3BV[tid] = true;
- // atomicAdd(&bbbv_current[idN], 1);
- return;
- }
- int min_op_id = 500;
- if (x == 0) {
- // 正下
- if (board[tid + 1] > 0 && board[tid + 1] < min_op_id) {
- min_op_id = board[tid + 1];
- }
- if (y == 29) {
- // 正左、左下角
- if (board[tid - 16] > 0 && board[tid - 16] < min_op_id) {
- min_op_id = board[tid - 16];
- }
- if (board[tid - 15] > 0 && board[tid - 15] < min_op_id) {
- min_op_id = board[tid - 15];
- }
- } else if (y == 0) {
- // 正右、右下角
- if (board[tid + 16] > 0 && board[tid + 16] < min_op_id) {
- min_op_id = board[tid + 16];
- }
- if (board[tid + 17] > 0 && board[tid + 17] < min_op_id) {
- min_op_id = board[tid + 17];
- }
- } else {
- if (board[tid - 16] > 0 && board[tid - 16] < min_op_id) {
- min_op_id = board[tid - 16];
- }
- if (board[tid - 15] > 0 && board[tid - 15] < min_op_id) {
- min_op_id = board[tid - 15];
- }
- if (board[tid + 16] > 0 && board[tid + 16] < min_op_id) {
- min_op_id = board[tid + 16];
- }
- if (board[tid + 17] > 0 && board[tid + 17] < min_op_id) {
- min_op_id = board[tid + 17];
- }
- }
- } else if (x == 15) {
- if (board[tid - 1] > 0 && board[tid - 1] < min_op_id) {
- min_op_id = board[tid - 1];
- }
- if (y == 29) {
- // 正左、左下角
- if (board[tid - 16] > 0 && board[tid - 16] < min_op_id) {
- min_op_id = board[tid - 16];
- }
- if (board[tid - 17] > 0 && board[tid - 17] < min_op_id) {
- min_op_id = board[tid - 17];
- }
- } else if (y == 0) {
- // 正右、右下角
- if (board[tid + 15] > 0 && board[tid + 15] < min_op_id) {
- min_op_id = board[tid + 15];
- }
- if (board[tid + 16] > 0 && board[tid + 16] < min_op_id) {
- min_op_id = board[tid + 16];
- }
- } else {
- if (board[tid - 16] > 0 && board[tid - 16] < min_op_id) {
- min_op_id = board[tid - 16];
- }
- if (board[tid - 17] > 0 && board[tid - 17] < min_op_id) {
- min_op_id = board[tid - 17];
- }
- if (board[tid + 15] > 0 && board[tid + 15] < min_op_id) {
- min_op_id = board[tid + 15];
- }
- if (board[tid + 16] > 0 && board[tid + 16] < min_op_id) {
- min_op_id = board[tid + 16];
- }
- }
- } else {
- if (board[tid - 1] > 0 && board[tid - 1] < min_op_id) {
- min_op_id = board[tid - 1];
- }
- if (board[tid + 1] > 0 && board[tid + 1] < min_op_id) {
- min_op_id = board[tid + 1];
- }
- if (y == 29) {
- // 左
- if (board[tid - 15] > 0 && board[tid - 15] < min_op_id) {
- min_op_id = board[tid - 15];
- }
- if (board[tid - 16] > 0 && board[tid - 16] < min_op_id) {
- min_op_id = board[tid - 16];
- }
- if (board[tid - 17] > 0 && board[tid - 17] < min_op_id) {
- min_op_id = board[tid - 17];
- }
- } else if (y == 0) {
- // 正右、右下角
- if (board[tid + 15] > 0 && board[tid + 15] < min_op_id) {
- min_op_id = board[tid + 15];
- }
- if (board[tid + 16] > 0 && board[tid + 16] < min_op_id) {
- min_op_id = board[tid + 16];
- }
- if (board[tid + 17] > 0 && board[tid + 17] < min_op_id) {
- min_op_id = board[tid + 17];
- }
- } else {
- if (board[tid - 15] > 0 && board[tid - 15] < min_op_id) {
- min_op_id = board[tid - 15];
- }
- if (board[tid - 16] > 0 && board[tid - 16] < min_op_id) {
- min_op_id = board[tid - 16];
- }
- if (board[tid - 17] > 0 && board[tid - 17] < min_op_id) {
- min_op_id = board[tid - 17];
- }
- if (board[tid + 15] > 0 && board[tid + 15] < min_op_id) {
- min_op_id = board[tid + 15];
- }
- if (board[tid + 16] > 0 && board[tid + 16] < min_op_id) {
- min_op_id = board[tid + 16];
- }
- if (board[tid + 17] > 0 && board[tid + 17] < min_op_id) {
- min_op_id = board[tid + 17];
- }
- }
- }
- // printf("'%u', ", min_op_id);
- if (min_op_id < board[tid]) {
- // printf("666");
- board[tid] = min_op_id;
- keep_running[idN] = true;
- if (board_is_3BV[tid]) {
- board_is_3BV[tid] = false;
- // atomicSub(&bbbv_current[idN], 1);
- }
- }
- return;
- }
- __global__ void laymine_cal_3BV(int *board, unsigned long long *bbbv, curandStateMRG32k3a_t *states, bool *board_is_3BV, int *bbbv_current, bool *keep_running) {
- // 全局的第几个格子
- unsigned long long tid = threadIdx.x + blockIdx.x * 480;
- // 第几局
- const unsigned int idN = (unsigned int) blockIdx.x;
- // 计算是本局的第几个格子
- const unsigned int cell = tid % 480;
- // 第几行
- const unsigned int x = cell & 0x0000000f;
- // 第几列
- const unsigned int y = cell >> 4;
- // 480个线程,前99个同时负责埋雷
- bool can_laymine = (cell < 99)? true:false;
- const unsigned int mines_id = blockIdx.x * 99 + cell;
- const unsigned int mines_id_start_at = tid - cell;
- for (unsigned long long k = 0; k < 1000; k++) {
- // 循环:完成埋雷、算3BV
- // 初始化整个局面
- board[tid] = 0;
- board_is_3BV[tid] = false;
- __syncthreads();
- // 并行地为所有局面的所有位置埋雷
- if (can_laymine) {
- int old;
- do {
- int cell_id = curand_int(480, &states[mines_id]) + mines_id_start_at;
- old = atomicCAS(&board[cell_id], 0, -1);
- } while (old == -1);
- }
- // 块内同步
- __syncthreads();
- cal_num_for_cell(board, tid, x, y);
- // 块内同步
- __syncthreads();
- do {
- // 初始化op计算完成标志
- keep_running[idN] = false;
- __syncthreads();
- if (board[tid] == -10) {
- if (!board_is_3BV[tid]) {
- board_is_3BV[tid] = true;
- if (is_3BV_on_island (board, tid, x, y)) {
- atomicAdd(&bbbv_current[idN], 1);
- }
- }
- } else if (board[tid] >= 0) {
- check_surrounding_cells(board, board_is_3BV, bbbv_current, keep_running, tid, x, y, idN, cell);
- // flag_ok = false;
- }
- __syncthreads();
- } while(keep_running[idN]);
- if (board_is_3BV[tid] && board[tid] >= 0) {
- atomicAdd(&bbbv_current[idN], 1);
- }
- __syncthreads();
- if (threadIdx.x == 0) {
- atomicAdd(&bbbv[bbbv_current[idN]], 1);
- // printf("666, %d; ", bbbv_current[idN]);
- bbbv_current[idN] = 0;
- }
- // 每个线程块的第一个线程负责统计结果
- }
- }
- int main(void) {
- // int nx = GRIDSIZE;
- // int ny = BLOCKSIZE;
- // bool *flag_ok = true;
- bool *cuda_flag_ok; // 连通域计数时,是否全部计算完
- cudaMalloc(&cuda_flag_ok, sizeof(bool));
- // cudaMemcpy(cuda_flag_ok, flag_ok, sizeof(bool), cudaMemcpyHostToDevice);
- const int M_bbbv = sizeof(unsigned long long) * 382;
- // 3BV记录在此
- unsigned long long bbbv[382] = {0};
- unsigned long long *cuda_bbbv;
- cudaMalloc(&cuda_bbbv, M_bbbv);
- cudaMemcpy(cuda_bbbv, bbbv, M_bbbv, cudaMemcpyHostToDevice);
- // 是否继续跑
- bool *cuda_keep_running;
- cudaMalloc(&cuda_keep_running, sizeof(bool) * N);
- int *cuda_bbbv_current;
- cudaMalloc(&cuda_bbbv_current, sizeof(int) * N);
- // 随机数状态
- curandStateMRG32k3a_t *cuda_states;
- cudaMalloc(&cuda_states, sizeof(curandStateMRG32k3a_t) * 99 * N);
- // 初始化随机数
- init_all_curand <<< N, 99 >>> (cuda_states);
- int call_back = cudaDeviceSynchronize();
- // 辅助记录是不是op的变量
- bool *cuda_board_is_3BV;
- cudaMalloc(&cuda_board_is_3BV, sizeof(bool) * 480 * N);
- // 局面
- int *cuda_board;
- cudaMalloc(&cuda_board, sizeof(int) * 480 * N);
- clock_t start, finish;
- float costtime;
- start = clock();
- // 埋雷、计算3bv
- laymine_cal_3BV <<< N, 480 >>> (cuda_board, cuda_bbbv, cuda_states, cuda_board_is_3BV, cuda_bbbv_current, cuda_keep_running);
- call_back = cudaDeviceSynchronize();
- finish = clock();
- //得到两次记录之间的时间差
- costtime = (float)(finish - start) / CLOCKS_PER_SEC;
- cudaMemcpy(&bbbv, cuda_bbbv, M_bbbv, cudaMemcpyDeviceToHost);
- // //time_t t;
- // //srand((unsigned)time(&t));
- // //printf(" %d \n", (int)(rand() & 0xff));
- int aaa = 0;
- for (int n = 0; n < 381; ++n)
- {
- aaa += bbbv[n];
- printf("%d: %u\n", n, bbbv[n]);
- }
- printf("一共: %d\n", aaa);
- printf("耗时:%f \n", costtime);
- printf("速度:%f \n", aaa/costtime);
- cudaDeviceReset();
- cudaFree(cuda_bbbv);
- cudaFree(cuda_keep_running);
- cudaFree(cuda_bbbv_current);
- cudaFree(cuda_flag_ok);
- cudaFree(cuda_states);
- cudaFree(cuda_board);
- cudaFree(cuda_board_is_3BV);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment