Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Host-side driver: copy the two coordinate arrays to the GPU, launch
# get_nearby_kernel over them, and copy the output back to the host.
# NOTE(review): `cuda` is presumably `numba.cuda`, and `coord1`, `coord2`,
# `n` and `get_nearby_kernel` are expected to be defined earlier -- confirm.

# Launch configuration: threads per block and blocks per grid.
# NOTE(review): this launches 36 * 512 = 18432 threads in total. If the
# kernel handles exactly one element per thread, any n larger than that
# would leave part of the output unwritten -- verify against the kernel.
threads_per_block = 512
blocks_per_grid = 36

# Send both coordinate arrays to the GPU.
coord1_gpu = cuda.to_device(coord1)
coord2_gpu = cuda.to_device(coord2)

# Create the output buffer on the GPU: n entries of int32. device_array
# allocates without initializing, so the kernel is expected to write
# every entry that is read back later.
out_gpu = cuda.device_array(shape=(n,), dtype=np.int32)

# Kernel launch syntax is kernel[blocks_per_grid, threads_per_block](*args).
# The literal 1.0 is the third kernel argument (presumably a distance
# threshold -- confirm against get_nearby_kernel's signature).
get_nearby_kernel[blocks_per_grid, threads_per_block](
    coord1_gpu, coord2_gpu, 1.0, out_gpu
)

# Copy the result back to the host. copy_to_host() returns a new host
# array, so it must be bound to a name; calling it as a bare statement
# throws the result away.
out = out_gpu.copy_to_host()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement