Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import asyncio
- import time
- import numpy as np
- import cupy
- import numba
- import dask.array as da
- from dask_cuda import DGX, LocalCUDACluster
- from dask.distributed import Client, wait
@numba.cuda.jit
def _smooth_gpu(x, out):
    """CUDA kernel: store the 3x3 neighbourhood mean of ``x`` in ``out``.

    Each thread handles the single element at its 2-D grid position.
    Threads positioned on the outermost rows/columns of ``x``, or outside
    it, return without writing, so those entries of ``out`` keep whatever
    value they held before the launch.
    """
    row, col = numba.cuda.grid(2)
    height, width = x.shape

    # Only interior points have all eight neighbours available.
    if row < 1 or row >= height - 1 or col < 1 or col >= width - 1:
        return

    up = row - 1
    down = row + 1
    left = col - 1
    right = col + 1

    # Terms are added strictly left to right, row by row, so the rounding
    # behaviour of the sum does not depend on how the lines are wrapped.
    out[row, col] = (
        x[up, left] + x[up, col] + x[up, right]
        + x[row, left] + x[row, col] + x[row, right]
        + x[down, left] + x[down, col] + x[down, right]
    ) / 9
def smooth_gpu(x, out):
    """Launch the ``_smooth_gpu`` kernel over every element of ``x``.

    The launch uses 16x16-thread blocks and enough blocks along each of
    the first two axes of ``x`` to cover its full extent (rounded up).

    Args:
        x: 2-D input array passed to the kernel.
        out: array passed to the kernel as its output argument.
    """
    import math

    tile = (16, 16)
    # Round up so a partially filled block still covers the trailing edge.
    grid = (
        math.ceil(x.shape[0] / tile[0]),
        math.ceil(x.shape[1] / tile[1]),
    )
    _smooth_gpu[grid, tile](x, out)
def dispatch_smooth_gpu(x):
    """Return a newly allocated array holding the smoothed version of ``x``.

    A zero-filled cupy array with the shape and dtype of ``x`` is created,
    handed to ``smooth_gpu`` together with ``x`` to be filled in, and then
    returned.
    """
    smoothed = cupy.zeros(shape=x.shape, dtype=x.dtype)
    smooth_gpu(x, smoothed)
    return smoothed
async def f():
    """Benchmark ``dispatch_smooth_gpu`` over a chunked random dask array.

    Starts a ``DGX`` cluster and an asynchronous ``Client``, builds an
    80000x80000 random array in 10000x10000 chunks using cupy's
    ``RandomState``, and waits until it is persisted. It then times a
    ``map_overlap`` of ``dispatch_smooth_gpu`` with a one-element halo and
    prints the elapsed seconds prefixed with ``"Time:"``.
    """
    # Alternative cluster: LocalCUDACluster(asynchronous=True).
    async with DGX(asynchronous=True, silence_logs=True) as cluster:
        # NOTE(review): `client` is not referenced below; presumably the
        # Client registers itself as the scheduler used by the dask calls
        # in this block -- confirm against the dask.distributed docs.
        async with Client(cluster, asynchronous=True) as client:
            # Create a simple random array and make sure it is fully
            # materialised before the timed section starts.
            rs = da.random.RandomState(RandomState=cupy.random.RandomState)
            x = rs.random((80000, 80000), chunks=(10000, 10000)).persist()
            await wait(x)

            # perf_counter is monotonic, so the measured interval cannot be
            # skewed by system clock adjustments the way time.time() can.
            start = time.perf_counter()
            y = x.map_overlap(dispatch_smooth_gpu, depth=1)
            await y.persist()
            print("Time:", time.perf_counter() - start)
if __name__ == "__main__":
    # Script entry point: run the benchmark coroutine to completion on the
    # current event loop.
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(f())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement