# Untitled
# By: a guest | Mar 14th, 2010 | Syntax: Python | Size: 0.63 KB | Hits: 60 | Expires: Never
#!/usr/bin/env python
import numpy
import pycuda.autoinit  # noqa: F401 -- imported for its side effect (sets up the CUDA context)
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
# Demo: double every element of a small float32 matrix on the GPU.
#
# Steps: build the host array, copy it to device memory, compile and launch
# a CUDA kernel with one thread per element, copy the result back, and print
# both the input and the output.

# Host-side input matrix (single precision, to match the kernel's `float *`).
a = numpy.array(
    [
        [1., 2., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 6., 1.],
    ],
    dtype=numpy.float32,
)

# Allocate a device buffer of the same size and upload the host data.
a_gpu = cuda.mem_alloc(a.nbytes)
cuda.memcpy_htod(a_gpu, a)

# The kernel flattens the 2-D thread index into a row-major element offset.
# Using blockDim.x (instead of a hard-coded row width) keeps the indexing
# in sync with whatever block shape is used at launch time.
mod = SourceModule("""
__global__ void doublify(float *a)
{
    int idx = threadIdx.x + threadIdx.y * blockDim.x;
    a[idx] *= 2;
}
""")
func = mod.get_function("doublify")

# One thread per matrix element: x spans the columns, y spans the rows.
# No grid argument is given, so the whole matrix must fit in a single
# thread block (fine for this 4x4 example).
rows, cols = a.shape
func(a_gpu, block=(cols, rows, 1))

# Download the result into a fresh host array with the same shape and dtype.
a_doubled = numpy.empty_like(a)
cuda.memcpy_dtoh(a_doubled, a_gpu)

# Single-argument print(...) emits identical output on Python 2 and Python 3.
print("Input:")
print(a)
print("Output:")
print(a_doubled)