Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
require 'nn'
require 'cutorch'
require 'cunn'

--[[
-- A simple benchmark comparing fully-connected net times on CPU and GPU.
--
-- We construct a five-layer network with 100-D inputs, 4-D outputs, and
-- four hidden layers of 1024 units each with ReLU between layers.
--
-- For each datatype (float, double, cuda) we run 10 forward/backward
-- passes of the network and report the elapsed wall-clock time. Note the
-- use of cutorch.synchronize to ensure that we are properly handling GPU
-- timing: kernel launches are asynchronous, so we must sync before
-- starting the timer and again before reading it.
--]]

-- layer_sizes[1] is the input dimension, layer_sizes[#layer_sizes] the
-- output dimension; everything in between is a hidden layer.
local layer_sizes = {100, 1024, 1024, 1024, 1024, 4}
local model = nn.Sequential()
for i = 2, #layer_sizes do
  model:add(nn.Linear(layer_sizes[i - 1], layer_sizes[i]))
  -- ReLU goes between layers only: the final Linear is the regression
  -- output and must not be clamped to non-negative values.
  if i < #layer_sizes then
    model:add(nn.ReLU(true))
  end
end
model:float()

local crit = nn.MSECriterion():float()

local batch_size = 1000
local in_dim = layer_sizes[1]
local out_dim = layer_sizes[#layer_sizes]

local dtypes = {'torch.DoubleTensor', 'torch.FloatTensor', 'torch.CudaTensor'}
local timer = torch.Timer()
for _, dtype in ipairs(dtypes) do
  print(string.format('Testing dtype %s', dtype))
  model:type(dtype)
  crit:type(dtype)
  for t = 1, 10 do
    -- Fresh random batch each iteration; targets are Gaussian, so the
    -- output layer must be able to produce negative values.
    local X = torch.randn(batch_size, in_dim):type(dtype)
    local y = torch.randn(batch_size, out_dim):type(dtype)
    -- Drain any queued GPU work (the :type conversions above) BEFORE
    -- resetting the timer, so that wait is not counted in the measurement.
    cutorch.synchronize()
    timer:reset()
    local y_pred = model:forward(X)
    local loss = crit:forward(y_pred, y)
    local dy_pred = crit:backward(y_pred, y)
    model:backward(X, dy_pred)
    -- Sync again so asynchronous kernels finish before we read the clock.
    cutorch.synchronize()
    local elapsed = timer:time().real  -- renamed from 't' to avoid shadowing the loop counter
    print(elapsed)
  end
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement