Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
require 'nn'
require 'cutorch'
require 'cunn'

--[[
-- A simple benchmark comparing fully-connected net times on CPU and GPU.
--
-- We construct a five-layer network with 100-D inputs, 4-D outputs, and
-- four hidden layers of 1024 units each with ReLU between layers.
--
-- For each datatype (float, double, cuda) we run 10 forward/backward
-- passes of the network and report the elapsed wall-clock time. Note the
-- use of cutorch.synchronize to ensure that we are properly handling GPU
-- timing: kernel launches are asynchronous, so we must sync before
-- starting the timer and again before reading it.
--]]

-- layer_sizes[1] is the input dimension, layer_sizes[#layer_sizes] the
-- output dimension; everything in between is a hidden layer.
local layer_sizes = {100, 1024, 1024, 1024, 1024, 4}
local model = nn.Sequential()
for i = 2, #layer_sizes do
  model:add(nn.Linear(layer_sizes[i - 1], layer_sizes[i]))
  -- ReLU goes between layers only: the final Linear is the regression
  -- output and must not be clamped to non-negative values.
  if i < #layer_sizes then
    model:add(nn.ReLU(true))
  end
end
model:float()

local crit = nn.MSECriterion():float()

local batch_size = 1000
local in_dim = layer_sizes[1]
local out_dim = layer_sizes[#layer_sizes]

local dtypes = {'torch.DoubleTensor', 'torch.FloatTensor', 'torch.CudaTensor'}
local timer = torch.Timer()
for _, dtype in ipairs(dtypes) do
  print(string.format('Testing dtype %s', dtype))
  model:type(dtype)
  crit:type(dtype)
  for t = 1, 10 do
    -- Fresh random batch each iteration; targets are Gaussian, so the
    -- output layer must be able to produce negative values.
    local X = torch.randn(batch_size, in_dim):type(dtype)
    local y = torch.randn(batch_size, out_dim):type(dtype)
    -- Drain any queued GPU work (the :type conversions above) BEFORE
    -- resetting the timer, so that wait is not counted in the measurement.
    cutorch.synchronize()
    timer:reset()
    local y_pred = model:forward(X)
    local loss = crit:forward(y_pred, y)
    local dy_pred = crit:backward(y_pred, y)
    model:backward(X, dy_pred)
    -- Sync again so asynchronous kernels finish before we read the clock.
    cutorch.synchronize()
    local elapsed = timer:time().real  -- renamed from 't' to avoid shadowing the loop counter
    print(elapsed)
  end
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement