Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import time

import numpy as np
import torch

"""
Benchmark reproducing a PyTorch slowdown: this program gets slower and
slower over time WITHOUT the `c = z.detach()` call in the loop below,
even though nvidia-smi shows no noticeable growth in memory consumption.
Each loop iteration is split into timed phases; per-window totals are
printed and plotted at the end so the trend is visible.
"""

# NOTE(review): the original hard-coded `usecuda = 1`, which crashes on
# CPU-only machines; guarding on availability keeps the repro runnable.
usecuda = torch.cuda.is_available()
dtype = torch.FloatTensor
N = 1
if usecuda:
    N = 100 * N
    dtype = torch.cuda.FloatTensor

xdim = (100, 50, 2)
x0 = torch.randn(*xdim).type(dtype)
z0 = torch.randn(N, xdim[0]).type(dtype)
# `Variable` is deprecated since PyTorch 0.4: tensors carry autograd
# state directly now.
x = x0.clone().requires_grad_(True)
z = z0.clone()  # no gradients tracked for z

# `time.clock()` was deprecated in 3.3 and removed in Python 3.8;
# `perf_counter()` is the documented replacement for interval timing.
t0 = time.perf_counter()
tsum = np.zeros(5)   # per-phase wall time accumulated over the current window
tsump = np.zeros(5)  # previous window's totals, kept to show the trend
M = 100              # reporting window size, in iterations
times = []           # one (tsum, tsum - tsump) pair recorded per window

for i in range(M * 30):
    t1 = time.perf_counter()
    # Phase 1 (t1..t2): forward pass.
    b = x.repeat(N, 1, 1, 1).view(N, -1, 2).sum(2).view(N, xdim[0], xdim[1])
    # The detach() here is the point of the repro: without it, dividing by b
    # would chain every iteration's graph onto the previous one via z.
    c = z.detach().view(N, xdim[0], 1).expand(N, xdim[0], xdim[1]) / b
    loss = c.sum()
    t2 = time.perf_counter()
    # Phase 2 (t2..t3): backward pass.
    loss.backward()
    t3 = time.perf_counter()
    # Phase 3 (t3..t4): in-place update of z (comparison output carries no grad).
    z -= 0.1 * b.view(N, -1, xdim[1]).lt(0.1).sum(2).type(dtype)
    t4 = time.perf_counter()
    # Phase 4 (t4..t5): manual SGD step on x under no_grad (modern
    # replacement for the deprecated `.data` manipulation), then clear grads.
    with torch.no_grad():
        x -= 1e-10 * x.grad
        x.grad.zero_()
    t5 = time.perf_counter()
    tsum += np.array((t1 - t0, t2 - t1, t3 - t2, t4 - t3, t5 - t4))
    t0 = t5
    if i % M == M - 1:
        times.append((tsum, tsum - tsump))
        print(tsum, tsum - tsump)
        tsump = tsum
        tsum = np.zeros(5)  # rebind (not zero in place): `times` keeps the old array

# Imported here, as in the original, so the benchmark itself has no
# display dependency until plotting time.
from matplotlib import pyplot as plt

t = np.arange(len(times))
timings = np.array(times)  # shape: (windows, 2, 5); [:, 0, :] are the raw totals
plt.plot(t, timings[:, 0, 0], 'r', label='t0..1')
plt.plot(t, timings[:, 0, 1], 'g', label='t1..2')
plt.plot(t, timings[:, 0, 2], 'b', label='t2..3')
plt.plot(t, timings[:, 0, 3], 'y', label='t3..4')
# 'k' (black) instead of the original 'w': a white line is invisible on
# the default white figure background.
plt.plot(t, timings[:, 0, 4], 'k', label='t4..5')
plt.legend(loc=2)
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement