Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import time

import numpy as np
import torch

"""
Benchmark reproducing a PyTorch slowdown: this program gets slower and
slower over time WITHOUT the `c = z.detach()` call in the loop below,
even though nvidia-smi shows no noticeable growth in memory consumption.
Each loop iteration is split into timed phases; per-window totals are
printed and plotted at the end so the trend is visible.
"""

# NOTE(review): the original hard-coded `usecuda = 1`, which crashes on
# CPU-only machines; guarding on availability keeps the repro runnable.
usecuda = torch.cuda.is_available()
dtype = torch.FloatTensor
N = 1
if usecuda:
    N = 100 * N
    dtype = torch.cuda.FloatTensor

xdim = (100, 50, 2)
x0 = torch.randn(*xdim).type(dtype)
z0 = torch.randn(N, xdim[0]).type(dtype)
# `Variable` is deprecated since PyTorch 0.4: tensors carry autograd
# state directly now.
x = x0.clone().requires_grad_(True)
z = z0.clone()  # no gradients tracked for z

# `time.clock()` was deprecated in 3.3 and removed in Python 3.8;
# `perf_counter()` is the documented replacement for interval timing.
t0 = time.perf_counter()
tsum = np.zeros(5)   # per-phase wall time accumulated over the current window
tsump = np.zeros(5)  # previous window's totals, kept to show the trend
M = 100              # reporting window size, in iterations
times = []           # one (tsum, tsum - tsump) pair recorded per window

for i in range(M * 30):
    t1 = time.perf_counter()
    # Phase 1 (t1..t2): forward pass.
    b = x.repeat(N, 1, 1, 1).view(N, -1, 2).sum(2).view(N, xdim[0], xdim[1])
    # The detach() here is the point of the repro: without it, dividing by b
    # would chain every iteration's graph onto the previous one via z.
    c = z.detach().view(N, xdim[0], 1).expand(N, xdim[0], xdim[1]) / b
    loss = c.sum()
    t2 = time.perf_counter()
    # Phase 2 (t2..t3): backward pass.
    loss.backward()
    t3 = time.perf_counter()
    # Phase 3 (t3..t4): in-place update of z (comparison output carries no grad).
    z -= 0.1 * b.view(N, -1, xdim[1]).lt(0.1).sum(2).type(dtype)
    t4 = time.perf_counter()
    # Phase 4 (t4..t5): manual SGD step on x under no_grad (modern
    # replacement for the deprecated `.data` manipulation), then clear grads.
    with torch.no_grad():
        x -= 1e-10 * x.grad
        x.grad.zero_()
    t5 = time.perf_counter()
    tsum += np.array((t1 - t0, t2 - t1, t3 - t2, t4 - t3, t5 - t4))
    t0 = t5
    if i % M == M - 1:
        times.append((tsum, tsum - tsump))
        print(tsum, tsum - tsump)
        tsump = tsum
        tsum = np.zeros(5)  # rebind (not zero in place): `times` keeps the old array

# Imported here, as in the original, so the benchmark itself has no
# display dependency until plotting time.
from matplotlib import pyplot as plt

t = np.arange(len(times))
timings = np.array(times)  # shape: (windows, 2, 5); [:, 0, :] are the raw totals
plt.plot(t, timings[:, 0, 0], 'r', label='t0..1')
plt.plot(t, timings[:, 0, 1], 'g', label='t1..2')
plt.plot(t, timings[:, 0, 2], 'b', label='t2..3')
plt.plot(t, timings[:, 0, 3], 'y', label='t3..4')
# 'k' (black) instead of the original 'w': a white line is invisible on
# the default white figure background.
plt.plot(t, timings[:, 0, 4], 'k', label='t4..5')
plt.legend(loc=2)
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement