Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Micro-benchmark: times half-precision torch.matmul on a CUDA device for a
# few (I, J) x (J, K) problem sizes and prints the mean time per call.
import time

import torch

# Kept from the original script.
# NOTE(review): this flag turns on cuDNN autotuning; it presumably has no
# effect on matmul itself -- confirm before relying on it.
torch.backends.cudnn.benchmark = True

# One entry per benchmark: (label, I, J, K), where A is (I, J) and B is (J, K).
CASES = (
    ('1a', 64, 1024, 1024),
    ('1b', 1, 1024, 1024),
    ('2a', 63, 1023, 1023),
    ('2b', 1, 1023, 1023),
)


def format_result(label, elapsed, nb_iters):
    """Return the report line for one benchmark case.

    Args:
        label: Case identifier printed before the closing parenthesis.
        elapsed: Total time for all timed iterations, in seconds.
        nb_iters: Number of timed iterations that `elapsed` covers.

    Returns:
        A string such as ``'1a) 12.345us per iteration'``.
    """
    # Seconds per iteration, scaled to microseconds.
    return '{}) {:.3f}us per iteration'.format(label, elapsed / nb_iters * 1e6)


def benchmark_matmul(label, rows, inner, cols, warmup_iters=50, nb_iters=1000):
    """Time ``torch.matmul`` of a (rows, inner) by (inner, cols) half tensor pair.

    Both operands are created with ``torch.randn`` on the ``'cuda'`` device
    using ``torch.half``. The report line is printed to stdout.

    Args:
        label: Case identifier used in the printed line.
        rows: First dimension of the left operand.
        inner: Shared dimension of both operands.
        cols: Second dimension of the right operand.
        warmup_iters: Untimed calls made before the measurement starts.
        nb_iters: Timed calls the measurement averages over.

    Returns:
        Total elapsed time of the timed loop, in seconds.
    """
    lhs = torch.randn(rows, inner, device='cuda', dtype=torch.half)
    rhs = torch.randn(inner, cols, device='cuda', dtype=torch.half)

    # Warmup: run the op untimed first so one-time setup cost is excluded.
    for _ in range(warmup_iters):
        out = torch.matmul(lhs, rhs)
    # Wait for all queued GPU work so the clock starts on an idle device.
    torch.cuda.synchronize()

    # perf_counter is a monotonic, high-resolution clock meant for intervals.
    start = time.perf_counter()
    for _ in range(nb_iters):
        # Result is bound to a name as in the original; its value is unused.
        out = torch.matmul(lhs, rhs)
    # Wait for the queued matmuls to finish before reading the clock.
    torch.cuda.synchronize()
    elapsed = time.perf_counter() - start

    print(format_result(label, elapsed, nb_iters))
    return elapsed


def main():
    """Run every case in CASES in order."""
    for label, rows, inner, cols in CASES:
        benchmark_matmul(label, rows, inner, cols)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement