# Import PyTorch
import torch                     # import main library
import torch.nn as nn            # import modules like nn.ReLU()
import torch.nn.functional as F  # import torch functions like F.relu() and F.relu_()

def get_memory_allocated(device, inplace=False):
    '''
    Measure GPU memory allocated before and after the ReLU call.
    INPUT:
        - device: CUDA device to run the operation on
        - inplace: True - run the in-place ReLU, False - the normal (out-of-place) ReLU
    OUTPUT:
        - tuple of (allocated memory delta, peak memory delta), both in MiB
    '''
    # Create a large tensor directly on the GPU
    t = torch.randn(10000, 10000, device=device)

    # Measure allocated memory (in MiB) before the call
    torch.cuda.synchronize()
    start_max_memory = torch.cuda.max_memory_allocated() / 1024**2
    start_memory = torch.cuda.memory_allocated() / 1024**2

    # Call the in-place or the normal ReLU
    if inplace:
        F.relu_(t)
    else:
        output = F.relu(t)

    # Measure allocated memory after the call
    torch.cuda.synchronize()
    end_max_memory = torch.cuda.max_memory_allocated() / 1024**2
    end_memory = torch.cuda.memory_allocated() / 1024**2

    # Return the amount of memory allocated by the ReLU call
    return end_memory - start_memory, end_max_memory - start_max_memory
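
# A minimal usage sketch (an assumption, not part of the original paste):
# compare the memory overhead of the normal and the in-place ReLU.
# Assumes a CUDA-capable GPU and a PyTorch version that provides
# torch.cuda.reset_peak_memory_stats().
if __name__ == '__main__':
    device = torch.device('cuda')

    # Reset the peak-memory counter so max_memory_allocated() reflects this run only
    torch.cuda.reset_peak_memory_stats(device)
    memory, max_memory = get_memory_allocated(device, inplace=False)
    print('Normal ReLU:   allocated {:.0f} MiB, peak {:.0f} MiB'.format(memory, max_memory))

    torch.cuda.reset_peak_memory_stats(device)
    memory_inplace, max_memory_inplace = get_memory_allocated(device, inplace=True)
    print('In-place ReLU: allocated {:.0f} MiB, peak {:.0f} MiB'.format(memory_inplace, max_memory_inplace))

    # The normal ReLU allocates a new output tensor (~381 MiB for a
    # 10000x10000 float32 tensor), while the in-place version overwrites
    # its input and allocates nothing extra.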