# Import PyTorch
import torch                     # import main library
import torch.nn as nn            # import modules like nn.ReLU()
import torch.nn.functional as F  # import torch functions like F.relu() and F.relu_()

def get_memory_allocated(device, inplace=False):
    '''
    Measure GPU memory allocated before and after the ReLU call.
    INPUT:
        - device: CUDA device to run the operation on
        - inplace: True - run the in-place ReLU, False - the normal (out-of-place) ReLU
    OUTPUT:
        - tuple of (allocated memory delta, peak memory delta), both in MiB
    '''
    # Create a large tensor directly on the GPU
    t = torch.randn(10000, 10000, device=device)

    # Measure allocated memory (in MiB) before the call
    torch.cuda.synchronize()
    start_max_memory = torch.cuda.max_memory_allocated() / 1024**2
    start_memory = torch.cuda.memory_allocated() / 1024**2

    # Call the in-place or the normal ReLU
    if inplace:
        F.relu_(t)
    else:
        output = F.relu(t)

    # Measure allocated memory after the call
    torch.cuda.synchronize()
    end_max_memory = torch.cuda.max_memory_allocated() / 1024**2
    end_memory = torch.cuda.memory_allocated() / 1024**2

    # Return the amount of memory allocated by the ReLU call
    return end_memory - start_memory, end_max_memory - start_max_memory
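
# A minimal usage sketch (an assumption, not part of the original paste):
# compare the memory overhead of the normal and the in-place ReLU.
# Assumes a CUDA-capable GPU and a PyTorch version that provides
# torch.cuda.reset_peak_memory_stats().
if __name__ == '__main__':
    device = torch.device('cuda')

    # Reset the peak-memory counter so max_memory_allocated() reflects this run only
    torch.cuda.reset_peak_memory_stats(device)
    memory, max_memory = get_memory_allocated(device, inplace=False)
    print('Normal ReLU:   allocated {:.0f} MiB, peak {:.0f} MiB'.format(memory, max_memory))

    torch.cuda.reset_peak_memory_stats(device)
    memory_inplace, max_memory_inplace = get_memory_allocated(device, inplace=True)
    print('In-place ReLU: allocated {:.0f} MiB, peak {:.0f} MiB'.format(memory_inplace, max_memory_inplace))

    # The normal ReLU allocates a new output tensor (~381 MiB for a
    # 10000x10000 float32 tensor), while the in-place version overwrites
    # its input and allocates nothing extra.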