Not a member of Pastebin yet? Sign up — it unlocks many cool features!
import argparse
import os
import subprocess
import sys

import torch
- def main():
- parser = argparse.ArgumentParser()
- parser.add_argument("--batch", type=int, default=256)
- parser.add_argument("--img-size", type=int, default=32)
- parser.add_argument("--in-channels", type=int, default=128)
- parser.add_argument("--out-channels", type=int, default=128)
- parser.add_argument("--kernel-size", type=int, default=3)
- parser.add_argument("--groups", type=bool, default=True)
- parser.add_argument("--internal", action="store_true")
- args = parser.parse_args()
- if args.internal:
- internal(args)
- else:
- wrapper(args)
def internal(args):
    """Run one profiled convolution; profiling was already enabled by nvprof.

    Invoked by wrapper() via the --internal flag. Requires a CUDA device.
    Emits NVTX ranges so the nvprof trace maps back to autograd operators.
    """
    assert torch.cuda.is_available(), "internal() requires a CUDA device"
    # Let cuDNN benchmark and pick the fastest convolution algorithm.
    torch.backends.cudnn.benchmark = True
    # Depthwise convolution when --groups is set, otherwise a dense conv.
    groups = args.in_channels if args.groups else 1
    conv = torch.nn.Conv2d(args.in_channels, args.out_channels,
                           args.kernel_size, groups=groups).cuda()
    # BUG FIX: torch.Tensor(shape) returns *uninitialized* memory; use randn
    # for well-defined input. torch.autograd.Variable is deprecated (a no-op
    # since PyTorch 0.4), so a plain CUDA tensor suffices.
    data = torch.randn(args.batch, args.in_channels, args.img_size,
                       args.img_size, device="cuda")
    with torch.cuda.profiler.profile():
        conv(data)  # warm up the CUDA memory allocator and the profiler
        with torch.autograd.profiler.emit_nvtx():
            conv(data)
def wrapper(args):
    """Re-run this script under nvprof, then load and print the trace.

    Launches a child process with --internal appended so internal() performs
    the actual convolution, parses the resulting trace.prof with PyTorch's
    autograd profiler, prints it, and always removes the trace file.
    """
    # BUG FIX: build the command as an argument list without shell=True.
    # The original joined sys.argv into one shell string, which breaks on
    # arguments containing spaces or shell metacharacters. sys.executable
    # also guarantees the child uses the same interpreter as the parent.
    command = ["nvprof", "--profile-from-start", "off", "-o", "trace.prof",
               "--", sys.executable] + sys.argv + ["--internal"]
    print(" ".join(command))
    # check=True: fail loudly instead of trying to read a trace that was
    # never written because nvprof failed.
    subprocess.run(command, check=True)
    try:
        # Parse the nvprof trace into a readable autograd profile.
        profile = torch.autograd.profiler.load_nvprof("trace.prof")
        print(profile)
    finally:
        # Clean up the trace file even if parsing raises.
        os.remove("trace.prof")
# Standard script entry point: run main() only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
Add a comment
Please sign in to add a comment.