#!/usr/bin/python
#
# memleak   Trace and display outstanding allocations to detect
#           memory leaks in user-mode processes and the kernel.
#
# USAGE: memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND]
#                [--combined-only] [-s SAMPLE_RATE] [-T TOP] [-z MIN_SIZE]
#                [-Z MAX_SIZE] [-O OBJ]
#                [interval] [count]
#
# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein.

from bcc import BPF
from time import sleep
from datetime import datetime
import resource
import argparse
import subprocess
import os
import sys

class Allocation(object):
    def __init__(self, stack, size):
        self.stack = stack
        self.count = 1
        self.size = size

    def update(self, size):
        self.count += 1
        self.size += size

def run_command_get_output(command):
    p = subprocess.Popen(command.split(),
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    return iter(p.stdout.readline, b'')

def run_command_get_pid(command):
    p = subprocess.Popen(command.split())
    return p.pid

examples = """
EXAMPLES:

./memleak -p $(pidof allocs)
        Trace allocations and display a summary of "leaked" (outstanding)
        allocations every 5 seconds
./memleak -p $(pidof allocs) -t
        Trace allocations and display each individual allocator function call
./memleak -ap $(pidof allocs) 10
        Trace allocations and display allocated addresses, sizes, and stacks
        every 10 seconds for outstanding allocations
./memleak -c "./allocs"
        Run the specified command and trace its allocations
./memleak
        Trace allocations in kernel mode and display a summary of outstanding
        allocations every 5 seconds
./memleak -o 60000
        Trace allocations in kernel mode and display a summary of outstanding
        allocations that are at least one minute (60 seconds) old
./memleak -s 5
        Trace roughly every 5th allocation, to reduce overhead
"""

description = """
Trace outstanding memory allocations that weren't freed.
Supports both user-mode allocations made with libc functions and kernel-mode
allocations made with kmalloc/kmem_cache_alloc/get_free_pages and corresponding
memory release functions.
"""

parser = argparse.ArgumentParser(description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples)
parser.add_argument("-p", "--pid", type=int, default=-1,
        help="the PID to trace; if not specified, trace kernel allocs")
parser.add_argument("-t", "--trace", action="store_true",
        help="print trace messages for each alloc/free call")
parser.add_argument("interval", nargs="?", default=5, type=int,
        help="interval in seconds to print outstanding allocations")
parser.add_argument("count", nargs="?", type=int,
        help="number of times to print the report before exiting")
parser.add_argument("-a", "--show-allocs", default=False, action="store_true",
        help="show allocation addresses and sizes as well as call stacks")
parser.add_argument("-o", "--older", default=500, type=int,
        help="prune allocations younger than this age in milliseconds")
parser.add_argument("-c", "--command",
        help="execute and trace the specified command")
parser.add_argument("--combined-only", default=False, action="store_true",
        help="show combined allocation statistics only")
parser.add_argument("-s", "--sample-rate", default=1, type=int,
        help="sample every N-th allocation to decrease the overhead")
parser.add_argument("-T", "--top", type=int, default=10,
        help="display only this many top allocating stacks (by size)")
parser.add_argument("-z", "--min-size", type=int,
        help="capture only allocations larger than this size")
parser.add_argument("-Z", "--max-size", type=int,
        help="capture only allocations smaller than this size")
parser.add_argument("-O", "--obj", type=str, default="c",
        help="attach to allocator functions in the specified object")
parser.add_argument("--ebpf", action="store_true",
        help=argparse.SUPPRESS)
parser.add_argument("--percpu", default=False, action="store_true",
        help="trace percpu allocations")

args = parser.parse_args()

pid = args.pid
command = args.command
kernel_trace = (pid == -1 and command is None)
trace_all = args.trace
interval = args.interval
min_age_ns = 1e6 * args.older
sample_every_n = args.sample_rate
num_prints = args.count
top_stacks = args.top
min_size = args.min_size
max_size = args.max_size
obj = args.obj

if min_size is not None and max_size is not None and min_size > max_size:
    print("min_size (-z) can't be greater than max_size (-Z)")
    exit(1)

if command is not None:
    print("Executing '%s' and tracing the resulting process." % command)
    pid = run_command_get_pid(command)

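# The BPF program text is assembled from fragments: the user-mode allocator
# probes below are always included, and either the kernel kmem tracepoint
# probes or the percpu tracepoint probes are appended later, depending on
# the command line (see the `if kernel_trace:` block further down).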
bpf_source = """
#include <uapi/linux/ptrace.h>

struct alloc_info_t {
    u64 size;
    u64 timestamp_ns;
    int stack_id;
};

struct combined_alloc_info_t {
    u64 total_size;
    u64 number_of_allocs;
};

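// Map overview:
//   sizes           - in-flight request size, keyed by pid_tgid; set on entry
//                     to an allocator and consumed on return
//   allocs          - outstanding allocations, keyed by the returned address
//                     (or PFN for kernel page allocations)
//   memptrs         - saved memptr argument of posix_memalign between entry
//                     and return
//   stack_traces    - call stacks referenced by alloc_info_t.stack_id
//   combined_allocs - per-stack running totals (bytes and allocation count)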
BPF_HASH(sizes, u64);
BPF_HASH(allocs, u64, struct alloc_info_t, 1000000);
BPF_HASH(memptrs, u64, u64);
BPF_STACK_TRACE(stack_traces, 10240);
BPF_HASH(combined_allocs, u64, struct combined_alloc_info_t, 10240);

static inline void update_statistics_add(u64 stack_id, u64 sz) {
    struct combined_alloc_info_t *existing_cinfo;
    struct combined_alloc_info_t cinfo = {0};

    existing_cinfo = combined_allocs.lookup(&stack_id);
    if (existing_cinfo != 0)
        cinfo = *existing_cinfo;

    cinfo.total_size += sz;
    cinfo.number_of_allocs += 1;

    combined_allocs.update(&stack_id, &cinfo);
}

static inline void update_statistics_del(u64 stack_id, u64 sz) {
    struct combined_alloc_info_t *existing_cinfo;
    struct combined_alloc_info_t cinfo = {0};

    existing_cinfo = combined_allocs.lookup(&stack_id);
    if (existing_cinfo != 0)
        cinfo = *existing_cinfo;

    if (sz >= cinfo.total_size)
        cinfo.total_size = 0;
    else
        cinfo.total_size -= sz;

    if (cinfo.number_of_allocs > 0)
        cinfo.number_of_allocs -= 1;

    combined_allocs.update(&stack_id, &cinfo);
}

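// Entry/return pairing: gen_alloc_enter records the requested size keyed by
// pid_tgid; gen_alloc_exit2 runs on return, picks the size back up, captures
// the call stack and the returned address, and records them in allocs and
// combined_allocs. gen_free_enter reverses that bookkeeping. SIZE_FILTER,
// SAMPLE_EVERY_N, SHOULD_PRINT and STACK_FLAGS are plain-text placeholders
// substituted by the Python code before compilation.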
static inline int gen_alloc_enter(struct pt_regs *ctx, size_t size) {
    SIZE_FILTER
    if (SAMPLE_EVERY_N > 1) {
        u64 ts = bpf_ktime_get_ns();
        if (ts % SAMPLE_EVERY_N != 0)
            return 0;
    }

    u64 pid = bpf_get_current_pid_tgid();
    u64 size64 = size;
    sizes.update(&pid, &size64);

    if (SHOULD_PRINT)
        bpf_trace_printk("alloc entered, size = %u\\n", size);
    return 0;
}

static inline int gen_alloc_exit2(struct pt_regs *ctx, u64 address) {
    u64 pid = bpf_get_current_pid_tgid();
    u64* size64 = sizes.lookup(&pid);
    struct alloc_info_t info = {0};

    if (size64 == 0)
        return 0; // missed alloc entry

    info.size = *size64;
    sizes.delete(&pid);

    info.timestamp_ns = bpf_ktime_get_ns();
    info.stack_id = stack_traces.get_stackid(ctx, STACK_FLAGS);
    allocs.update(&address, &info);
    update_statistics_add(info.stack_id, info.size);

    if (SHOULD_PRINT) {
        bpf_trace_printk("alloc exited, size = %lu, result = %lx\\n",
                         info.size, address);
    }
    return 0;
}

static inline int gen_alloc_exit(struct pt_regs *ctx) {
    return gen_alloc_exit2(ctx, PT_REGS_RC(ctx));
}

static inline int gen_free_enter(struct pt_regs *ctx, void *address) {
    u64 addr = (u64)address;
    struct alloc_info_t *info = allocs.lookup(&addr);
    if (info == 0)
        return 0;

    allocs.delete(&addr);
    update_statistics_del(info->stack_id, info->size);

    if (SHOULD_PRINT) {
        bpf_trace_printk("free entered, address = %lx, size = %lu\\n",
                         address, info->size);
    }
    return 0;
}

int malloc_enter(struct pt_regs *ctx, size_t size) {
    return gen_alloc_enter(ctx, size);
}

int malloc_exit(struct pt_regs *ctx) {
    return gen_alloc_exit(ctx);
}

int free_enter(struct pt_regs *ctx, void *address) {
    return gen_free_enter(ctx, address);
}

int calloc_enter(struct pt_regs *ctx, size_t nmemb, size_t size) {
    return gen_alloc_enter(ctx, nmemb * size);
}

int calloc_exit(struct pt_regs *ctx) {
    return gen_alloc_exit(ctx);
}

int realloc_enter(struct pt_regs *ctx, void *ptr, size_t size) {
    gen_free_enter(ctx, ptr);
    return gen_alloc_enter(ctx, size);
}

int realloc_exit(struct pt_regs *ctx) {
    return gen_alloc_exit(ctx);
}

int posix_memalign_enter(struct pt_regs *ctx, void **memptr, size_t alignment,
                         size_t size) {
    u64 memptr64 = (u64)(size_t)memptr;
    u64 pid = bpf_get_current_pid_tgid();

    memptrs.update(&pid, &memptr64);
    return gen_alloc_enter(ctx, size);
}

int posix_memalign_exit(struct pt_regs *ctx) {
    u64 pid = bpf_get_current_pid_tgid();
    u64 *memptr64 = memptrs.lookup(&pid);
    void *addr;

    if (memptr64 == 0)
        return 0;

    memptrs.delete(&pid);

    if (bpf_probe_read(&addr, sizeof(void*), (void*)(size_t)*memptr64))
        return 0;

    u64 addr64 = (u64)(size_t)addr;
    return gen_alloc_exit2(ctx, addr64);
}

int aligned_alloc_enter(struct pt_regs *ctx, size_t alignment, size_t size) {
    return gen_alloc_enter(ctx, size);
}

int aligned_alloc_exit(struct pt_regs *ctx) {
    return gen_alloc_exit(ctx);
}

int valloc_enter(struct pt_regs *ctx, size_t size) {
    return gen_alloc_enter(ctx, size);
}

int valloc_exit(struct pt_regs *ctx) {
    return gen_alloc_exit(ctx);
}

int memalign_enter(struct pt_regs *ctx, size_t alignment, size_t size) {
    return gen_alloc_enter(ctx, size);
}

int memalign_exit(struct pt_regs *ctx) {
    return gen_alloc_exit(ctx);
}

int pvalloc_enter(struct pt_regs *ctx, size_t size) {
    return gen_alloc_enter(ctx, size);
}

int pvalloc_exit(struct pt_regs *ctx) {
    return gen_alloc_exit(ctx);
}

int mallocx_enter(struct pt_regs *ctx, size_t size, int flag) {
    return gen_alloc_enter(ctx, size);
}

int mallocx_exit(struct pt_regs *ctx) {
    return gen_alloc_exit(ctx);
}

int sdallocx_enter(struct pt_regs *ctx, void *address, size_t size, int flag) {
    return gen_free_enter(ctx, address);
}

int rallocx_enter(struct pt_regs *ctx, void *ptr, size_t size, int flag) {
    gen_free_enter(ctx, ptr);
    return gen_alloc_enter(ctx, size);
}

int rallocx_exit(struct pt_regs *ctx) {
    return gen_alloc_exit(ctx);
}
"""

bpf_source_kernel = """

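// Kernel-mode probes: the kmem tracepoints report both the requested size
// (bytes_alloc) and the returned pointer, so the enter and exit handlers are
// invoked back to back from a single probe. For page allocations the PFN is
// used as the tracking key, so mm_page_alloc and mm_page_free match up
// regardless of which page allocator interface was used.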
TRACEPOINT_PROBE(kmem, kmalloc) {
    gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
    return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmalloc_node) {
    gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
    return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kfree) {
    return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_alloc) {
    gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
    return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_alloc_node) {
    gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
    return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_free) {
    return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}

TRACEPOINT_PROBE(kmem, mm_page_alloc) {
    gen_alloc_enter((struct pt_regs *)args, PAGE_SIZE << args->order);
    return gen_alloc_exit2((struct pt_regs *)args, args->pfn);
}

TRACEPOINT_PROBE(kmem, mm_page_free) {
    return gen_free_enter((struct pt_regs *)args, (void *)args->pfn);
}
"""

bpf_source_percpu = """

TRACEPOINT_PROBE(percpu, percpu_alloc_percpu) {
    gen_alloc_enter((struct pt_regs *)args, args->size);
    return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(percpu, percpu_free_percpu) {
    return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}
"""

if kernel_trace:
    if args.percpu:
        bpf_source += bpf_source_percpu
    else:
        bpf_source += bpf_source_kernel

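# Fill in the compile-time placeholders in the BPF program text. These are
# plain string substitutions performed before the program is handed to BPF().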
bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
bpf_source = bpf_source.replace("SAMPLE_EVERY_N", str(sample_every_n))
bpf_source = bpf_source.replace("PAGE_SIZE", str(resource.getpagesize()))

size_filter = ""
if min_size is not None and max_size is not None:
    size_filter = "if (size < %d || size > %d) return 0;" % \
                  (min_size, max_size)
elif min_size is not None:
    size_filter = "if (size < %d) return 0;" % min_size
elif max_size is not None:
    size_filter = "if (size > %d) return 0;" % max_size
bpf_source = bpf_source.replace("SIZE_FILTER", size_filter)

stack_flags = "BPF_F_REUSE_STACKID"
if not kernel_trace:
    stack_flags += "|BPF_F_USER_STACK"
bpf_source = bpf_source.replace("STACK_FLAGS", stack_flags)

if args.ebpf:
    print(bpf_source)
    exit()

bpf = BPF(text=bpf_source)

if not kernel_trace:
    print("Attaching to pid %d, Ctrl+C to quit." % pid)

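    # Each user-mode allocator gets a uprobe on entry and a uretprobe on
    # return (both filtered by pid); free and sdallocx only need an entry
    # probe, since nothing has to be captured on return.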
    def attach_probes(sym, fn_prefix=None, can_fail=False):
        if fn_prefix is None:
            fn_prefix = sym

        try:
            bpf.attach_uprobe(name=obj, sym=sym,
                              fn_name=fn_prefix + "_enter",
                              pid=pid)
            bpf.attach_uretprobe(name=obj, sym=sym,
                                 fn_name=fn_prefix + "_exit",
                                 pid=pid)
        except Exception:
            if can_fail:
                return
            else:
                raise

    attach_probes("malloc")
    attach_probes("calloc")
    attach_probes("realloc")
    attach_probes("posix_memalign")
    attach_probes("valloc")
    attach_probes("memalign")
    # attach_probes("pvalloc")
    attach_probes("aligned_alloc", can_fail=True)  # added in C11
    bpf.attach_uprobe(name=obj, sym="free", fn_name="free_enter",
                      pid=pid)

    attach_probes("mallocx")
    attach_probes("rallocx")
    bpf.attach_uprobe(name=obj, sym="sdallocx", fn_name="sdallocx_enter",
                      pid=pid)

else:
    print("Attaching to kernel allocators, Ctrl+C to quit.")

    # No probe attaching here. Allocations are counted by attaching to
    # tracepoints.
    #
    # Memory allocations in the Linux kernel are not limited to malloc/free
    # equivalents. It's also common to allocate a memory page or multiple
    # pages. The page allocator has two interfaces, one working with page
    # frame numbers (PFN), the other working with page addresses. It's
    # possible to allocate pages with one kind of function and free them
    # with another. Kernel code can easily convert PFNs to addresses and
    # back, but it's hard to do the same in an eBPF kprobe without fragile
    # hacks.
    #
    # Fortunately, Linux exposes tracepoints for memory allocations, which
    # can be instrumented by eBPF programs. The tracepoint for page
    # allocations gives access to PFNs for both allocator interfaces, so
    # there is no need to guess which allocation corresponds to which free.

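    # The kmem and percpu tracepoints referenced above can be listed on a
    # given machine with bcc's tplist tool (e.g. `tplist 'kmem:*'`) or under
    # /sys/kernel/debug/tracing/events/; availability varies with kernel
    # version and configuration.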
def print_outstanding():
    print("[%s] Top %d stacks with outstanding allocations:" %
          (datetime.now().strftime("%H:%M:%S"), top_stacks))
    alloc_info = {}
    allocs = bpf["allocs"]
    stack_traces = bpf["stack_traces"]
    for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
        if BPF.monotonic_time() - min_age_ns < info.timestamp_ns:
            continue
        if info.stack_id < 0:
            continue
        if info.stack_id in alloc_info:
            alloc_info[info.stack_id].update(info.size)
        else:
            stack = list(stack_traces.walk(info.stack_id))
            combined = []
            for addr in stack:
                combined.append(bpf.sym(addr, pid,
                    show_module=True, show_offset=True))
            alloc_info[info.stack_id] = Allocation(combined,
                                                   info.size)
        if args.show_allocs:
            print("\taddr = %x size = %s" %
                  (address.value, info.size))
    to_show = sorted(alloc_info.values(),
                     key=lambda a: a.size)[-top_stacks:]
    for alloc in to_show:
        print("\t%d bytes in %d allocations from stack\n\t\t%s" %
              (alloc.size, alloc.count,
               b"\n\t\t".join(alloc.stack).decode("ascii")))

def print_outstanding_combined():
    stack_traces = bpf["stack_traces"]
    stacks = sorted(bpf["combined_allocs"].items(),
                    key=lambda a: -a[1].total_size)
    cnt = 1
    entries = []
    for stack_id, info in stacks:
        try:
            trace = []
            for addr in stack_traces.walk(stack_id.value):
                sym = bpf.sym(addr, pid,
                              show_module=True,
                              show_offset=True)
                trace.append(sym)
            trace = "\n\t\t".join(trace)
        except KeyError:
            trace = "stack information lost"

        entry = ("\t%d bytes in %d allocations from stack\n\t\t%s" %
                 (info.total_size, info.number_of_allocs, trace))
        entries.append(entry)

        cnt += 1
        if cnt > top_stacks:
            break

    print("[%s] Top %d stacks with outstanding allocations:" %
          (datetime.now().strftime("%H:%M:%S"), top_stacks))

    print('\n'.join(reversed(entries)))

count_so_far = 0
while True:
    if trace_all:
        print(bpf.trace_fields())
    else:
        try:
            sleep(interval)
        except KeyboardInterrupt:
            exit()
        if args.combined_only:
            print_outstanding_combined()
        else:
            print_outstanding()
        sys.stdout.flush()
    count_so_far += 1
    if num_prints is not None and count_so_far >= num_prints:
        exit()