hivefans

yarn_mem_track.sh

Jun 17th, 2019
26
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  #!/bin/bash
  #
  # Settings and the awk program used to turn NodeManager "Memory usage of
  # ProcessTree" log lines into one compact record per sample.

  # YARN environment script; it is sourced further down, before YARN_LOG_DIR
  # is used.
  YARN_ENV="${HADOOP_HOME}/etc/hadoop/yarn-env.sh"
  # find -name pattern of the per-container log directories.
  CONTAINER_PATTERN="container_*"
  # Text that identifies the memory-usage lines in the NodeManager logs.
  MEM_GREP_PATTERN="INFO org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl: Memory usage of ProcessTree"

  # Input example (grep output, so the line starts with the log file name):
  # yarn-wdong-nodemanager-klose1.log:2015-01-05 04:00:21,921 INFO org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl: Memory usage of ProcessTree 27966 for container-id container_1420401494322_0001_01_000001: 78.1 MB of 2 GB physical memory used; 1.6 GB of 4.2 GB virtual memory used
  #
  # Whitespace-separated fields of that line as awk sees them:
  #   $1:  yarn-wdong-nodemanager-klose1.log:2015-01-05
  #   $2:  04:00:21,921
  #   $9:  27966                                    (pid of the process tree)
  #   $12: container_1420401494322_0001_01_000001:  (trailing colon)
  #   $13: 78.1   $14: MB    physical memory used
  #   $16: 2      $17: GB    physical memory limit
  #   $21: 1.6    $22: GB    virtual memory used
  #   $24: 4.2    $25: GB    virtual memory limit
  #
  # Output: one line per sample, all four memory figures converted to GB:
  #   node date time timestamp pid container p_use p_cap v_use v_cap
  # for example
  #   washtenaw 2015-01-06 14:56:40 1420574200 16669 container_1420574192658_0001_01_000001 0.274023 9 8.9 18.9

  AWK_PROG='
    # Convert the amount n, given in unit (B, KB, MB or GB), to GB.
    # An unknown unit is reported on stderr together with the current record
    # and yields an empty value.
    function calc_mem(n, unit) {
      if (unit == "B")  return n / 1024.0 / 1024.0 / 1024.0;
      if (unit == "KB") return n / 1024.0 / 1024.0;
      if (unit == "MB") return n / 1024.0;
      if (unit == "GB") return n;
      print "Found memory unit \"" unit "\" which is none of B, KB, MB or GB, do not know what to do." > "/dev/stderr";
      print $0 > "/dev/stderr";
      return "";
    }

    {
      # $1 is "<log file>:<date>"; the date is the last ":"-separated piece.
      count = split($1, parts, ":");
      date = parts[count];
      logfile = substr($1, 1, length($1) - length(date) - 1);

      # Drop the directory part first: dashes or dots in the directory name
      # would otherwise shift the pieces counted below.
      sub(/.*\//, "", logfile);

      # yarn-<user>-nodemanager-<node>.log: the node is the 4th piece.
      split(logfile, parts, "[-.]");
      node = parts[4];

      # $2 is "HH:MM:SS,mmm"; keep whole seconds only.
      split($2, parts, ",");
      time = parts[1];

      # mktime() expects "YYYY MM DD HH MM SS". It is not part of POSIX awk
      # (gawk provides it) and interprets the value in the local time zone.
      date_time = date " " time;
      gsub(/[:-]/, " ", date_time);
      ts = mktime(date_time);

      pid = $9;

      # Strip the trailing ":" from the container id.
      split($12, parts, ":");
      container = parts[1];

      p_use = calc_mem($13, $14);
      p_cap = calc_mem($16, $17);
      v_use = calc_mem($21, $22);
      v_cap = calc_mem($24, $25);

      print node, date, time, ts, pid, container, p_use, p_cap, v_use, v_cap;
    }
  '
  68.  
  # ---------------------------------------------------------------------------
  # Driver: for every application id given on the command line, print the peak
  # physical memory use (and the limit) of each of its containers, followed by
  # the totals over all containers of that application.
  #
  # Uses YARN_ENV, CONTAINER_PATTERN, MEM_GREP_PATTERN and AWK_PROG, which are
  # defined earlier in this file.
  # ---------------------------------------------------------------------------

  if [[ -z "${1:-}" ]]; then
    echo "usage:    $0 app-id app-id ..." >&2
    exit 1
  fi

  if [[ ! -f "$YARN_ENV" ]]; then
    echo "yarn-env.sh not found under \$HADOOP_HOME/etc/hadoop/yarn-env.sh" >&2
    echo "\$HADOOP_HOME=$HADOOP_HOME" >&2
    exit 1
  fi

  # Default HADOOP_YARN_HOME to HADOOP_HOME before sourcing yarn-env.sh
  # (presumably yarn-env.sh derives its paths from it -- confirm).
  HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-$HADOOP_HOME}

  # shellcheck disable=SC1090
  . "$YARN_ENV"

  if [[ -z "${YARN_LOG_DIR:-}" ]]; then
    echo "YARN_LOG_DIR is not set after sourcing $YARN_ENV" >&2
    exit 1
  fi

  # Check every requested application up front, so that a mistyped id is
  # reported before the NodeManager logs are scanned. As in the processing
  # loop below, the first empty argument ends the list.
  for APP in "$@"; do
    [[ -n "$APP" ]] || break
    if [[ ! -d "$YARN_LOG_DIR/userlogs/$APP" ]]; then
      echo "Log for app $APP not found under $YARN_LOG_DIR/userlogs/$APP" >&2
      exit 1
    fi
  done

  # All scratch files live in one private directory that is removed on every
  # exit path.
  WORKDIR=$(mktemp -d) || exit 1
  trap 'rm -rf -- "$WORKDIR"' EXIT
  MEMLOG=$WORKDIR/memlog          # one record per memory sample (AWK_PROG output)
  SUM=$WORKDIR/sum                # "peak cap" per container of the current app
  CONTAINERS=$WORKDIR/containers  # container directories of the current app

  # -H forces the "file:" prefix even when only a single log file matches;
  # AWK_PROG relies on that prefix for its field positions. -F matches the
  # pattern as fixed text, so its dots are not regex wildcards.
  grep -H -F -- "$MEM_GREP_PATTERN" "$YARN_LOG_DIR"/yarn-*.log \
    | sed 's/0B of/0 GB of/g' \
    | awk "$AWK_PROG" > "$MEMLOG"

  for APP in "$@"; do
    [[ -n "$APP" ]] || break

    # The log directory depends on the application, so it has to be computed
    # here, after APP is known.
    LOG_DIR=$YARN_LOG_DIR/userlogs/$APP
    : > "$SUM"

    find "$LOG_DIR/" -type d -name "$CONTAINER_PATTERN" | sort > "$CONTAINERS"
    NC=$(wc -l < "$CONTAINERS")
    # $((NC)) drops the padding some wc implementations add.
    echo "$((NC)) containers found for app $APP"

    while IFS= read -r C; do
      CID=${C##*/}
      # MEMLOG columns 7 and 8 are physical memory used and its limit, both
      # in GB. Report the largest use seen and the last limit seen. Splitting
      # on single spaces keeps the columns in place if a field is empty.
      PEAK=$(awk -F'[ ]' -v cid="$CID" '
        BEGIN { m = 0; c = 0 }
        index($0, cid) { if ($7 + 0 > m) m = $7 + 0; c = $8 }
        END { print m, c }
      ' "$MEMLOG")
      read -r U CAP <<< "$PEAK"
      printf '%s: %s of %s GB\n' "$CID" "$U" "$CAP"
      printf '%s %s\n' "$U" "$CAP" >> "$SUM"
    done < "$CONTAINERS"

    read -r U CAP < <(awk 'BEGIN{u=0;c=0;}{u+=$1;c+=$2;}END{print u, c;}' "$SUM")
    echo "Total: $U of $CAP GB."

    echo
  done
RAW Paste Data