Advertisement
Guest User

Untitled

a guest
Feb 27th, 2019
169
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.06 KB | None | 0 0
  1. #!/bin/bash -eu
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # Author: Michael Spector <spektom@gmail.com>
  16.  
  #######################################
  # EXIT handler: send SIGTERM to every background job this shell still tracks.
  # `kill` is skipped when there are no jobs, and a failing `kill` is ignored,
  # so the handler always succeeds and cannot disturb the script's exit path.
  # Returns:
  #   always 0
  #######################################
  function stop_background_jobs() {
    local pids
    pids=$(jobs -p)
    if [ -n "$pids" ]; then
      # Word splitting is intentional here: `jobs -p` prints one PID per line.
      # shellcheck disable=SC2086
      kill $pids 2>/dev/null || true
    fi
  }

  trap stop_background_jobs EXIT
  18.  
  #######################################
  # Print the first port in the given range on which a connection attempt to
  # 127.0.0.1 fails, i.e. a port nothing is listening on.
  # Arguments:
  #   $1 - first port to probe
  #   $2 - last port to probe (optional, defaults to 65000)
  # Outputs:
  #   the port number on stdout; an error message on stderr when the whole
  #   range is exhausted (stderr, so that callers using $(...) never capture
  #   the message as if it were a port)
  # Returns:
  #   0 when a port was printed, 1 otherwise
  #######################################
  function find_unused_port() {
    local first_port=${1:?find_unused_port: first port is required}
    local last_port=${2:-65000}
    local os port probe_status

    os=$(uname)
    for ((port = first_port; port <= last_port; port++)); do
      probe_status=0
      if [ "$os" == "Darwin" ]; then
        nc -nz 127.0.0.1 "$port" >/dev/null 2>&1 || probe_status=$?
      else
        # Byte \035 (Ctrl-]) is fed to telnet's stdin so it does not wait for input.
        printf '\035' | telnet 127.0.0.1 "$port" >/dev/null 2>&1 || probe_status=$?
      fi
      # Only exit status 1 from the probe is treated as "port is free".
      if [ "$probe_status" -eq 1 ]; then
        echo "$port"
        return 0
      fi
    done

    echo "ERROR: Can't find unused port in range ${first_port}-${last_port}" >&2
    return 1
  }
  34.  
  #######################################
  # Check that the required tools exist, then download and prepare all helper
  # artifacts (dump script, flamegraph.pl, InfluxDB, shaded profiler jar)
  # inside $install_dir.
  # Globals:
  #   install_dir (read) - directory that receives the downloads
  #   PATH (modified)    - on non-Darwin systems the unpacked InfluxDB binary
  #                        directory is prepended and exported
  # Outputs:
  #   progress and error messages on stdout
  # Returns:
  #   exits the script with status 1 when a required tool is missing or the
  #   install directory cannot be entered
  #######################################
  function install_deps() {
    local cmd
    local profiler=statsd-jvm-profiler-2.1.0
    local influx_tarball=influxdb-1.2.4_linux_amd64.tar.gz

    # wget is checked too: every download below depends on it.
    for cmd in python2.7 perl pip wget; do
      if ! command -v "$cmd" >/dev/null 2>&1; then
        echo "ERROR: $cmd is not installed!"
        exit 1
      fi
    done

    echo -e "[$(date -Ins)] Installing dependencies"
    mkdir -p "$install_dir"
    pushd "$install_dir" >/dev/null || exit 1
    pip -q install --user influxdb blist pytz

    wget -qc https://raw.githubusercontent.com/aviemzur/statsd-jvm-profiler/master/visualization/influxdb_dump.py
    wget -qc https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl

    if [ "$(uname)" == "Darwin" ]; then
      brew list influxdb &>/dev/null || brew install influxdb
    else
      wget -qc "https://dl.influxdata.com/influxdb/releases/${influx_tarball}"
      tar -xzf "$influx_tarball"
      # Re-create the version-independent link to the unpacked release.
      rm -f influxdb
      ln -s influxdb-1.2.4-1 influxdb
      export PATH="$install_dir/influxdb/usr/bin:$PATH"
    fi

    wget -qc "https://github.com/etsy/statsd-jvm-profiler/releases/download/2.1.0/${profiler}-jar-with-dependencies.jar"
    wget -qc https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/jarjar/jarjar-1.4.jar

    # Shade only once; the shaded jar is reused on subsequent runs.
    if [ ! -f "${profiler}-shaded.jar" ]; then
      echo -e "[$(date -Ins)] Shading possibly conflicing Java packages in statsd-jvm-profiler.jar"
      printf '%s\n' \
        'rule com.timgroup.** com.timgroup.sparkflamegraph.@1' \
        'rule com.google.guava.** com.google.guava.sparkflamegraph.@1' \
        'rule org.influxdb.** org.influxdb.sparkflamegraph.@1' \
        >jarjar-rules.txt
      java -jar jarjar-1.4.jar process jarjar-rules.txt \
        "${profiler}-jar-with-dependencies.jar" "${profiler}-shaded.jar"
    fi
    ln -sf "${profiler}-shaded.jar" statsd-jvm-profiler.jar

    popd >/dev/null
  }
  77.  
  #######################################
  # Write influxdb.conf into the current directory, start influxd in the
  # background, wait up to 5 seconds for its /ping endpoint to answer, then
  # create the "profiler" database and user.
  # Globals (read):
  #   local_ip, influx_meta_port, influx_http_port, influx_uri
  # Outputs:
  #   progress and error messages on stdout; influxdb.conf, influxdb.log and
  #   the ./influxdb data directory in the current directory
  # Returns:
  #   exits the script with status 1 when InfluxDB does not answer in time
  #######################################
  function run_influxdb() {
    local data_root wait_secs
    data_root="$(pwd)/influxdb"

    echo -e "[$(date -Ins)] Starting InfluxDB"
    printf '%s\n' \
      'reporting-disabled = true' \
      "hostname = \"${local_ip}\"" \
      "bind-address = \":${influx_meta_port}\"" \
      '[meta]' \
      "dir = \"${data_root}/meta\"" \
      '[data]' \
      "dir = \"${data_root}/data\"" \
      "wal-dir = \"${data_root}/wal\"" \
      '[admin]' \
      'enabled = false' \
      '[http]' \
      "bind-address = \":${influx_http_port}\"" \
      >influxdb.conf

    # Start from an empty data directory on every run.
    rm -rf influxdb
    influxd -config influxdb.conf >influxdb.log 2>&1 &

    # Poll once per second; wait_secs reaching 0 means every attempt failed.
    wait_secs=5
    while [ "$wait_secs" -gt 0 ]; do
      if curl -sS -i "$influx_uri/ping" 2>/dev/null | grep -q X-Influxdb-Version; then
        break
      fi
      sleep 1
      wait_secs=$((wait_secs - 1))
    done

    if [ "$wait_secs" -eq 0 ]; then
      echo "ERROR: Couldn't start InfluxDB!"
      exit 1
    fi

    curl -fsS -X POST "$influx_uri/query" \
      --data-urlencode "q=CREATE DATABASE profiler" >/dev/null

    curl -fsS -X POST "$influx_uri/query" \
      --data-urlencode "q=CREATE USER profiler WITH PASSWORD 'profiler' WITH ALL PRIVILEGES" >/dev/null
  }
  117.  
  #######################################
  # Run the Spark submit command with the statsd-jvm-profiler agent attached to
  # both the driver and the executors.
  # User-supplied "--jars" values are merged with the profiler jar, and
  # user-supplied spark.{executor,driver}.extraJavaOptions=... arguments get the
  # agent option appended instead of being overridden.
  # Globals:
  #   spark_cmd, install_dir, local_ip, influx_http_port (read)
  #   flamegraph_title (written: set to the last argument ending in ".jar")
  #   spark_exit_code  (written: exit status of the Spark command)
  # Arguments:
  #   the arguments to pass on to the Spark submit command
  # Outputs:
  #   the executed command line and, on failure, its exit status on stdout
  # Returns:
  #   0 even when Spark fails (see spark_exit_code); exits the script with
  #   status 1 when "--jars" is given without a value
  #######################################
  function run_spark_submit() {
    local agent_opts jars
    local executor_opts_patched=false
    local driver_opts_patched=false
    local -a spark_args=()
    local -a submit_cmd=()

    # One agent option string shared by the driver and the executors.
    agent_opts="-javaagent:statsd-jvm-profiler.jar=server=${local_ip}"
    agent_opts+=",port=${influx_http_port},reporter=InfluxDBReporter,database=profiler"
    agent_opts+=",username=profiler,password=profiler,prefix=sparkapp,tagMapping=spark"

    jars="$install_dir/statsd-jvm-profiler.jar"

    while (($# > 0)); do
      case "$1" in
        --jars)
          if (($# < 2)); then
            echo "ERROR: --jars requires a value" >&2
            exit 1
          fi
          jars="$jars,$2"
          shift
          ;;
        spark.executor.extraJavaOptions=*)
          spark_args+=("$1 ${agent_opts}")
          executor_opts_patched=true
          ;;
        spark.driver.extraJavaOptions=*)
          spark_args+=("$1 ${agent_opts}")
          driver_opts_patched=true
          ;;
        *)
          spark_args+=("$1")
          if [[ "$1" == *.jar ]]; then
            flamegraph_title="$1"
          fi
          ;;
      esac
      shift
    done

    submit_cmd=("${spark_cmd}" --jars "${jars}")

    if [ "$executor_opts_patched" != true ]; then
      submit_cmd+=(--conf "spark.executor.extraJavaOptions=${agent_opts}")
    fi

    if [ "$driver_opts_patched" != true ]; then
      submit_cmd+=(--conf "spark.driver.extraJavaOptions=${agent_opts}")
    fi

    # Length guard keeps the empty-array expansion safe under `set -u`.
    if [ "${#spark_args[@]}" -gt 0 ]; then
      submit_cmd+=("${spark_args[@]}")
    fi

    echo -e "[$(date -Ins)] Executing: ${submit_cmd[*]}"
    # Capture the status explicitly so a failing job does not abort under `set -e`.
    spark_exit_code=0
    "${submit_cmd[@]}" || spark_exit_code=$?
    if [ "$spark_exit_code" -ne 0 ]; then
      echo -e "[$(date -Ins)] Spark has exited with bad exit code ($spark_exit_code)"
    fi
  }
  182.  
  #######################################
  # Dump the collected stack traces from InfluxDB and render them into
  # flamegraph.svg in the current directory.
  # Globals (read):
  #   install_dir, local_ip, influx_http_port, flamegraph_title
  # Outputs:
  #   progress messages on stdout; flamegraph.svg on success
  # Returns:
  #   0 when no metrics were recorded (nothing to render) or on success
  #######################################
  function generate_flamegraph() {
    local -a trace_files

    rm -rf stack_traces
    echo -e "[$(date -Ins)] Collecting profiling metrics"

    # A failing dump is reported as "no metrics", not as a script failure.
    if ! python2.7 "$install_dir/influxdb_dump.py" \
      -o "$local_ip" -r "$influx_http_port" -u profiler \
      -p profiler -d profiler -t spark -e sparkapp -x stack_traces >/dev/null 2>&1; then
      echo -e "[$(date -Ins)] No profiling metrics were recorded!"
      return 0
    fi

    # An unmatched glob stays literal; treat that the same way instead of
    # handing a non-existent file name to flamegraph.pl.
    trace_files=(stack_traces/all_*.txt)
    if [ ! -e "${trace_files[0]}" ]; then
      rm -rf stack_traces
      echo -e "[$(date -Ins)] No profiling metrics were recorded!"
      return 0
    fi

    perl "$install_dir/flamegraph.pl" \
      --title "$flamegraph_title" \
      "${trace_files[@]}" >flamegraph.svg

    rm -rf stack_traces
    echo -e "[$(date -Ins)] Created flamegraph: $(pwd)/flamegraph.svg"
  }
  203.  
  # ---------------------------------------------------------------------------
  # Main sequence: detect the local address, pick ports, install dependencies,
  # start InfluxDB, run Spark with the profiler attached and render the
  # flamegraph. The script exits with Spark's exit status.
  # ---------------------------------------------------------------------------
  if [ "$(uname)" == "Darwin" ]; then
    local_ip=$(ifconfig | awk '/inet / && $2 != "127.0.0.1"{print $2; exit}')
  else
    # Take the address that follows the "src" keyword. Current iproute2 versions
    # append further fields (e.g. "uid 1000") after it, so the last field of the
    # line is not reliably the source address.
    local_ip=$(ip route get 1.1.1.1 \
      | awk '{for (i = 1; i < NF; i++) if ($i == "src") {print $(i + 1); exit}}')
  fi
  if [ -z "$local_ip" ]; then
    echo "ERROR: Couldn't determine local IP address" >&2
    exit 1
  fi

  install_dir="$HOME/.spark-flamegraph"
  # Stop right away when no port is available instead of continuing with an
  # empty or bogus port value.
  influx_meta_port=$(find_unused_port 48080) || exit 1
  influx_http_port=$(find_unused_port "$((influx_meta_port + 1))") || exit 1
  influx_uri="http://${local_ip}:${influx_http_port}"
  flamegraph_title="Spark Application"
  # SPARK_CMD overrides the command used to submit the application.
  spark_cmd=${SPARK_CMD:-"spark-submit"}

  install_deps
  run_influxdb
  run_spark_submit "$@"
  generate_flamegraph

  exit "$spark_exit_code"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement