Advertisement
Guest User

Untitled

a guest
Jan 28th, 2016
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.68 KB | None | 0 0
  1. #!/bin/bash
  2. #set -xv
  3. #
  # ---------------------------------------------------------------------------
  # Global configuration.
  #
  # SSH, SCP and MSQ are command prefixes kept as plain strings; callers expand
  # them UNQUOTED on purpose (e.g. `${SSH} "$host" cmd`) so that the options
  # word-split into separate arguments.
  # ---------------------------------------------------------------------------

  # Options shared by ssh and scp: host-key checking is disabled, only
  # public-key authentication is attempted, and connecting times out after 5s.
  SSH_OPTS="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ConnectTimeout=5 -o PreferredAuthentications=publickey -o PasswordAuthentication=no -q"
  SSH="sudo ssh ${SSH_OPTS}"
  SCP="sudo scp ${SSH_OPTS}"

  # mysql client prefix; the SQL statement is appended as the argument of -e.
  # The historical host/user/password remain the defaults but can now be
  # overridden through the environment.
  # NOTE(review): a password passed on argv is visible in `ps` output; consider
  # moving the credentials to a --defaults-extra-file.
  MSQ="mysql -h ${CCDN_DB_HOST:-ccdndb.sys.comcast.net} --skip-column-names --batch --protocol=TCP --port=3306 --user=${CCDN_DB_USER:-ccdn_tools} --password=${CCDN_DB_PASSWORD:-ccdn_tools} --database=ccdn -e"

  # Name of this script without any directory component.
  SCRIPTNAME=${0##*/}
  SCRIPTPATH=/usr/local/ccdn/teak_bonding

  # Per-run unique token. Fall back to PID + $RANDOM when uuidgen is missing or
  # fails, so the value is never empty.
  RANDOMNESS=$(uuidgen -r 2> /dev/null) || RANDOMNESS=""
  if [[ -z "${RANDOMNESS}" ]]; then
    RANDOMNESS="$$.${RANDOM}${RANDOM}"
  fi
  10.  
  #######################################
  # Print the help text and terminate the script.
  # Globals:   SCRIPTNAME (read; the basename of $0 is used when it is unset)
  # Arguments: none
  # Outputs:   help text on stdout
  # Returns:   does not return; exits the shell with status 1
  #######################################
  function usage() {
    # printf with one argument per line is used instead of a here-document so
    # the function body can be indented freely.
    printf '%s\n' \
      '' \
      'This script is used to facilitate the addition of a second 10G interface to a Teak node' \
      '' \
      "Usage: ${SCRIPTNAME:-${0##*/}} -h <node> -p | -P | -c" \
      '' \
      'Options:' \
      '' \
      '-p : initiate a pre-check against the node' \
      '-P : initiate a post-check against the node' \
      '-c : cutover the node - the node will be converted to a dual 10G setup' \
      '-h : the node against which we perform any action'
    exit 1
  }
  26.  
  #######################################
  # Report a pre-check message on stdout and store it in the database.
  # Globals:   MSQ (read) - mysql command prefix, expanded unquoted on purpose
  # Arguments: $1 - hostname of the row to update in teak_pre_load
  #            $2 - message written to the onecache_prenotes column
  # Outputs:   "<hostname> - <message>" on stdout
  # Returns:   exit status of the mysql invocation
  #######################################
  function update_db_pre_status() {
    local hostname=$1
    local message=$2
    local bs='\' dq='"'
    local sql_host sql_message

    printf '%s - %s\n' "${hostname}" "${message}"

    # The values are embedded in double-quoted SQL string literals, so escape
    # backslashes first and then double quotes; otherwise a message containing
    # a quote would terminate the literal early.
    # NOTE(review): relies on MySQL's default backslash escaping
    # (sql_mode without NO_BACKSLASH_ESCAPES).
    sql_host=${hostname//"${bs}"/"${bs}${bs}"}
    sql_host=${sql_host//"${dq}"/"${bs}${dq}"}
    sql_message=${message//"${bs}"/"${bs}${bs}"}
    sql_message=${sql_message//"${dq}"/"${bs}${dq}"}

    # shellcheck disable=SC2086 -- MSQ must word-split into command + options
    ${MSQ} "update teak_pre_load set onecache_prenotes=\"${sql_message}\" where hostname=\"${sql_host}\";"
  }
  35.  
  #######################################
  # Report a post-check message on stdout and store it in the database.
  # Globals:   MSQ (read) - mysql command prefix, expanded unquoted on purpose
  # Arguments: $1 - hostname of the row to update in teak_pre_load
  #            $2 - message written to the onecache_postnotes column
  # Outputs:   "<hostname> - <message>" on stdout
  # Returns:   exit status of the mysql invocation
  #######################################
  function update_db_post_status() {
    local hostname=$1
    local message=$2
    local bs='\' dq='"'
    local sql_host sql_message

    printf '%s - %s\n' "${hostname}" "${message}"

    # The values are embedded in double-quoted SQL string literals, so escape
    # backslashes first and then double quotes; otherwise a message containing
    # a quote would terminate the literal early.
    # NOTE(review): relies on MySQL's default backslash escaping
    # (sql_mode without NO_BACKSLASH_ESCAPES).
    sql_host=${hostname//"${bs}"/"${bs}${bs}"}
    sql_host=${sql_host//"${dq}"/"${bs}${dq}"}
    sql_message=${message//"${bs}"/"${bs}${bs}"}
    sql_message=${sql_message//"${dq}"/"${bs}${dq}"}

    # shellcheck disable=SC2086 -- MSQ must word-split into command + options
    ${MSQ} "update teak_pre_load set onecache_postnotes=\"${sql_message}\" where hostname=\"${sql_host}\";"
  }
  44.  
  #######################################
  # Verify that a node is in a state where the bonding change can be made.
  #
  # Checks, in order: ssh reachability, DRAC reachability on port 443, number
  # of active connections on <myip>:80, liveness of at least one peer node
  # (extensive only), link state of both slave NICs, and whether bond0 already
  # has two slave interfaces.
  #
  # Globals:   SSH, fqdn, dracip, myip (read)
  #            p_prefix, slave1_link, slave2_link, slave1_mac, slave2_mac
  #            (written; cutover and postcheck read p_prefix / slave2_mac)
  # Arguments: $1 - host to check
  #            $2 - 1 to also enforce the "extensive" checks (no active
  #                 streams, a live peer, link on both NICs); defaults to 0
  # Outputs:   progress messages on stdout; status notes via
  #            update_db_pre_status / update_db_post_status
  # Returns:   0 when every check passed, 2 when a check failed.
  #            EXITS the script with status 1 when bond0 already lists two
  #            slave interfaces.
  #######################################
  function precheck() {
    local host=${1}
    local extensive=${2:-0}
    local sshret streamcount peers peer response_code proceed up_count

    echo "##########"
    echo "Performing check on ${fqdn} with DRAC of https://${dracip}"
    echo

    # ssh check: the node must answer and report an x86_64 machine type.
    # shellcheck disable=SC2086 -- SSH must word-split into command + options
    sshret=$(${SSH} "${host}" uname -m)
    if [[ ! ${sshret} =~ x86_64 ]]; then
      update_db_pre_status "${host}" "${fqdn} does not appear to be up right now - we could not log to it."
      return 2
    fi

    # DRAC check: the management controller must accept connections on 443.
    if ! nc -w 3 -z "${dracip}" 443 > /dev/null 2>&1; then
      update_db_pre_status "${host}" "DRAC (${dracip}) does not appear to be responding for ${fqdn}."
      return 2
    fi

    # Still-streaming check: count connections on <myip>:80 that are not in
    # LISTEN / TIME_WAIT / CLOSE_WAIT state.
    # shellcheck disable=SC2086
    streamcount=$(${SSH} "${host}" "netstat -na | egrep -v 'LISTEN|TIME_WAIT|CLOSE_WAIT' | egrep ${myip}':80[^0-9]' | wc -l")

    echo "${fqdn} - stream count : ${streamcount}"

    if [[ ${extensive} -eq 1 ]] && [[ ${streamcount} -gt 0 ]]; then
      update_db_pre_status "${host}" "Not performing bonding change - we still have ${streamcount} streams active on ${host}."
      return 2
    fi

    # Check that at least one other node of the cluster is up.
    if [[ ${extensive} -eq 1 ]]; then
      # Peer endpoints come from the node= lines of teak.config, excluding our
      # own IP (matched as a fixed string, not a regex), with port 80 rewritten
      # to 8087.
      # shellcheck disable=SC2086
      peers=$(${SSH} "${host}" "grep node= /opt/teak/etc/teak.config" \
        | grep -vF -- "${myip}" \
        | awk -F, '{print $2}' \
        | sed -e 's/:80/:8087/g')

      proceed=0
      # Word splitting of ${peers} is intentional: one endpoint per word.
      for peer in ${peers}; do
        # The original passed "-H:" which made curl treat "Connection: close"
        # as a URL; the header is now passed properly.
        response_code=$(curl -H "Connection: close" --silent -D /dev/stdout "http://${peer}/status" | head -n 1)
        if [[ ${response_code} =~ 200 ]]; then
          proceed=1
        fi
      done

      if [[ ${proceed} -eq 0 ]]; then
        update_db_pre_status "${host}" "No peer nodes in cluster are alive - not doing bonding change."
        return 2
      fi
    fi

    # Account for p4p1 vs p2p1: take the first two characters of the first
    # slave interface name listed for bond0.
    # shellcheck disable=SC2086
    p_prefix=$(${SSH} "${host}" cat /proc/net/bonding/bond0 \
      | grep "Slave Interface" \
      | head -n 1 \
      | awk '{print $NF}' \
      | cut -c 1-2)

    # Link state and MAC address of both candidate slave NICs.
    # shellcheck disable=SC2086
    slave1_link=$(${SSH} "${host}" "/sbin/ethtool ${p_prefix}p1" | grep "Link detected" | awk '{print $NF}')
    # shellcheck disable=SC2086
    slave2_link=$(${SSH} "${host}" "/sbin/ethtool ${p_prefix}p2" | grep "Link detected" | awk '{print $NF}')
    # shellcheck disable=SC2086
    slave1_mac=$(${SSH} "${host}" "cat /sys/class/net/${p_prefix}p1/address")
    # shellcheck disable=SC2086
    slave2_mac=$(${SSH} "${host}" "cat /sys/class/net/${p_prefix}p2/address")

    echo "${fqdn} - current server side link state: ${p_prefix}p1 (${slave1_mac}) - ${slave1_link}, ${p_prefix}p2 (${slave2_mac}) - ${slave2_link}"

    if [[ ${extensive} -eq 1 ]]; then
      if [[ ${slave1_link} != 'yes' ]] || [[ ${slave2_link} != 'yes' ]]; then
        update_db_pre_status "${host}" "We do not have link on both interfaces, not proceeding."
        return 2
      fi
    fi

    # Two "Slave Interface" entries in the bonding proc file mean the node has
    # already been converted.
    # shellcheck disable=SC2086
    up_count=$(${SSH} "${host}" "cat /proc/net/bonding/bond0" | grep -c "Slave Interface")

    if [[ ${up_count} -eq 2 ]]; then
      update_db_post_status "${host}" "already converted to a bonded configuration."
      exit 1
    else
      update_db_post_status "${host}" "not yet converted to a bonded configuration."
    fi

    # All previous checks passed.
    return 0
  }
  155.  
  #######################################
  # Validate a node after the bonding change.
  #
  # Purges a sample of recently served URLs on the node, refetches them, and
  # reports how many bytes each bond slave received in between, followed by
  # ethtool counters and a check of the bond0 MII status lines.
  #
  # Globals:   SSH, fqdn (read)
  #            p_prefix (read; derived from bond0 here when it is empty, which
  #            is the case when the script is run with -P only, because only
  #            precheck sets it)
  # Arguments: $1 - host to validate
  # Outputs:   report on stdout; status notes via update_db_post_status
  # Returns:   2 when the ssh check fails; otherwise the status of the final
  #            MII check block.
  #            EXITS the script with status 1 when the per-slave statistics of
  #            bond0 cannot be read.
  #######################################
  function postcheck() {
    local host=${1}
    local sshret up_count
    local preslave1 preslave2 postslave1 postslave2 deltaslave1 deltaslave2

    # ssh check: the node must answer and report an x86_64 machine type.
    # shellcheck disable=SC2086 -- SSH must word-split into command + options
    sshret=$(${SSH} "${host}" uname -m)
    if [[ ! ${sshret} =~ x86_64 ]]; then
      update_db_post_status "${host}" "Failed ssh post-check."
      return 2
    fi

    # Without a NIC name prefix the statistics paths below would point at
    # "slave_p1"/"slave_p2" and the check would always fail, so work it out
    # from the first slave interface of bond0 when nobody has set it yet.
    if [[ -z "${p_prefix:-}" ]]; then
      # shellcheck disable=SC2086
      p_prefix=$(${SSH} "${host}" cat /proc/net/bonding/bond0 \
        | grep "Slave Interface" \
        | head -n 1 \
        | awk '{print $NF}' \
        | cut -c 1-2)
    fi

    # Purge some content so it has to be refetched: take URLs of recent
    # pssc=200 log entries, rewrite them to the local port 8088, and issue a
    # PURGE for each. The list is kept in /dev/shm/bonding_urls on the node.
    # shellcheck disable=SC2086
    ${SSH} "${host}" "tail -n 100 /opt/trafficserver/var/log/trafficserver/custom_ats_2.log | egrep \"bytes.*pssc=200\" | awk -F\"url=\" '{print \$2}' | awk '{print \$1}' > /dev/shm/bonding_urls; perl -pi -e \"s#http://quika.*comcast.net:80/\d+/(.*)#http://localhost:8088/\1#g\" /dev/shm/bonding_urls; cat /dev/shm/bonding_urls | while read url; do curl -X PURGE --silent \$url; done "

    # Received-byte counters of both slaves before the refetch.
    # shellcheck disable=SC2086
    preslave1=$(${SSH} "${host}" cat "/sys/devices/virtual/net/bond0/slave_${p_prefix}p1/statistics/rx_bytes" 2> /dev/null)
    # shellcheck disable=SC2086
    preslave2=$(${SSH} "${host}" cat "/sys/devices/virtual/net/bond0/slave_${p_prefix}p2/statistics/rx_bytes" 2> /dev/null)

    if [[ -z ${preslave1} ]] || [[ -z ${preslave2} ]]; then
      echo "${fqdn} - Uh-oh, looks like node is not bonded correctly, exiting."
      echo "${fqdn} - please check state of /proc/net/bonding/bond0"
      exit 1
    fi

    # Refetch the purged URLs to generate inbound traffic.
    # shellcheck disable=SC2086
    ${SSH} "${host}" "cat /dev/shm/bonding_urls | while read url; do curl -o /dev/null --silent \$url ; done"

    # Received-byte counters after the refetch.
    # shellcheck disable=SC2086
    postslave1=$(${SSH} "${host}" cat "/sys/devices/virtual/net/bond0/slave_${p_prefix}p1/statistics/rx_bytes")
    # shellcheck disable=SC2086
    postslave2=$(${SSH} "${host}" cat "/sys/devices/virtual/net/bond0/slave_${p_prefix}p2/statistics/rx_bytes")

    deltaslave1=$((postslave1 - preslave1))
    deltaslave2=$((postslave2 - preslave2))

    echo "We just purged and refetched some content...here is count of bytes pulled by each interface."
    echo
    echo "${p_prefix}p1 : ${deltaslave1}"
    echo "${p_prefix}p2 : ${deltaslave2}"
    echo
    echo "These values should be both greater than 0."

    echo
    # ethtool statistics for both slaves
    echo "${p_prefix}p1 stats"
    # shellcheck disable=SC2086
    ${SSH} "${host}" "ethtool -S ${p_prefix}p1 | egrep '[rt]x_(errors|dropped|packets|bytes):' | sort"
    echo
    echo "${p_prefix}p2 stats"
    # shellcheck disable=SC2086
    ${SSH} "${host}" "ethtool -S ${p_prefix}p2 | egrep '[rt]x_(errors|dropped|packets|bytes):' | sort"
    echo

    # The bonding proc file is expected to show three "MII Status: up" lines.
    # shellcheck disable=SC2086
    up_count=$(${SSH} "${host}" "cat /proc/net/bonding/bond0" | grep -c "MII Status: up")

    if [[ ${up_count} -ne 3 ]]; then
      update_db_post_status "${host}" "Please check the state of /proc/net/bonding/bond0 on ${host} - we should see 3 interfaces up but I am not seeing that."
    fi

    # Re-enabling the monitoring checks is currently switched off:
    #${SCRIPTPATH}/nagios_enable_checks.sh -h ${fqdn}
  }
  221.  
  #######################################
  # Convert a node to the dual-10G bonded setup.
  #
  # Runs the extensive precheck, writes an ifcfg file for the second NIC,
  # copies it to the node, reloads the ixgbe driver while restarting the
  # network service, and finally runs postcheck.
  #
  # Globals:   SSH, SCP, dracip (read)
  #            p_prefix, slave2_mac (read; both are set by precheck)
  # Arguments: $1 - host to convert
  # Outputs:   progress messages on stdout; status notes via
  #            update_db_post_status
  # Returns:   status of postcheck.
  #            EXITS the script with status 2 when precheck fails, and with
  #            status 1 when the node is already bonded, the MAC address of
  #            the second NIC is unknown, or the ifcfg file cannot be created
  #            or copied.
  #######################################
  function cutover() {
    local host=$1
    local up_count p2_mac tmp_root ifcfg_tmp

    if ! precheck "${host}" 1; then
      update_db_post_status "${host}" "Found some problems with precheck - not proceeding."
      exit 2
    fi

    echo
    echo "Starting cutover process for ${host} at $(date)."
    echo

    # Putting the node into monitoring downtime is currently switched off:
    #${SCRIPTPATH}/nagios_disable_checks.sh -h ${fqdn}

    echo "DRAC IP: https://${dracip}/"
    echo

    # Two "Slave Interface" entries in the bonding proc file mean the node has
    # already been converted.
    # shellcheck disable=SC2086 -- SSH must word-split into command + options
    up_count=$(${SSH} "${host}" "cat /proc/net/bonding/bond0" | grep -c "Slave Interface")

    if [[ ${up_count} -eq 2 ]]; then
      update_db_post_status "${host}" "already converted to a bonded configuration, exiting."
      exit 1
    fi

    echo "Starting bonding change for ${host}...."

    # MAC address of the second NIC. The original line
    # "${p_prefix}p2_mac=..." was not a valid assignment (bash ran it as a
    # command); read the address into a local and fall back to the value
    # precheck recorded.
    # shellcheck disable=SC2086
    p2_mac=$(${SSH} "${host}" "cat /sys/class/net/${p_prefix}p2/address")
    if [[ -z "${p2_mac}" ]]; then
      p2_mac=${slave2_mac:-}
    fi
    if [[ -z "${p2_mac}" ]]; then
      update_db_post_status "${host}" "Unable to determine MAC address of ${p_prefix}p2 - not proceeding."
      exit 1
    fi

    # Build the interface definition in an unpredictable temp file (mktemp)
    # rather than a guessable name; prefer /dev/shm as before.
    tmp_root=/dev/shm
    if [[ ! -d "${tmp_root}" || ! -w "${tmp_root}" ]]; then
      tmp_root=${TMPDIR:-/tmp}
    fi
    if ! ifcfg_tmp=$(mktemp "${tmp_root}/teak_bonding.${host}.XXXXXX"); then
      update_db_post_status "${host}" "Unable to create temporary ifcfg file - not proceeding."
      exit 1
    fi
    # mktemp creates the file with mode 0600; keep the conventional 0644 so
    # the copied ifcfg file has the same permissions as before.
    chmod 0644 "${ifcfg_tmp}"

    printf '%s\n' \
      "DEVICE=\"${p_prefix}p2\"" \
      "HWADDR=\"${p2_mac}\"" \
      'ONBOOT="yes"' \
      'SLAVE="yes"' \
      'MASTER="bond0"' > "${ifcfg_tmp}"

    # Never restart the network if the new configuration did not arrive.
    # shellcheck disable=SC2086 -- SCP must word-split into command + options
    if ! ${SCP} "${ifcfg_tmp}" "${host}:/etc/sysconfig/network-scripts/ifcfg-${p_prefix}p2"; then
      rm -f -- "${ifcfg_tmp}"
      update_db_post_status "${host}" "Failed to copy ifcfg-${p_prefix}p2 to ${host} - network was not restarted."
      exit 1
    fi
    rm -f -- "${ifcfg_tmp}"

    # shellcheck disable=SC2086
    ${SSH} "${host}" "/sbin/service network stop ; modprobe -r ixgbe; modprobe ixgbe ; sleep 5; /sbin/service network restart"
    echo

    echo "Cutover complete...now attempting to validate."
    echo

    postcheck "${host}"
  }
  277.  
  278.  
  # ---------------------------------------------------------------------------
  # Main script body: parse options, resolve the node's FQDN / IP / DRAC
  # address, then run the requested action.
  #
  # The variables host, fqdn, myip, dracip and extensive are intentionally
  # global: precheck, postcheck and cutover read them.
  # ---------------------------------------------------------------------------

  extensive=0 # a stand-alone pre-check (-p) runs without the extensive checks
  action=null
  host=null

  if [[ ${EUID} -ne 0 ]]; then
    echo "You must be root or use sudo for this script."
    exit 1
  fi

  while getopts "h:pPc" opt; do
    case "${opt}" in
      h)
        host=${OPTARG}
        ;;
      p)
        action=pre
        ;;
      P)
        action=post
        ;;
      c)
        action=cutover
        ;;
      *)
        usage
        ;;
    esac
  done

  # Both a node and an action are mandatory.
  if [[ ${host} = null ]] || [[ ${action} = null ]]; then
    usage
  fi

  # Look the node up in /etc/hosts once. The name is matched as a fixed string
  # on word boundaries (not as a regex), and commented-out lines are ignored.
  host_entries=$(grep -wF -- "${host}" /etc/hosts | grep -v '^[[:space:]]*#')
  # Count the non-empty lines; an empty result yields 0.
  fqdn_count=$(grep -c . <<< "${host_entries}")

  if [[ ${fqdn_count} -ne 1 ]]; then
    echo "Unable to match $host from /etc/hosts file - please check the host."
    echo "We need to only match one node so please be more specific."
    exit 1
  fi

  # First column of the entry is the IP address, second column the FQDN.
  read -r myip fqdn _ <<< "${host_entries}"

  if [[ -z ${fqdn} ]]; then
    update_db_pre_status "${host}" "Unable to determine FQDN."
    exit 1
  fi

  if [[ -z ${myip} ]]; then
    update_db_pre_status "${host}" "Unable to determine IP address."
    exit 1
  fi

  # The DRAC address is stored in the terminal_server column of the hosts table.
  # shellcheck disable=SC2086 -- MSQ must word-split into command + options
  dracip=$(${MSQ} "select terminal_server from hosts where hostname=\"${fqdn}\"")

  if [[ -z ${dracip} ]]; then
    update_db_pre_status "${host}" "Unable to determine DRAC IP address."
    exit 1
  fi

  # From here on always address the node by its FQDN.
  host=${fqdn}

  # Dispatch. The script's exit status is that of the selected action, so a
  # failing cutover/postcheck is visible to the caller.
  case "${action}" in
    pre)
      if ! precheck "${host}" "${extensive}"; then
        echo "Pre-check failed for $host."
        exit 1
      fi
      ;;
    cutover)
      cutover "${host}"
      ;;
    post)
      postcheck "${host}"
      ;;
  esac
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement