Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#set -xv
#
# Script-wide command prefixes and constants.
#
# SSH, SCP and MSQ are deliberately plain strings: the rest of the script
# expands them unquoted (e.g. `${SSH} host cmd`) so that word splitting turns
# each string into a command followed by its options.

# Non-interactive ssh/scp: public-key auth only, no host-key prompts, nothing
# written to known_hosts, 5 second connect timeout, quiet output.
SSH="sudo ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ConnectTimeout=5 -o PreferredAuthentications=publickey -o PasswordAuthentication=no -q"
SCP="sudo scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ConnectTimeout=5 -o PreferredAuthentications=publickey -o PasswordAuthentication=no -q"

# mysql client invocation; the SQL statement is supplied as the argument that
# follows the trailing -e.
# NOTE(review): the credentials are hard-coded and passed on the command line,
# which exposes them to anyone who can read this file or the process list.
# Consider moving them to a protected option file.
MSQ="mysql -h ccdndb.sys.comcast.net --skip-column-names --batch --protocol=TCP --port=3306 --user=ccdn_tools --password=ccdn_tools --database=ccdn -e"

# Quote $0 and end option parsing so paths containing spaces, or a $0 that
# starts with a dash, do not break basename.
SCRIPTNAME=$(basename -- "$0")
SCRIPTPATH=/usr/local/ccdn/teak_bonding

# Random UUID used as a suffix for per-run scratch file names.
RANDOMNESS=$(uuidgen -r)
#######################################
# Print the command-line help text and terminate the script.
# Arguments: none
# Outputs:   the help text on stdout
# Returns:   never returns; exits the script with status 1
#######################################
function usage() {
  # Quoted delimiter: the help text is literal, nothing in it is expanded.
  cat << 'EOF'
This script is used to facilitate the addition of a second 10G interface to a Teak node
Options:
-p : initiate a pre-check against the node
-P : initiate a post-check against the node
-c : cutover the node - the node will be converted to a dual 10G setup
-h : the node against which we perform any action
EOF
  exit 1
}
#######################################
# Report a pre-check status message for a node: echo it to stdout and store it
# in the onecache_prenotes column of the node's teak_pre_load row.
# Globals:   MSQ (read) - mysql command prefix, expanded with word splitting
# Arguments: $1 - hostname
#            $2 - message
# Outputs:   "<hostname> - <message>" on stdout
# Returns:   exit status of the mysql invocation
#######################################
function update_db_pre_status() {
  local hostname=$1
  local message=$2
  local sql_hostname sql_message

  echo "${hostname} - ${message}"

  # The values are embedded in double-quoted SQL string literals, so escape
  # backslashes first and then double quotes; otherwise a message containing
  # either character would break (or alter) the statement.
  sql_message=${message//\\/\\\\}
  sql_message=${sql_message//\"/\\\"}
  sql_hostname=${hostname//\\/\\\\}
  sql_hostname=${sql_hostname//\"/\\\"}

  # MSQ is intentionally unquoted: it holds the command plus its options.
  # shellcheck disable=SC2086
  ${MSQ} "update teak_pre_load set onecache_prenotes=\"${sql_message}\" where hostname=\"${sql_hostname}\";"
}
#######################################
# Report a post-check status message for a node: echo it to stdout and store it
# in the onecache_postnotes column of the node's teak_pre_load row.
# Globals:   MSQ (read) - mysql command prefix, expanded with word splitting
# Arguments: $1 - hostname
#            $2 - message
# Outputs:   "<hostname> - <message>" on stdout
# Returns:   exit status of the mysql invocation
#######################################
function update_db_post_status() {
  local hostname=$1
  local message=$2
  local sql_hostname sql_message

  echo "${hostname} - ${message}"

  # The values are embedded in double-quoted SQL string literals, so escape
  # backslashes first and then double quotes; otherwise a message containing
  # either character would break (or alter) the statement.
  sql_message=${message//\\/\\\\}
  sql_message=${sql_message//\"/\\\"}
  sql_hostname=${hostname//\\/\\\\}
  sql_hostname=${sql_hostname//\"/\\\"}

  # MSQ is intentionally unquoted: it holds the command plus its options.
  # shellcheck disable=SC2086
  ${MSQ} "update teak_pre_load set onecache_postnotes=\"${sql_message}\" where hostname=\"${sql_hostname}\";"
}
#######################################
# Run the pre-flight checks against a node.
#
# Always checked: ssh reachability, DRAC reachability on port 443.
# Only enforced when $2 is 1: no active streams on port 80, at least one peer
# node answering on :8087/status, link detected on both slave NICs.
#
# Globals read:    SSH, fqdn, dracip, myip
# Globals written: p_prefix, slave1_link, slave2_link, slave1_mac, slave2_mac
#                  (left global on purpose: cutover and postcheck read
#                  p_prefix, and cutover reads slave2_mac)
# Arguments: $1 - hostname
#            $2 - 1 to enforce the extensive checks, anything else to skip them
# Outputs:   progress messages on stdout; status notes via
#            update_db_pre_status / update_db_post_status
# Returns:   0 when every applicable check passed, 2 when a check failed.
#            Exits the whole script with status 1 when bond0 already reports
#            two slave interfaces.
#######################################
function precheck() {
  local host=${1}
  local extensive=$2
  local sshret streamcount peers peer response_code proceed up_count

  echo "##########"
  echo "Performing check on ${fqdn} with DRAC of https://${dracip}"
  echo

  # ssh check: the node must answer and report an x86_64 machine type.
  # SSH is intentionally unquoted throughout (command plus options).
  sshret=$(${SSH} "${host}" uname -m)
  if [[ ! ${sshret} =~ x86_64 ]]; then
    update_db_pre_status "${host}" "${fqdn} does not appear to be up right now - we could not log to it."
    return 2
  fi

  # DRAC check: port 443 must accept a connection within 3 seconds.
  if ! nc -w 3 -z "${dracip}" 443 > /dev/null 2>&1; then
    update_db_pre_status "${host}" "DRAC (${dracip}) does not appear to be responding for ${fqdn}."
    return 2
  fi

  # Still-streaming check: count sockets on <myip>:80 that are not in
  # LISTEN/TIME_WAIT/CLOSE_WAIT.
  streamcount=$(${SSH} "${host}" "netstat -na | egrep -v 'LISTEN|TIME_WAIT|CLOSE_WAIT' | egrep ${myip}':80[^0-9]' | wc -l")
  echo "${fqdn} - stream count : ${streamcount}"
  if [[ ${extensive} -eq 1 && ${streamcount} -gt 0 ]]; then
    update_db_pre_status "${host}" "Not performing bonding change - we still have ${streamcount} streams active on ${host}."
    return 2
  fi

  # Check that at least one other node of the cluster is up.
  if [[ ${extensive} -eq 1 ]]; then
    # Peer list comes from the node= lines of teak.config, minus our own IP
    # (matched as a fixed string so the dots are not regex wildcards), with
    # the :80 port rewritten to the :8087 status port.
    peers=$(${SSH} "${host}" "grep node= /opt/teak/etc/teak.config" \
      | grep -vF -- "${myip}" \
      | awk -F, '{print $2}' \
      | sed -e 's/:80/:8087/g')
    proceed=0
    # Word splitting of ${peers} is intended: one peer per word.
    for peer in ${peers}; do
      # -D /dev/stdout dumps the response headers; the first line is the
      # HTTP status line.
      response_code=$(curl -H "Connection: close" --silent -D /dev/stdout "http://${peer}/status" | head -n 1)
      if [[ ${response_code} =~ 200 ]]; then
        proceed=1
        break # one live peer is enough
      fi
    done
    if [[ ${proceed} -eq 0 ]]; then
      update_db_pre_status "${host}" "No peer nodes in cluster are alive - not doing bonding change."
      return 2
    fi
  fi

  # Account for p4p1 vs p2p1: take the first two characters of the first
  # slave interface name listed for bond0.
  p_prefix=$(${SSH} "${host}" cat /proc/net/bonding/bond0 | grep "Slave Interface" | head -n 1 | awk '{print $NF}' | cut -c 1-2)

  # Check for link on the slave NICs and record their MAC addresses.
  slave1_link=$(${SSH} "${host}" "/sbin/ethtool ${p_prefix}p1" | grep "Link detected" | awk '{print $NF}')
  slave2_link=$(${SSH} "${host}" "/sbin/ethtool ${p_prefix}p2" | grep "Link detected" | awk '{print $NF}')
  slave1_mac=$(${SSH} "${host}" "cat /sys/class/net/${p_prefix}p1/address")
  slave2_mac=$(${SSH} "${host}" "cat /sys/class/net/${p_prefix}p2/address")
  echo "${fqdn} - current server side link state: ${p_prefix}p1 (${slave1_mac}) - ${slave1_link}, ${p_prefix}p2 (${slave2_mac}) - ${slave2_link}"
  if [[ ${extensive} -eq 1 ]]; then
    if [[ ${slave1_link} != 'yes' || ${slave2_link} != 'yes' ]]; then
      update_db_pre_status "${host}" "We do not have link on both interfaces, not proceeding."
      return 2
    fi
  fi

  # Check the /proc file: two slave interfaces means the node is already
  # bonded. This path terminates the script rather than returning.
  up_count=$(${SSH} "${host}" "cat /proc/net/bonding/bond0" | grep -c "Slave Interface")
  if [[ ${up_count} -eq 2 ]]; then
    update_db_post_status "${host}" "already converted to a bonded configuration."
    exit 1
  else
    update_db_post_status "${host}" "not yet converted to a bonded configuration."
  fi

  # All previous checks are good.
  return 0
}
#######################################
# Validate a node after the bonding change: purge and refetch some recently
# served URLs, then report how many bytes each bond slave received while
# refetching, plus ethtool counters and the bond0 MII status.
#
# Globals read:    SSH, fqdn
# Globals written: p_prefix (derived here only when it is not already set)
# Arguments: $1 - hostname
# Outputs:   report on stdout; status notes via update_db_post_status
# Returns:   2 when the ssh check fails; otherwise the status of the final
#            bond0 check. Exits the whole script with status 1 when the
#            per-slave rx_bytes counters cannot be read.
#######################################
function postcheck() {
  local host=${1}
  local sshret up_count
  local preslave1 preslave2 postslave1 postslave2 deltaslave1 deltaslave2

  # ssh check. SSH is intentionally unquoted throughout (command + options).
  sshret=$(${SSH} "${host}" uname -m)
  if [[ ! ${sshret} =~ x86_64 ]]; then
    update_db_post_status "${host}" "Failed ssh post-check."
    return 2
  fi

  # p_prefix is otherwise only set by precheck. When postcheck is run on its
  # own (-P) it is still empty, which made every slave path below wrong, so
  # derive it the same way precheck does.
  if [[ -z ${p_prefix:-} ]]; then
    p_prefix=$(${SSH} "${host}" cat /proc/net/bonding/bond0 | grep "Slave Interface" | head -n 1 | awk '{print $NF}' | cut -c 1-2)
  fi

  # Purge some content so it has to be refetched: collect URLs of recent
  # pssc=200 log entries into /dev/shm/bonding_urls on the node, rewrite them
  # to point at localhost:8088, and send a PURGE for each.
  ${SSH} "${host}" "tail -n 100 /opt/trafficserver/var/log/trafficserver/custom_ats_2.log | egrep \"bytes.*pssc=200\" | awk -F\"url=\" '{print \$2}' | awk '{print \$1}' > /dev/shm/bonding_urls; perl -pi -e \"s#http://quika.*comcast.net:80/\d+/(.*)#http://localhost:8088/\1#g\" /dev/shm/bonding_urls; cat /dev/shm/bonding_urls | while read url; do curl -X PURGE --silent \$url; done "

  # rx_bytes per slave before the refetch; an unreadable counter means the
  # slave is not part of bond0.
  preslave1=$(${SSH} "${host}" cat "/sys/devices/virtual/net/bond0/slave_${p_prefix}p1/statistics/rx_bytes" 2> /dev/null)
  preslave2=$(${SSH} "${host}" cat "/sys/devices/virtual/net/bond0/slave_${p_prefix}p2/statistics/rx_bytes" 2> /dev/null)
  if [[ -z ${preslave1} || -z ${preslave2} ]]; then
    echo "${fqdn} - Uh-oh, looks like node is not bonded correctly, exiting."
    echo "${fqdn} - please check state of /proc/net/bonding/bond0"
    exit 1
  fi

  # Refetch the purged URLs, then read the counters again.
  ${SSH} "${host}" "cat /dev/shm/bonding_urls | while read url; do curl -o /dev/null --silent \$url ; done"
  postslave1=$(${SSH} "${host}" cat "/sys/devices/virtual/net/bond0/slave_${p_prefix}p1/statistics/rx_bytes")
  postslave2=$(${SSH} "${host}" cat "/sys/devices/virtual/net/bond0/slave_${p_prefix}p2/statistics/rx_bytes")
  deltaslave1=$((postslave1 - preslave1))
  deltaslave2=$((postslave2 - preslave2))

  echo "We just purged and refetched some content...here is count of bytes pulled by each interface."
  echo
  echo "${p_prefix}p1 : ${deltaslave1}"
  echo "${p_prefix}p2 : ${deltaslave2}"
  echo
  echo "These values should be both greater than 0."
  echo

  # ethtool statistics for both slaves
  echo "${p_prefix}p1 stats"
  ${SSH} "${host}" "ethtool -S ${p_prefix}p1 | egrep '[rt]x_(errors|dropped|packets|bytes):' | sort"
  echo
  echo "${p_prefix}p2 stats"
  ${SSH} "${host}" "ethtool -S ${p_prefix}p2 | egrep '[rt]x_(errors|dropped|packets|bytes):' | sort"
  echo

  # Check the /proc file: expect three "MII Status: up" lines.
  up_count=$(${SSH} "${host}" "cat /proc/net/bonding/bond0" | grep -c "MII Status: up")
  if [[ ${up_count} -ne 3 ]]; then
    update_db_post_status "${host}" "Please check the state of /proc/net/bonding/bond0 on ${host} - we should see 3 interfaces up but I am not seeing that."
  fi
  #reenable checks
  #${SCRIPTPATH}/nagios_enable_checks.sh -h ${fqdn}
}
#######################################
# Convert a node to the dual-10G bonded setup: run the extensive precheck,
# install an ifcfg file that enslaves the second NIC to bond0, reload the
# ixgbe driver and restart networking on the node, then run postcheck.
#
# Globals read: SSH, SCP, RANDOMNESS, dracip, and p_prefix / slave2_mac as
#               set by precheck
# Arguments: $1 - hostname
# Outputs:   progress messages on stdout; status notes via
#            update_db_post_status
# Returns:   the status of postcheck. Exits the whole script with status 2
#            when precheck fails, and with status 1 when the node is already
#            bonded, the second NIC's MAC cannot be determined, or the ifcfg
#            file cannot be copied to the node.
#######################################
function cutover() {
  local host=$1
  local up_count p2_mac ifcfg_tmp

  if ! precheck "${host}" 1; then
    update_db_post_status "${host}" "Found some problems with precheck - not proceeding."
    exit 2
  fi
  echo
  echo "Starting cutover process for $host at $(date)."
  echo
  #echo "Putting node into downtime so hopefully CNOC does not page out....fat chance, I know."
  #echo
  #${SCRIPTPATH}/nagios_disable_checks.sh -h ${fqdn}
  #echo
  echo "DRAC IP: https://${dracip}/"
  echo

  # Check the /proc file: two slave interfaces means there is nothing to do.
  # SSH/SCP are intentionally unquoted throughout (command plus options).
  up_count=$(${SSH} "${host}" "cat /proc/net/bonding/bond0" | grep -c "Slave Interface")
  if [[ ${up_count} -eq 2 ]]; then
    update_db_post_status "${host}" "already converted to a bonded configuration, exiting."
    exit 1
  fi

  echo "Starting bonding change for $host...."

  # Read the MAC of the second NIC. (The original wrote this as
  # `${p_prefix}p2_mac=...`, which is not a valid assignment and was executed
  # as a command.) Fall back to the value precheck recorded.
  p2_mac=$(${SSH} "${host}" "cat /sys/class/net/${p_prefix}p2/address")
  p2_mac=${p2_mac:-${slave2_mac}}
  if [[ -z ${p2_mac} ]]; then
    # Never push an ifcfg with an empty HWADDR and then bounce the network.
    update_db_post_status "${host}" "Unable to determine MAC address of ${p_prefix}p2 - not proceeding."
    exit 1
  fi

  ifcfg_tmp="/dev/shm/teak_bonding.${host}.${RANDOMNESS}"
  cat > "${ifcfg_tmp}" << EOF
DEVICE="${p_prefix}p2"
HWADDR="${p2_mac}"
ONBOOT="yes"
SLAVE="yes"
MASTER="bond0"
EOF

  # Only restart the remote network once the new config is really in place.
  if ! ${SCP} "${ifcfg_tmp}" "${host}:/etc/sysconfig/network-scripts/ifcfg-${p_prefix}p2"; then
    rm -f "${ifcfg_tmp}"
    update_db_post_status "${host}" "Failed to copy ifcfg-${p_prefix}p2 to ${host} - not proceeding."
    exit 1
  fi

  ${SSH} "${host}" "/sbin/service network stop ; modprobe -r ixgbe; modprobe ixgbe ; sleep 5; /sbin/service network restart"
  echo
  rm -f "${ifcfg_tmp}"
  echo "Cutover complete...now attempting to validate."
  echo
  postcheck "${host}"
}
# ---------------------------------------------------------------------------
# Main: parse the options, resolve the node (FQDN and IP from /etc/hosts,
# DRAC address from the database) and dispatch the requested action.
# host, fqdn, myip and dracip are globals read by precheck/postcheck/cutover.
# ---------------------------------------------------------------------------
extensive=0
action=null
host=null

if [[ ${EUID} -ne 0 ]]; then
  echo "You must be root or use sudo for this script."
  exit 1
fi

while getopts "h:pPc" opt; do
  case "${opt}" in
    h) host=${OPTARG} ;;
    p) action=pre ;;
    P) action=post ;;
    c) action=cutover ;;
    *) usage ;;
  esac
done

# Both a node (-h) and an action (-p, -P or -c) are required.
if [[ ${host} = null ]] || [[ ${action} = null ]]; then
  usage
fi

# Look the node up in /etc/hosts once. -F matches the name literally (dots in
# a hostname are not regex wildcards), -w keeps the whole-word match, and --
# protects against a name that starts with a dash.
hosts_entry=$(grep -wF -- "${host}" /etc/hosts)
fqdn_count=$(grep -c . <<< "${hosts_entry}")
if [[ ${fqdn_count} -ne 1 ]]; then
  echo "Unable to match $host from /etc/hosts file - please check the host."
  echo "We need to only match one node so please be more specific."
  exit 1
else
  # First field of the hosts line is the IP address, second is the FQDN.
  read -r myip fqdn hosts_rest <<< "${hosts_entry}"
  if [[ -z ${fqdn} ]]; then
    update_db_pre_status "${host}" "Unable to determine FQDN."
    exit 1
  fi
  if [[ -z ${myip} ]]; then
    update_db_pre_status "${host}" "Unable to determine IP address."
    exit 1
  fi
  # MSQ is intentionally unquoted: it holds the command plus its options.
  dracip=$(${MSQ} "select terminal_server from hosts where hostname=\"${fqdn}\"")
  if [[ -z ${dracip} ]]; then
    update_db_pre_status "${host}" "Unable to determine DRAC IP address."
    exit 1
  fi
  # From here on every action works on the fully qualified name.
  host=${fqdn}
fi

if [[ ${action} = pre ]]; then
  if ! precheck "${host}" "${extensive}"; then
    echo "Pre-check failed for $host."
    exit 1
  fi
fi
if [[ ${action} = cutover ]]; then
  cutover "${host}"
fi
if [[ ${action} = post ]]; then
  postcheck "${host}"
fi
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement