Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/local/bin/bash
# spinpid.sh version 2017-01-01. Run as superuser. See notes at end.

##############################################
#
# Settings
#
##############################################

# Where the log is written.
LOG=/root/spinpid.log

# ipmitool command line, including host and credentials.
# <IP> and <U> are placeholders to be replaced with real values.
# The variable is expanded unquoted wherever it is used, so that the
# command and its options are split into separate words.
IPMITOOL="/usr/local/bin/ipmitool -H <IP> -U <U> -P <U>"

# ---- Drive settings ----

# Setpoint mean temperature
SP=35.0

# Time interval for checking drives in minutes.
T=2

# PID tunable constants
Kp=4    # Proportional
Ki=0    # Integral
Kd=40   # Derivative

# Combined PID correction; recomputed on every drive check.
PID=0

# ---- Fan settings ----

# Fan minimum duty cycle (%) (to avoid stalling)
FAN_MIN=50
##############################################
# function get_disk_name
# Get disk name from one line of the camcontrol device list.
#
# The device pair is the last parenthesised group on the line, holding
# the passthrough device ("pass#") and the disk ("ada#" / "da#").
# camcontrol does not print the pair in a fixed order, e.g.
#   (ada0,pass0)   or   (pass3,da2)
# so whichever entry is NOT the pass device is taken as the disk.
#
# Globals:
#   LINE  (read)    - devlist line, used when no argument is given
#   DEVID (written) - disk name; empty if the line has no device pair
# Arguments:
#   $1 - optional devlist line; defaults to $LINE
# Returns:
#   0 always
##############################################
function get_disk_name {
  local line=${1-$LINE}
  local pair first second

  DEVID=""

  # Nothing to extract unless the line contains a "(x,y)" group.
  [[ $line == *"("*","*")"* ]] || return 0

  # Keep only the contents of the LAST parenthesised group; the drive
  # description earlier in the line may itself contain '(' or ','.
  pair=${line##*\(}
  pair=${pair%%\)*}
  first=${pair%%,*}
  second=${pair#*,}

  if [[ $first == pass* ]]; then
    DEVID=$second
  else
    DEVID=$first
  fi
}
############################################################
# function print_header
# Called when script starts and each quarter day.
#
# Prints a date line followed by the column-heading line: one
# 5-character column per drive name, then the fixed data headings.
# No trailing newline is written after the heading line.
#
# Globals:
#   DEVLIST  (read)    - device list, one drive per line
#   DEVCOUNT (read)    - number of drives, sets the date-line width
#   DEVID    (written) - by get_disk_name, once per drive
############################################################
function print_header {
  # LINE is what get_disk_name reads; declaring it local here keeps the
  # loop from clobbering a caller's LINE while still being visible to
  # get_disk_name (bash uses dynamic scoping).
  local LINE
  local today width

  today=$(date +"%A, %b %d")
  width=$(( DEVCOUNT * 5 + 70 ))   # 5 spaces per drive

  printf "\n%-*s %-8s %s \n" "$width" "$today" "Fan %" "Interim CPU"
  echo -n " "

  while read -r LINE; do
    get_disk_name
    printf "%-5s" "$DEVID"
  done <<< "$DEVLIST"

  printf "%4s %5s %5s %6s %5s %6s %3s %s %4s %-7s %s %s" \
    "Tmax" "Tmean" "ERRc" "P" "I" "D" "CPU" "Driver" "RPM" "MODE" \
    "Curr/New" "Adjustments"
}
#################################################
# function ipmi_read_duty
# Ask the BMC for the current fan duty cycle.
#
# Globals:
#   IPMITOOL  (read)    - ipmitool command line
#   DUTY_CURR (written) - first byte of the reply as a bare hex token
#                         (no "0x", no surrounding blanks); empty if
#                         ipmitool printed nothing
# Arguments:
#   $1 - last byte of the raw request (callers in this script pass 0x01)
# Returns:
#   exit status of ipmitool
#################################################
function ipmi_read_duty {
  local reply rc

  # $IPMITOOL is intentionally unquoted: it holds a command plus options.
  reply=$($IPMITOOL raw 0x30 0x70 0x66 0x00 "$1")
  rc=$?

  # Keep only the first whitespace-delimited token of the reply, so the
  # stored value is clean whether or not the caller quotes it.
  read -r DUTY_CURR _ <<< "$reply"

  return "$rc"
}
#################################################
# function ipmi_set_duty
# Send a new fan duty cycle to the BMC.
#
# Whatever ipmitool prints is echoed with its whitespace collapsed to
# single blanks and without a trailing newline, so a successful call
# does not start a new line in the log.
#
# Globals:
#   IPMITOOL (read) - ipmitool command line
# Arguments:
#   $1 - first byte after the "1" in the raw request (callers pass 0x01)
#   $2 - duty cycle as a hex byte, e.g. 0x32
# Returns:
#   exit status of ipmitool
#################################################
function ipmi_set_duty {
  local IFS=$' \t\n'
  local reply rc
  local -a words=()

  # $IPMITOOL is intentionally unquoted: it holds a command plus options.
  reply=$($IPMITOOL raw 0x30 0x70 0x66 1 "$1" "$2")
  rc=$?

  # Split the reply into words and re-join them with single blanks.
  # read returns non-zero at end of input; that is expected here.
  read -r -d '' -a words <<< "$reply"
  printf '%s' "${words[*]}"

  return "$rc"
}
#################################################
# function drive_data: Read, process, print data
#
# Computes the mean drive temperature and the P, I and D corrections,
# reads the current duty cycle, fan mode and fan RPM over IPMI, and
# prints Tmax and Tmean on the current log line.
#
# Globals read:
#   Tsum, i, Tmax     - sum, count and max of spinning-drive temperatures
#   SP, T, Kp, Ki, Kd - setpoint, interval (minutes), PID constants
#   ERRc, I           - error and integral term from the previous cycle
#   DUTY_NEW, FAN_MIN - fallback when the duty cycle cannot be read
#   IPMITOOL          - ipmitool command line
# Globals written:
#   Tmean, ERRp, ERRc, ERR, P, I, D, PID (rounded to an integer),
#   DUTY_CURR (decimal %), MODE (decimal), MODEt (text), RPM
#################################################
function drive_data {
  local hex

  if (( i > 0 )); then
    Tmean=$(bc <<< "scale=3; $Tsum / $i")
    ERRp=$ERRc
    ERRc=$(bc <<< "scale=2; $Tmean - $SP")
    # NOTE(review): the previous I (already scaled by Ki) is fed back
    # into ERR and scaled by Ki again. Harmless while Ki=0; confirm the
    # intended integral formula before using a non-zero Ki.
    ERR=$(bc <<< "scale=2; $ERRc * $T + $I")
    P=$(bc <<< "scale=2; $Kp * $ERRc")
    I=$(bc <<< "scale=2; $Ki * $ERR")
    D=$(bc <<< "scale=2; $Kd * ($ERRc - $ERRp) / $T")
    PID=$(bc <<< "scale=2; $P + $I + $D")   # add 3 corrections
    PID=$(printf '%.0f' "$PID")             # round
  else
    # No drive is spinning, so there is no mean temperature: dividing by
    # i would be a division by zero. Report 0, hold the last error, and
    # apply no correction this cycle.
    Tmean=0
    ERRp=$ERRc
    P=0
    D=0
    PID=0
  fi

  # Read duty cycle (hex) and convert it to decimal.
  # Some boards apparently return incorrect data; if the reply is not a
  # hex number, assume the duty is still what was last commanded.
  ipmi_read_duty 0x01
  read -r hex _ <<< "$DUTY_CURR"
  if [[ $hex =~ ^[0-9A-Fa-f]+$ ]]; then
    DUTY_CURR=$(( 16#$hex ))
  else
    DUTY_CURR=${DUTY_NEW:-$FAN_MIN}
  fi

  # Read fan mode (hex) and convert it to decimal; -1 if unreadable.
  # $IPMITOOL is intentionally unquoted: it holds a command plus options.
  read -r hex _ <<< "$($IPMITOOL raw 0x30 0x45 0)"
  if [[ $hex =~ ^[0-9A-Fa-f]+$ ]]; then
    MODE=$(( 16#$hex ))
  else
    MODE=-1
  fi

  # Text for mode
  case $MODE in
    0) MODEt="Standard" ;;
    1) MODEt="Full" ;;
    2) MODEt="Optimal" ;;
    4) MODEt="HeavyIO" ;;
    *) MODEt="Unknown" ;;   # never leave MODEt stale or empty
  esac

  # Get reported fan speed in RPM: from the sensor line containing
  # "FANA", take the first run of 2 to 5 digits. Falls back to 0 when
  # there is no reading so the log columns stay aligned.
  RPM=$($IPMITOOL sdr | grep "FANA" | grep -Eo '[0-9]{2,5}' | head -n 1)
  RPM=${RPM:-0}

  # Print current Tmax and Tmean.
  printf "^%-3d %5.2f" "${Tmax:-0}" "$Tmean"
}
##############################################
# function DRIVES_check_adjust
# Print time on new log line.
# Go through each drive, getting and printing
# status and temp. Calculate sum and max
# temp of the spinning drives, then call function
# drive_data and pass current duty + PID to adjust_fans.
#
# Globals read:
#   DEVLIST  - device list, one drive per line
#   SMARTCTL - optional smartctl command; defaults to
#              /usr/local/sbin/smartctl
# Globals written:
#   TIME, LINE, Tmax, Tsum, i, DUTY_DRIVE, DRIVER, MAX
#   (plus whatever get_disk_name, drive_data and adjust_fans set)
##############################################
function DRIVES_check_adjust {
  local smartctl=${SMARTCTL:-/usr/local/sbin/smartctl}
  local temp state rc

  echo   # start new line
  # print time on each line
  TIME=$(date "+%H:%M:%S")
  printf '%s ' "$TIME"

  # initialize drive temps for new loop through drives
  Tmax=0
  Tsum=0
  i=0    # count number of spinning drives

  while read -r LINE; do
    get_disk_name

    # Raw value of the Temperature_Celsius attribute: the 10th column of
    # the attribute line; any trailing "(Min/Max ...)" text is further
    # columns and is ignored.
    temp=$("$smartctl" -a -n standby "/dev/$DEVID" \
      | awk '/Temperature_Celsius/ && !seen++ { print $10 }')

    # Separate call used only for its exit status:
    # 0 = spinning, 2 = standby, anything else = unknown.
    "$smartctl" -n standby "/dev/$DEVID" > /dev/null
    rc=$?
    case $rc in
      0) state="*" ;;   # spinning
      2) state="_" ;;   # standby
      *) state="?" ;;   # state unknown
    esac

    # Accept only plain digits; force base 10 so a leading zero is not
    # taken as octal.
    if [[ $temp =~ ^[0-9]+$ ]]; then
      temp=$(( 10#$temp ))
    else
      temp=""
    fi

    printf "%s%-2d " "$state" "${temp:-0}"

    # Update temperatures each drive; spinners with a valid reading only
    if [[ $state == "*" && -n $temp ]]; then
      (( Tsum += temp ))
      # numeric comparison; a string comparison would rank 9 above 10
      if (( temp > Tmax )); then
        Tmax=$temp
      fi
      (( i += 1 ))
    fi
  done <<< "$DEVLIST"

  drive_data   # manage data

  DUTY_DRIVE=$(( DUTY_CURR + PID ))
  DRIVER="Drives"
  MAX=$DUTY_DRIVE
  adjust_fans "$MAX"   # passing the new duty to the function adjust_fans
}
##############################################
# function adjust_fans
# Clamp the requested duty cycle and send it to
# the BMC if it differs from the current one.
#
# Globals read:
#   FAN_MIN   - lower limit for the duty cycle (%)
#   FAN_MAX   - optional upper limit (%); defaults to 95
#   DUTY_CURR - current duty cycle (%), decimal
# Globals written:
#   DUTY_NEW  - clamped duty cycle (%), decimal
#   DUTYhex   - DUTY_NEW as 0x.. hex (only when a change is sent)
# Arguments:
#   $1 - requested new duty cycle (%), integer
##############################################
function adjust_fans {
  local max_duty=${FAN_MAX:-95}

  # Reset BMC if fans seem stuck: cool and >80% OR warm and <30%
  # if [[ $Tmean<$(($SP - 1)) && $DUTY>0x50 ]] || [[ $Tmean>$(($SP + 5)) && $DUTY<0x1E ]]; then
  # $IPMITOOL bmc reset warm; fi

  DUTY_NEW=$1

  # Don't allow duty cycle outside FAN_MIN..max_duty
  if [[ $DUTY_NEW -gt $max_duty ]]; then
    DUTY_NEW=$max_duty
  fi
  if [[ $DUTY_NEW -lt $FAN_MIN ]]; then
    DUTY_NEW=$FAN_MIN
  fi

  # Change if different from current duty
  if [[ $DUTY_NEW -ne $DUTY_CURR ]]; then
    printf -v DUTYhex '0x%x' "$DUTY_NEW"   # hexify
    ipmi_set_duty 0x01 "$DUTYhex"
  fi
}
#####################################################
# All this happens only at the beginning
# Initializing values, list of drives, print header
#####################################################

# Wait 3 minutes before doing anything.
# NOTE(review): presumably lets the system settle after boot - confirm.
sleep 180

# Loop interval in seconds (bc is used so T may be fractional here).
DRIVE_T=$(bc <<< "$T * 60")

# Initialize errors to 0
I=0
ERRc=0

# Creates logfile and sends all stdout and stderr to the log, as well as to the console.
# If you want to append to existing log, add '-a' to the tee command.
exec > >(tee -i "$LOG") 2>&1

# Get list of drives
DEVLIST1=$(/sbin/camcontrol devlist)

# Remove lines with flash drives or SSD; edit as needed
# You could use another strategy, e.g., find something in the camcontrol devlist
# output that is unique to the drives you want, for instance only WDC drives:
# if [[ $LINE != *"WDC"* ]] . . .
DEVLIST=$(sed '/Virtual disk/d' <<< "$DEVLIST1")

# Without any drive there is nothing to regulate, and the mean
# temperature could never be computed.
if [[ -z $DEVLIST ]]; then
  printf 'spinpid: no drives found in camcontrol devlist; exiting\n' >&2
  exit 1
fi

# Number of non-empty lines; grep -c prints a bare number.
DEVCOUNT=$(grep -c . <<< "$DEVLIST")

# Set mode to 'Full' to avoid BMC changing duty cycle
# Need to wait a tick or it doesn't get 2nd command
# "echo -n ``" to avoid annoying newline generated in log
### Not using 'Full'
### echo -n `$IPMITOOL raw 0x30 0x45 1 1`; sleep 1

# Then start with 50% duty cycle and let algorithm adjust from there
DUTY_NEW=50
DUTY_DRIVE=50
printf -v DUTYhex '0x%x' "$DUTY_NEW"
ipmi_set_duty 0x01 "$DUTYhex"
sleep 3   # let fans respond

printf "\nDrive states: * spinning; _ standby; ? unknown\n"
print_header
############################################
# Main: Loop through drives every T minutes
############################################
while true; do
  # Print header every quarter day. HM is the wall-clock time as HHMM;
  # 10# forces base 10 so a leading 0 is not seen as octal.
  HM=$(date +%H%M)
  HM=$(( 10#$HM ))
  R=$(( HM % 600 ))   # 0 at 00:00, 06:00, 12:00 and 18:00
  if (( R < T )); then
    print_header
  fi

  DRIVES_check_adjust

  # Every argument is quoted and given a default so that an empty value
  # cannot shift the remaining columns. The CPU column is always -1 here.
  printf "%6.2f %6.2f %5.2f %6.2f %3d %-6s %4d %-7s %2d/%-6d" \
    "${ERRc:-0}" "${P:-0}" "${I:-0}" "${D:-0}" -1 "${DRIVER:--}" \
    "${RPM:-0}" "${MODEt:--}" "${DUTY_CURR:-0}" "${DUTY_NEW:-0}"

  sleep "$DRIVE_T"
done
- # Adjusts fans based on drive or CPU temperatures, whichever
- # needs more cooling. Mean temp among drives is maintained at a setpoint
- # using a PID algorithm. CPU temp regulation uses just core 0
- # (they all stay within a few degrees of each other). CPU temp
- # need not and cannot be maintained at a setpoint, so PID is not
- # used; instead fan duty cycle is simply increased with temp.
- # Drives are checked and fans adjusted on a set interval, such as 6 minutes.
- # Logging is done at that point. CPU temps can spike much faster,
- # so are checked at a shorter interval, such as 30 seconds. Those
- # adjustments are not logged.
- # Logs:
- # - disk status (spinning or standby)
- # - disk temperature (Celsius) if spinning
- # - max and mean disk temperature
- # - CPU 0 temperature
- # - fan rpm and mode
- # - current and new fan duty cycle
- # - PID variables
- # - adjustments to fan duty cycle due to interim CPU loops
- # Includes disks on motherboard and on HBA.
- # Relation between percent duty cycle, hex value of that number,
- # and RPMs for my fans. RPM will vary among fans, is not
- # precisely related to duty cycle, and does not matter to the script.
- # It is merely reported.
- #
- # Percent Hex RPM
- # 10 A 300
- # 20 14 400
- # 30 1E 500
- # 40 28 600/700
- # 50 32 800
- # 60 3C 900
- # 70 46 1000/1100
- # 80 50 1100/1200
- # 90 5A 1200/1300
- # 100 64 1300
- # Some boards apparently report incorrect duty cycle.
- # If that is happening, disable lines 86-88 in function drive_data.
- # Then the script will assume the duty cycle is the
- # same as it was last set.
- # Tuning suggestions
- # PID tuning advice on the internet generally does not work well in this application.
- # First run the script spincheck.sh and get familiar with your temperature and fan variations without any intervention.
- # Choose a setpoint that is an actual observed Tmean, given the number of drives you have. It should be the Tmean associated with the Tmax that you want.
- # Set Ki=0 and leave it there. You probably will never need it.
- # Start with Kp low. Use a value that results in a rounded correction=1 when error is the lowest value you observe other than 0 (i.e., when ERRc is minimal, Kp ~= 1 / ERRc)
- # Set Kd at about Kp*10
- # Get Tmean within ~0.3 degree of SP before starting script.
- # Start script and run for a few hours or so. If Tmean oscillates (best to graph it), you probably need to reduce Kd. If no oscillation but response is too slow, raise Kd.
- # Stop script and get Tmean at least 1 C off SP. Restart. If there is overshoot and it goes through some cycles, you may need to reduce Kd.
- # If you have problems, examine the P and D columns in the log and see which is messing you up. If all else fails you can try Ki. If you use Ki, make it small, ~ 0.1 or less.
- # Uses joeschmuck's smartctl method for drive status (returns 0 if spinning, 2 in standby)
- # https://forums.freenas.org/index.php?threads/how-to-find-out-if-a-drive-is-spinning-down-properly.2068/#post-28451
- # Other method (camcontrol cmd -a) doesn't work with HBA
- # Removed from drive_data. Though it was working
- # it doesn't seem right to hexify PID ?????
- # PID=$( printf "0x%x" $PID ) # fully hexify with '0x' in front
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement