#!/bin/bash
#
# mprime-phc-setup
#
# Provenance: pasted anonymously ("a guest") on Pastebin, May 20th, 2019.
#
# Find lowest vids for PHC so that mprime doesn't find errors.
# Shouldn't crash the computer, but might.

#####################################
# Parameters.

# short_test_length should be between 15 and 60 s.
# Use a longer length to avoid crashing during the test.
short_test_length=20

# long_test_length should be between 60 and 7200 s or more.
# Bigger values are safer, but increase the test's length.
long_test_length=320

# safety_vid_delta should be between 1 and 4. Bigger values are safer.
# Suggestions:
# - use 4 if long_test_length < 60
# - use 3 if long_test_length >= 60 and < 240
# - use 2 if long_test_length >= 240
# - use 1 only if long_test_length >= 3600
safety_vid_delta=2

# Set to a non-zero value to dump cpufreq state after each mprime launch.
debug=0

# Search limit used as vid_limit when the phc_k8 module is loaded.
amd_max_vid=124

# Tolerance for inaccurate frequencies during the test (in percent)
# Added because of https://bbs.archlinux.org/viewtopic.php?pid=1301126#p1301126
# NOTE: actually it seems that the problem is different: some CPUs have a bogus 1st
#       frequency that is 1 MHz higher than the real top frequency (the 2nd one).
wrong_freq_tolerance=0

#####################################

  37. # Check that settings are sane
  38. if (( short_test_length < 15 )); then
  39.     echo "Forcing short_test_length to 15 seconds."
  40.     short_test_length=15
  41. fi
  42. if (( long_test_length < 30 )); then
  43.     echo "Forcing long_test_length to 30 seconds."
  44.     long_test_length=30
  45. fi
  46. if (( safety_vid_delta < 1 )); then
  47.     echo "Forcing safety vid delta to 1."
  48.     safety_vid_delta=1
  49. fi
  50.  
  51. # Need root privileges to change the vids
  52. if [[ `whoami` != root ]]; then
  53.     echo "Run me as root."
  54.     exit 1
  55. fi
  56.  
  57. # Check that mprime is available
  58. which mprime &>/dev/null
  59. if (( $? != 0 )); then
  60.     echo "mprime is not in the path."
  61.     if [[ ! -e ./mprime ]]; then
  62.         echo "No mprime in the current directory either... Aborting."
  63.         exit 1
  64.     fi
  65.     echo "Using mprime from the current directory."
  66.     mp="./mprime -t"
  67. else
  68.     mp="mprime -t"
  69. fi
  70.  
  71. # Check that PHC is active
  72. cpuf=/sys/devices/system/cpu/cpu0/cpufreq
  73. if [[ ! -e $cpuf/phc_default_vids ]]; then
  74.     echo "The PHC module doesn't seem to be loaded."
  75.     exit 1
  76. fi
  77.  
  78. # Check if AMD processor, then need to reverse the search direction
  79. if lsmod | grep phc_k8 >/dev/null ; then
  80.     vid_delta=1
  81.     vid_limit=$amd_max_vid
  82. else
  83.     vid_delta=-1
  84.     vid_limit=0
  85. fi
  86.  
  87. # Warn user about end of the world
  88. echo ""
  89. echo "Warning: this might crash your computer or applications."
  90. echo "Please save all your work and don't do anything while the test is running."
  91. echo "You can stop the test at any time with CTRL-C."
  92. echo "Press RETURN to go on or CTRL-C to cancel."
  93. read
  94.  
  95. function set_sys_val
  96. {
  97.     #echo Writing $2 to $1
  98.     for i in /sys/devices/system/cpu/cpu*/cpufreq/$1; do
  99.         echo "$2" > $i
  100.     done
  101. }
  102.  
  103. function debug_info
  104. {
  105.     for i in /sys/devices/system/cpu/cpu*/cpufreq/{scaling_governor,cpuinfo_cur_freq,scaling_cur_freq,phc_vids}; do
  106.         echo -n $i | sed 's/\/sys\/devices\/system\/cpu\///'
  107.         echo " = $(cat $i)"
  108.     done
  109. }
  110.  
  111. # Store stuff to be able to cleanup later
  112. backup_governor=$(cat $cpuf/scaling_governor)
  113. backup_phc_vids=$(cat $cpuf/phc_vids)
  114.  
  115. #if [[ $backup_governor != userspace ]]; then
  116.     echo "Switching to the userspace governor. $backup_governor will be restored later."
  117.     modprobe cpufreq_userspace
  118.     set_sys_val scaling_governor userspace
  119. #fi
  120.  
  121. # File to save the state in order to continue after a crash
  122. crash_state="/var/tmp/$(basename $0).state"
  123.  
  124. # Log file for mprime
  125. mp_log=/tmp/$(basename $0).mp
  126.  
  127. # process ID of mprime
  128. mp_pid=-1
  129.  
  130. function launch_mprime
  131. {
  132.     $mp &>$mp_log &
  133.     mp_pid=$!
  134. }
  135.  
  136. function kill_mprime
  137. {
  138.     (( mp_pid <= 1 )) && return
  139.     kill -9 $mp_pid
  140.     wait $mp_pid &>/dev/null # needed to suppress the "killed" message by bash
  141.     mp_pid=-1
  142. }
  143.  
  144. function cleanup
  145. {
  146.     echo ""
  147.     echo "Restoring state..."
  148.  
  149.     kill_mprime
  150.  
  151.     # Restore vids
  152.     set_sys_val phc_vids "$backup_phc_vids"
  153.  
  154.     # Restore governor
  155.     set_sys_val scaling_governor "$backup_governor"
  156.  
  157.     # Delete log
  158.     [[ -e $mp_log ]] && rm $mp_log
  159.  
  160.     # Delete state file
  161.     [[ -e "$crash_state" ]] && rm "$crash_state"
  162. }
  163.  
  164. # Restore original state whenever the script exits
  165. trap cleanup EXIT
  166.  
  167. # List all vids and frequencies
  168. freqs=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies)
  169. default_vids=$(cat /sys/devices/system/cpu/cpu0/cpufreq/phc_default_vids)
  170.  
  171. nb_freqs=0
  172. for f in $freqs; do
  173.     #echo $nb_freqs - $f
  174.     freq[nb_freqs]=$f
  175.     ((nb_freqs++))
  176. done
  177.  
  178. nb_vids=0
  179. for v in $default_vids; do
  180.     #echo $nb_vids - $v
  181.     vid[nb_vids]=$v
  182.     ((nb_vids++))
  183. done
  184.  
  185. if [[ $nb_freqs != $nb_vids ]]; then
  186.     echo "Error: number of vids and number of frequencies differ!"
  187.     exit 1
  188. fi
  189.  
  190. # Check that writing to scaling_max_freq works (I had this problem)
  191. #set_sys_val scaling_max_freq ${freq[1]}
  192. #if [[ $backup_scaling_max_freq == $(cat $cpuf/scaling_max_freq) ]]; then
  193. #   echo "Error: cannot write to scaling_max_freq!"
  194. #   echo "Try updating your kernel, rebooting and/or reinstalling PHC."
  195. #   exit 1
  196. #fi
  197.  
  198. # Estimate length of test
  199. estimate_min=$((short_test_length * (${vid[0]} - 2) + long_test_length))
  200. estimate_max=$((short_test_length * (${vid[0]} - 2) + nb_freqs * long_test_length * 3 / 2))
  201.  
  202. function print_time
  203. {
  204.     # input: $1 = number of seconds
  205.     # output: xx h yy min
  206.     local seconds=$1
  207.     local days=$((seconds/3600/24))
  208.     local seconds=$((seconds-days*3600*24))
  209.     local hours=$((seconds/3600))
  210.     seconds=$((seconds-hours*3600))
  211.     local minutes=$((seconds/60))
  212.     local r
  213.     ((days>0)) && r="$days d "
  214.     ((hours>0 || days>0)) && r="$r$hours h "
  215.     ((days==0)) && r="$r$minutes min"
  216.     echo -n $r
  217. }
  218.  
  219. echo -n "Estimated time to completion: between "
  220. print_time estimate_min
  221. echo -n " and "
  222. print_time estimate_max
  223. echo ""
  224.  
  225.  
  226. # For each available frequency, try to lower the vid as much as possible
  227.  
  228. # 1st pass: Lower the vid, test mprime for a small amount of time at each step.
  229. #           If an error is detected, increment cur_vid and continue with pass 2.
  230. #           If vid 0 is reached, continue with pass 2.
  231. # 2nd pass: Test cur_vid for a long time.
  232. #           If there is an error, increment cur_vid and loop.
  233. #           Stop when there is no error or cur_vid >= max_vid-delta.
  234. # Final step: best_vid=cur_vid+delta
  235.  
  236. # set_vid index vid
  237. function set_vid
  238. {
  239.     # Generate phc_vids string
  240.     local v=""
  241.     local i
  242.     for (( i=0; i<nb_freqs; i++ )); do
  243.         if (( $i == $1 )); then
  244.             v="$v$2"
  245.         else
  246.             v="$v${vid[i]}"
  247.         fi
  248.         (( i < nb_freqs-1 )) && v="$v "
  249.     done
  250.     set_sys_val phc_vids "$v"
  251. }
  252.  
  253. # Save progress in order to be able to continue after a crash
  254. function save_state
  255. {
  256.     # Restore default vids before writing to disk to avoid a possible crash
  257.     set_sys_val phc_vids "$default_vids"
  258.  
  259.     # state file contains:
  260.     # - current frequency between 0 and nb_freqs - 1
  261.     # - current VID
  262.     # - current list of best VIDS found
  263.     echo $f >"$crash_state"
  264.     echo $cur_vid >>"$crash_state"
  265.     echo "$final_vids" >>"$crash_state"
  266.     sync
  267.     sleep 1
  268. }
  269.  
  270. function read_state
  271. {
  272.     f=$(head -1 "$crash_state")
  273.     cur_vid=$(sed -n 2p "$crash_state")
  274.     final_vids="$(tail -1 "$crash_state")"
  275.     #echo f=$f
  276.     #echo cur_vid=$cur_vid
  277.     #echo final_vids="-"$final_vids"-"
  278. }
  279.  
  280. # Check if state file exists, which probably means the computer crashed
  281. if [[ -e "$crash_state" ]]; then
  282.     read_state
  283.     echo
  284.     echo "State file $crash_state detected."
  285.     echo "Frequency: $f"
  286.     echo "Current VID: $cur_vid"
  287.     echo "Current best vids: $final_vids"
  288.     echo "Press return to continue from this point after a crash, or CTRL-C to delete the state file."
  289.     read
  290.  
  291.     # There was a crash at $cur_vid, so increase it and go directly to pass 2
  292.     cur_vid=$((cur_vid-2*vid_delta))
  293.     pass2=1
  294. else
  295.     cur_vid=${vid[0]}
  296.     f=0
  297.     pass2=0
  298. fi
  299.  
  300. for (( ; f<nb_freqs; f++ )); do
  301.     echo ""
  302.     echo "Testing frequency $f (${freq[f]})..."
  303.     echo "Default vid: ${vid[f]}"
  304.  
  305.     # Based on comments found on this page:
  306.     # http://openmindedbrain.info/09/05/2010/undervolting-in-ubuntu-10-04-lucid-lts/
  307.     # it appears that some processors have a bogus 1st frequency that will always go
  308.     # down to VID 0 and is in fact unused.
  309.  
  310.     if (( f == 0 && ${freq[0]} == ${freq[1]} + 1000 )); then
  311.         echo "Looks like the first frequency is bogus; ignoring it."
  312.         final_vids="${vid[0]} "
  313.         continue
  314.     fi
  315.  
  316.     # Pass 1: lowering vid quickly until there is an error
  317.  
  318.     if (( cur_vid*vid_delta < ${vid[f]}*vid_delta && pass2 == 0 )); then
  319.         #echo "forcing cur vid to max"
  320.         cur_vid=${vid[f]}
  321.     fi
  322.  
  323.     cur_vid=$((cur_vid+vid_delta))
  324.  
  325.     for (( ; cur_vid*vid_delta < vid_limit*vid_delta && pass2 == 0; cur_vid+=vid_delta )); do
  326.         count=$short_test_length
  327.  
  328.         echo "Trying vid $cur_vid for $count seconds"
  329.         save_state
  330.  
  331.         set_vid $f $cur_vid
  332.  
  333.         # It looks like changing the VID also resets the frequency, so set frequency here
  334.         set_sys_val scaling_setspeed ${freq[f]}
  335.  
  336.         launch_mprime
  337.  
  338.         if ((debug)); then
  339.             sleep 1
  340.             debug_info
  341.         fi
  342.  
  343.         for (( ; count>0; count-- )); do
  344.             sleep 1
  345.             echo -n "."
  346.             grep FATAL $mp_log &>/dev/null
  347.             if (( $? == 0 )); then
  348.                 kill_mprime
  349.                 echo ""
  350.                 echo "Hardware failure detected."
  351.                 ((cur_vid-=vid_delta))
  352.                 break 2
  353.             fi
  354.             cur_freq=$(cat $cpuf/scaling_cur_freq)
  355.             if (( (cur_freq < ${freq[f]} * (100 - wrong_freq_tolerance) / 100)
  356.                 || (cur_freq > ${freq[f]} * (100 + wrong_freq_tolerance) / 100) )); then
  357.                 echo ""
  358.                 echo "ERROR: Wrong frequency! (${cur_freq} instead of ${freq[f]})"
  359.                 debug_info
  360.                 exit 1
  361.             fi
  362.         done
  363.  
  364.         echo ""
  365.  
  366.         kill_mprime
  367.     done
  368.  
  369.     # Pass 2: stress testing for a longer time and going up in case of an error.
  370.  
  371.     for (( ; cur_vid*vid_delta <= vid_limit && cur_vid*vid_delta > (${vid[f]}+safety_vid_delta*vid_delta)*vid_delta; cur_vid-=vid_delta )); do
  372.         count=$long_test_length
  373.         echo "Trying vid $cur_vid for $count seconds"
  374.         save_state
  375.  
  376.         set_vid $f $cur_vid
  377.         set_sys_val scaling_setspeed ${freq[f]}
  378.  
  379.         launch_mprime
  380.  
  381.         if ((debug)); then
  382.             debug_info
  383.         fi
  384.  
  385.         for (( ; count>0; count-- )); do
  386.             sleep 1
  387.             echo -n "."
  388.             grep FATAL $mp_log &>/dev/null
  389.             if (( $? == 0 )); then
  390.                 echo ""
  391.                 echo "Hardware failure detected."
  392.                 break
  393.             fi
  394.         done
  395.  
  396.         kill_mprime
  397.  
  398.         if (( count == 0 )); then
  399.             break
  400.         fi
  401.     done
  402.  
  403.     echo ""
  404.     echo "Found working vid. Adding $safety_vid_delta for safety."
  405.     (( cur_vid*vid_delta > vid_limit*vid_delta )) && cur_vid=$vid_limit
  406.     if (( (cur_vid - safety_vid_delta*vid_delta)*vid_delta < ${vid[f]}*vid_delta )); then
  407.         final_vids=${final_vids}${vid[f]}
  408.     else
  409.         final_vids=${final_vids}$((cur_vid-safety_vid_delta*vid_delta))
  410.     fi
  411.     (( f < nb_freqs-1 )) && final_vids="$final_vids "
  412.     echo "Current results: $final_vids"
  413.  
  414.     pass2=0
  415.  
  416. done
  417.  
  418. echo ""
  419. echo "All done."
  420. echo "Default vids: $default_vids"
  421. echo "Final vids:   $final_vids"
  422. echo ""
  423. if [[ -e /etc/default/phc-intel ]]; then
  424.     # Newest version of the AUR phc-intel package
  425.     echo "Edit /etc/default/phc-intel to add your final vids."
  426. elif [[ -e /etc/phc-intel.conf ]]; then
  427.     # Old version of the AUR phc-intel package
  428.     echo "Edit /etc/phc-intel.conf to add your final vids."
  429. elif [[ -e /etc/conf.d/phc-intel ]]; then
  430.     # Old version of the AUR phc-intel package, or current dkms-phc-intel package.
  431.     echo "Edit /etc/conf.d/phc-intel to add your final vids."
  432. elif [[ -e /etc/default/phc-k8 ]]; then
  433.     # Newest version of the AUR phc-k8 package
  434.     echo "Edit /etc/default/phc-k8 to add your final vids."
  435. else
  436.     echo "If your system has a working rc.local (probably not the case with systemd), you can add the following 3 lines to /etc/rc.local, before the final \"exit 0\":"
  437.     echo ""
  438.     echo "for i in /sys/devices/system/cpu/cpu*/cpufreq/phc_vids; do"
  439.     echo "  echo \"$final_vids\" > \$i"
  440.     echo "done"
  441. fi
# End of script. (Pastebin page footer: advertisement and comment sign-in prompt.)