Guest User

SuspendMLCNaNTasks.sh

a guest
Dec 29th, 2020
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 2.97 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. # WARNING!!!
  4. # ONLY EXECUTE THIS SCRIPT IF YOU ARE ABSOLUTELY SURE OF WHAT YOU ARE DOING!
  5. # WRONG SETTINGS MAY EVEN DAMAGE YOUR COMPUTER!
  6. # USE SOLELY AT YOUR OWN RISK!
  7.  
  8. boinc_slots_path="/var/lib/boinc-client/slots" #BOINC client slots directory path.
  9. ramdisk_path="/dev/shm" #Tempfs on Linux Ubuntu.
  10. mlc_project_url="https://www.mlcathome.org/mlcathome/"
  11. interval=60
  12. training_line_pattern='Epoch [0-9]\+.*$'
  13. first_line=38 #This number depends on how many lines are in your stderr before "Epoch 1" line. Check it.
  14. declare -A last_epoch #Associative array to save last epoch reached for each task
  15.  
  16. while true
  17. do
  18.     slot_dirs=( $(ls "${boinc_slots_path}")) #Listing existing slots
  19.     ndirs=${#slot_dirs[@]}
  20.     # SCANNING SLOTS
  21.     for (( i = 0; i < ndirs; i++ ))
  22.     do
  23.         ## READING BOINC TASK STATE OR SKIPPING TO NEXT SLOT
  24.         boinc_task_state="${boinc_slots_path}/${i}/boinc_task_state.xml"
  25.         if [ ! -f ${boinc_task_state} ]; then continue; fi
  26.         project_url=$(sed -n 's|[^<]*<project_master_url>\([^<]*\)</project_master_url>[^<]*|\1\n|gp' $boinc_task_state)
  27.         task_name=$(sed -n 's|[^<]*<result_name>\([^<]*\)</result_name>[^<]*|\1\n|gp' $boinc_task_state)
  28.         ##
  29.  
  30.         ## ANALYSING TASK
  31.         echo -n $(date '+%d/%m/%Y %H:%M:%S')" - Analysing task ${task_name}..."
  32.         if [[ ${project_url} == ${mlc_project_url} ]] #MLC@Home task condition
  33.         then
  34.             faulty_flag=0 #Faulty task flag (1 = is faulty)
  35.             stderr="${boinc_slots_path}/${i}/stderr.txt" #Stderr path
  36.             key=$(echo "${task_name}" | md5sum | cut -d " " -f 1) #Generating a likely unique key based on task name
  37.             start_line=$((first_line+last_epoch[${key}])) #Note: $last_epoch[${key}] is unset (=0) if analysed task has just started
  38.             echo "$(tail -n +${start_line} ${stderr})" > "$ramdisk_path/stderr.txt" #Saving a temporary copy of stderr (without useless or previously analysed lines) to ramdisk
  39.             stderr="$ramdisk_path/stderr.txt" #Temporary stderr path
  40.  
  41.             ### READING TEMPORARY STDERR LINE BY LINE
  42.             while IFS= read -r line
  43.             do
  44.                 training_line=$(echo "${line}" | grep -o "${training_line_pattern}")
  45.                 epoch=$(echo "${training_line}" | cut -d "|" -f 1 | cut -d " " -f 2)
  46.                 loss=$(echo "${training_line}" | cut -d "|" -f 2 | cut -d " " -f 3)
  47.                 val_loss=$(echo "${training_line}" | cut -d "|" -f 3 | cut -d " " -f 3)
  48.                 #time=$(echo "${training_line}" | cut -d "|" -f 4 | cut -d " " -f 3) #Not used
  49.                 last_epoch[${key}]=${epoch} #Updating last epoch
  50.                 if [[ ${loss} == "nan" || ${val_loss} == "nan" ]] #Faulty task condition
  51.                 then
  52.                     faulty_flag=1
  53.                     break
  54.                 fi
  55.             done < "${stderr}"
  56.             ###
  57.  
  58.             ### DOING OPERATIONS ON MLC@HOME TASK
  59.             if [ ${faulty_flag} == 1 ]; then
  60.                 echo -e " \e[1;31mFAULT\e[0m"
  61.                 boinccmd --task ${project_url} ${task_name} suspend #abort
  62.                 echo $(date '+%d/%m/%Y %H:%M:%S')" - ${task_name} suspended!" #aborted!"
  63.             else
  64.                 echo -e " \e[1;32mOK\e[0m"
  65.             fi
  66.             ###
  67.         else
  68.              echo -e " \e[1;33mNot MLC@Home\e[0m"
  69.         fi
  70.         ##
  71.     done
  72.     #
  73.     sleep ${interval}
  74. done
  75.  
Add Comment
Please, Sign In to add comment