Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# MLC@Home NaN watchdog.
#
# Every ${interval} seconds, walks the BOINC slot directories, and for each
# slot running an MLC@Home task reads the new lines of the task's stderr.txt.
# If a training line reports a NaN loss or validation loss, the task is
# suspended through boinccmd.
#
# Training lines are expected in the layout this script parses:
#   [prefix]Epoch <n> | loss: <value> | val_loss: <value> | ...
#
# Requirements: bash >= 4 (associative arrays), tail, date, boinccmd.
#
# WARNING!!!
# ONLY EXECUTE THIS SCRIPT IF YOU ARE ABSOLUTELY SURE OF WHAT YOU ARE DOING!
# WRONG SETTINGS MAY EVEN DAMAGE YOUR COMPUTER!
# USE SOLELY AT YOUR OWN RISK!

boinc_slots_path="/var/lib/boinc-client/slots" # BOINC client slots directory path.
mlc_project_url="https://www.mlcathome.org/mlcathome/"
interval=60 # Seconds to sleep between two scans of the slots.

# Extended regular expression (used with [[ =~ ]]) matching a training line,
# from the word "Epoch" up to the end of the line.
training_line_pattern='Epoch [0-9]+.*$'

# Line of stderr.txt at which the first scan of a task starts. This number
# depends on how many lines are in your stderr before the "Epoch 1" line.
# Check it. Later scans resume from the first line not yet read, so this
# value only matters the first time a task is seen.
first_line=38

# Per-task resume point: task name -> number of the next stderr line to read.
declare -A next_line=()

#######################################
# Print the current local time as dd/mm/YYYY HH:MM:SS.
#######################################
timestamp() {
  date '+%d/%m/%Y %H:%M:%S'
}

#######################################
# Print the text of the first <tag>...</tag> element found in a file.
# Arguments: $1 - tag name, $2 - path of the file to search
# Outputs:   the element text on stdout (nothing when not found)
# Returns:   0 when found, 1 when the file is unreadable or has no such tag
#######################################
xml_field() {
  local tag=$1 file=$2
  local content pattern
  content=$(<"$file") || return 1
  pattern="<${tag}>([^<]*)</${tag}>"
  [[ $content =~ $pattern ]] || return 1
  printf '%s\n' "${BASH_REMATCH[1]}"
}

#######################################
# Tell whether a loss value is NaN.
# Besides the plain "nan", signed and differently-cased spellings are
# accepted too, in case the application prints them.
# Arguments: $1 - loss value as printed in the training line
# Returns:   0 when the value is NaN, 1 otherwise
#######################################
is_nan() {
  case "${1,,}" in
    nan | -nan | +nan) return 0 ;;
    *) return 1 ;;
  esac
}

#######################################
# Scan the not-yet-read lines of a task's stderr for a NaN loss.
# Only newline-terminated lines are consumed: a last line that is still
# being written is left for the next scan, so a loss value is never judged
# from a truncated line.
# Globals:   next_line (read/written), first_line, training_line_pattern
# Arguments: $1 - task name (non-empty), $2 - path of the task's stderr.txt
# Returns:   0 when a NaN loss was found; next_line[task] is then left ON the
#            faulty line, so the caller decides whether to step past it.
#            1 otherwise; next_line[task] then points past all lines read.
#######################################
scan_for_nan() {
  local task=$1 stderr_file=$2
  local start=${next_line[$task]:-$first_line}
  local line_no=$start
  local line loss_field val_loss_field loss val_loss

  [[ -r "$stderr_file" ]] || return 1

  while IFS= read -r line; do
    if [[ $line =~ $training_line_pattern ]]; then
      # Split "Epoch n | loss: x | val_loss: y | ..." on '|', then take the
      # second word of the loss and val_loss fields.
      IFS='|' read -r _ loss_field val_loss_field _ <<<"${BASH_REMATCH[0]}"
      read -r _ loss _ <<<"$loss_field"
      read -r _ val_loss _ <<<"$val_loss_field"
      if is_nan "$loss" || is_nan "$val_loss"; then
        next_line[$task]=$line_no
        return 0
      fi
    fi
    line_no=$((line_no + 1))
  done < <(tail -n "+${start}" -- "$stderr_file")

  next_line[$task]=$line_no
  return 1
}

#######################################
# Analyse the task running in one slot and suspend it when it is faulty.
# Slots without a readable task state, or whose state names no task, are
# skipped silently; tasks of other projects are reported and left alone.
# Globals:   mlc_project_url, next_line (written)
# Arguments: $1 - slot directory path (no trailing slash)
# Outputs:   one status line per analysed task on stdout; boinccmd failures
#            are reported on stderr
#######################################
process_slot() {
  local slot_dir=$1
  local state_file="${slot_dir}/boinc_task_state.xml"
  local project_url task_name fault_line rc

  [[ -f "$state_file" ]] || return 0
  project_url=$(xml_field project_master_url "$state_file")
  task_name=$(xml_field result_name "$state_file")
  # Without a task name there is nothing to track or to suspend.
  [[ -n "$task_name" ]] || return 0

  printf '%s - Analysing task %s...' "$(timestamp)" "$task_name"

  if [[ "$project_url" != "$mlc_project_url" ]]; then
    printf ' \e[1;33mNot MLC@Home\e[0m\n'
    return 0
  fi

  if ! scan_for_nan "$task_name" "${slot_dir}/stderr.txt"; then
    printf ' \e[1;32mOK\e[0m\n'
    return 0
  fi

  printf ' \e[1;31mFAULT\e[0m\n'
  if boinccmd --task "$project_url" "$task_name" suspend; then
    # Step past the faulty line so the same fault is not reported again.
    fault_line=${next_line[$task_name]}
    next_line[$task_name]=$((fault_line + 1))
    printf '%s - %s suspended!\n' "$(timestamp)" "$task_name"
  else
    rc=$?
    # The resume point still sits on the faulty line, so the next scan
    # detects the fault again and retries the suspension.
    printf '%s - could not suspend %s (boinccmd exit status %d)\n' \
      "$(timestamp)" "$task_name" "$rc" >&2
  fi
}

#######################################
# Scan every slot directory, sleep ${interval} seconds, repeat forever.
# Globals: boinc_slots_path, interval
#######################################
main() {
  local slot_dir
  while true; do
    # Glob the real directory names: slot numbers need not be contiguous.
    for slot_dir in "${boinc_slots_path}"/*/; do
      [[ -d "$slot_dir" ]] || continue # unmatched glob stays literal
      process_slot "${slot_dir%/}"
    done
    sleep "$interval"
  done
}

main "$@"
Add Comment
Please, Sign In to add comment