#!/bin/bash
# usage: ./chunky.sh START END THREADS
# will download range START to END in chunks, keeping THREADS downloaders going
# press e to exit, t to change the number of threads or r to change the range end.
# while running, statistics are occasionally output
#
# Updates, 14 Jun 2011 DoubleJ:
# * Recomputes $RANGE when new $END is provided.
# * Cookie and STOP files are tagged with chunky's PID, so multiple chunkys should
#   be able to politely share a directory. Touching STOP with no PID suffix will
#   stop all chunkys in that directory.
#
# Updates, 12 Jun 2011 DoubleJ:
# * While winding down, now checks $CHILDTHREADS instead of $RUNNING so it should
#   now shut itself off properly.
# * Aesthetic changes to $CHILDTHREADS in some places to make display make more sense.
#
# Updates, 11 Jun 2011 DoubleJ:
# * 0 is a valid number of threads to request (allow open threads to coast to a stop)
# * Can change the value of $END
# * Counts all bff-thread instances against $WANT to allow a job to "hibernate" while
#   a previous job completes
#
# Known issues:
# * Since process-creation time is non-zero, occasionally chunky will over-spawn
#   bff-threads. Once it catches up to itself it behaves as if the user scaled down
#   the number of threads and allows extant threads to complete without being renewed.
# * Different chunkys can "steal" each other's threads depending on when an old thread
#   completes and when each chunky's 30-second timer ends. Over time this will even
#   out, and people should only be running multiple chunkys when one is close to the
#   end of its range, so I don't consider this a show-stopper.

# Print the command-line synopsis and terminate the script with status 1.
usage() {
  echo "USAGE: $0 START END THREADS"
  echo "The number of threads can be changed while running by pressing t"
  exit 1
}

# Echo $1 when it is a positive decimal integer without leading zeros;
# echo nothing otherwise, so callers can test the result with -z.
positive_int() {
  if [[ $1 =~ ^[1-9][0-9]*$ ]]; then
    printf '%s\n' "$1"
  fi
}

# Echo the number of processes whose name matches "bff-thread". This counts
# every matching process pgrep can see, not only the ones this chunky started.
count_bff_threads() {
  pgrep bff-thread | wc -l
}

if [ $# -ne 3 ]; then
  usage
fi

START=$(positive_int "$1")
END=$(positive_int "$2")
WANT=$(positive_int "$3")
STEP=100

if [ -z "$START" ]; then
  echo "Invalid start value. Must be a positive integer."
  usage
fi
if [ -z "$END" ]; then
  echo "Invalid end value. Must be a positive integer."
  usage
fi
if [ -z "$WANT" ]; then
  echo "Invalid thread count. Must be a positive integer."
  usage
fi
if [ "$END" -le "$START" ]; then
  echo "START must be less than END."
  usage
fi

# command to call for each thread.
THREAD_COMMAND=./bff-thread.sh

CHILDTHREADS=0
RUNNING=$(count_bff_threads)
CUR=$START
RANGE=$((END - START + 1))

# check bash version for associative array support
if [ "${BASH_VERSINFO[0]}" -lt 4 ]; then
  echo "error: chunky needs bash version 4.0 or greater. version $BASH_VERSION detected."
  exit 1
fi

# map from PID to cookie jar
declare -A CHILDREN
# map from cookie jar to PID
declare -A COOKIEJARS
# what is the start of the range for each PID (stored as "start:length")
declare -A thread_range
# and current profile
declare -A thread_current

KEEPGOING=1
GETINPUT=0

# Start one worker on the next STEP-sized slice of the range.
# Globals read:    WANT, STEP, END, THREAD_COMMAND
# Globals written: CUR, WANT, RUNNING, CHILDTHREADS, CHILDREN, COOKIEJARS,
#                  thread_range, thread_current
startchild() {
  local jar jarnum s e rng cn

  # Nothing left to hand out (for example the range end was lowered below
  # CUR). Never launch a worker on an empty or inverted slice; WANT=0 also
  # ends the caller's fill loop.
  if [ "$CUR" -gt "$END" ]; then
    WANT=0
    return
  fi

  # find an available cookie jar
  jarnum=$WANT
  for ((jar = 0; jar < WANT; jar++)); do
    if [ -z "${COOKIEJARS[$jar]}" ]; then
      jarnum=$jar
      break
    fi
  done
  if [ "$jarnum" -ge "$WANT" ]; then
    # every jar is taken: report the pool as full so the caller stops asking
    RUNNING=$WANT
    return
  fi

  # calculate range for this child
  s=$CUR
  CUR=$((CUR + STEP))
  if [ "$CUR" -gt "$END" ]; then
    CUR=$((END + 1))
  fi
  e=$((CUR - 1))
  rng=$((e - s + 1))

  # start the child and get the PID
  # (THREAD_COMMAND is left unquoted so it may carry extra arguments)
  $THREAD_COMMAND "$s" "$e" "cookies$$.${jarnum}.txt" >> "friendster.${s}-${e}.log" 2>&1 &
  cn=$!

  # record the new child
  CHILDREN[$cn]=$jarnum
  COOKIEJARS[$jarnum]=$cn
  thread_range[$cn]=${s}:${rng}
  thread_current[$cn]=$s

  # give the new process a moment to show up before recounting
  sleep .1
  CHILDTHREADS=${#CHILDREN[@]}
  RUNNING=$(count_bff_threads)

  # if we hit the end of the range, we don't want to start more children, ever
  if [ "$CUR" -gt "$END" ]; then
    WANT=0
  fi
}

# Drop bookkeeping for workers that have exited and refresh the progress of
# the ones still alive from their bffthread-<PID> status files.
# Globals written: CHILDREN, COOKIEJARS, thread_range, thread_current,
#                  CHILDTHREADS, RUNNING
checkchildren() {
  local c jar cur
  for c in "${COOKIEJARS[@]}"; do
    if kill -0 "$c" 2>/dev/null; then
      # thread is alive. get the current status of the thread
      cur=$(cat "bffthread-${c}" 2>/dev/null)
      if [ -n "$cur" ]; then
        thread_current[$c]=$cur
      fi
    else
      # thread is gone. clear information related to it
      jar=${CHILDREN[$c]}
      unset "CHILDREN[$c]"
      unset "COOKIEJARS[$jar]"
      unset "thread_range[$c]"
      unset "thread_current[$c]"
    fi
  done
  CHILDTHREADS=${#CHILDREN[*]}
  RUNNING=$(count_bff_threads)
}

# Print a summary line followed by one progress line per tracked worker.
# Globals read: CHILDTHREADS, RUNNING, WANT, COOKIEJARS, thread_range,
#               thread_current
threadreport() {
  local c s e rng cur v pct
  if [ "$CHILDTHREADS" -lt "$RUNNING" ] && [ "$CHILDTHREADS" -lt "$WANT" ]; then
    echo "running threads (${CHILDTHREADS}/${WANT}) waiting for thread pool to open (${RUNNING} total):"
  else
    echo "running threads (${CHILDTHREADS}/${WANT}):"
  fi
  for c in "${COOKIEJARS[@]}"; do
    # thread_range entries are "start:length"
    s=${thread_range[$c]}
    rng=${s#*:}
    s=${s%:*}
    e=$((s + rng - 1))
    cur=${thread_current[$c]}
    v=$((cur - s))
    pct=$((100 * v / rng))
    echo " thread covering ${s}-${e}: ${cur} (${pct}%)"
  done
}

# Ask for confirmation; on "y" stop the main loop and create STOP.<PID>
# so that this chunky's workers stop as well.
askexit() {
  local answer
  echo
  echo "Do you wish to stop? [y/N]"
  read -r answer
  if [ "$answer" == "y" ]; then
    # stop the loop
    KEEPGOING=0
    # tell our children to stop
    touch "STOP.$$"
  fi
}

# Prompt for a new desired thread count (0 is allowed). Empty input keeps
# the current value. Refuses once WANT has dropped to 0.
askthreads() {
  local w
  echo
  if [ "$WANT" -eq 0 ]; then
    echo "no more blocks to assign. unable to change thread count."
    return
  fi
  echo "How many threads do you want to run? [$WANT]"
  read -r w
  if [ -z "$w" ]; then
    return
  fi
  # verify a valid number
  if [[ $w =~ ^[0-9]+$ ]]; then
    # force base 10 so input such as "08" is not later parsed as octal
    WANT=$((10#$w))
  else
    echo "invalid thread count. staying with $WANT"
  fi
}

# Prompt for a new range end and recompute RANGE. Empty input keeps the
# current end. The value must be a positive integer not below START;
# anything else is rejected so RANGE can never become zero or negative.
askrange() {
  local rend
  echo
  echo "What would you like to change the range end to? [$END]"
  read -r rend
  if [ -z "$rend" ]; then
    return
  fi
  if [[ $rend =~ ^[1-9][0-9]*$ ]] && [ "$rend" -ge "$START" ]; then
    END=$rend
    RANGE=$((END - START + 1))
  else
    echo "Invalid range end. Staying with $END"
  fi
}

# before we start, remove the STOP file, or bff-thread.sh will stop after 1 profile
if [ -f STOP ]; then
  rm -- STOP
fi
if [ -f "STOP.$$" ]; then
  rm -- "STOP.$$"
fi

while [ "$KEEPGOING" -eq 1 ]; do
  # check to see if any children have finished
  checkchildren

  # start new threads to fill voids left by ones that finished
  while [ "$RUNNING" -lt "$WANT" ]; do
    startchild
  done

  # present statistics
  v=$((CUR - START))
  pct=$((100 * v / RANGE))
  echo
  if [ "$CUR" -le "$END" ]; then
    echo "next block starts at $CUR."
  fi
  echo "${pct}% of range ${START}-${END} assigned or completed."
  threadreport
  echo "press e to exit, t to change number of threads or r to change the range end"
  echo "any other key will update stats, or wait 30 seconds"
  if [ "$CUR" -le "$END" ]; then
    echo "next block starts at $CUR."
  fi

  # check to see if we should keep going. this is before the sleep so that
  # the sleep is almost certainly what gets interrupted with ^c, as everything
  # else is fast
  if [ "$WANT" -eq 0 ] && [ "$CHILDTHREADS" -eq 0 ]; then
    KEEPGOING=0
  fi

  # sleep for a bit
  read -r -t 30 -n 1 key
  # exit code greater than 128 means timeout
  if [ $? -le 128 ]; then
    case "$key" in
      e|E) askexit ;;
      t|T) askthreads ;;
      r|R) askrange ;;
    esac
  fi

  # check for STOP, which may have been created by a disk-space monitor script
  if [ -f STOP ] || [ -f "STOP.$$" ]; then
    echo "STOP file detected. shutting down."
    KEEPGOING=0
  fi
done

# wait for any running threads to finish (will only happen if we stopped early)
while [ "$CHILDTHREADS" -gt 0 ]; do
  echo "waiting for $CHILDTHREADS threads to finish their current profile"
  threadreport
  sleep 10
  checkchildren
done

echo "done."