Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/bash
- # usage: ./chunky.sh START END THREADS
- # will download range START to END in chunks, keeping THREADS downloaders going
- # press e to exit, t to change the number of threads or r to change the range end.
- # while running, statistics are occasionally output
- #
- # Updates, 14 Jun 2011 DoubleJ:
- # * Recomputes $RANGE when new $END is provided.
- # * Cookie and STOP files are tagged with chunky's PID, so multiple chunkys should
- # be able to politely share a directory. Touching STOP with no PID suffix will
- # stop all chunkys in that directory.
- #
- # Updates, 12 Jun 2011 DoubleJ:
- # * While winding down, now checks $CHILDTHREADS instead of $RUNNING so it should
- # now shut itself off properly.
- # * Aesthetic changes to $CHILDTHREADS in some places to make display make more sense.
- #
- # Updates, 11 Jun 2011 DoubleJ:
- # * 0 is a valid number of threads to request (allow open threads to coast to a stop)
- # * Can change the value of $END
- # * Counts all bff-thread instances against $WANT to allow a job to "hibernate" while
- # a previous job completes
- #
- # Known issues:
- # * Since process-creation time is non-zero, chunky will occasionally over-spawn
- # bff-threads. Once it catches up with itself, it behaves as if the user had scaled
- # down the number of threads and allows extant threads to complete without being renewed.
- # * Different chunkys can "steal" each other's threads depending on when an old thread
- # completes and when each chunky's 30-second timer ends. Over time this will even
- # out, and people should only be running multiple chunkys when one is close to the
- # end of its range, so I don't consider this a show-stopper.
# Print the command-line synopsis and terminate the script.
# Outputs: two lines of usage text on stdout.
# Exits:   always, with status 1 (this function never returns).
usage() {
  # "$0" is passed as a printf argument so a script path containing
  # whitespace or glob characters is printed verbatim instead of being
  # word-split and expanded by the shell.
  printf 'USAGE: %s START END THREADS\n' "$0"
  printf '%s\n' 'The number of threads can be changed while running by pressing t'
  exit 1
}
# Command-line validation: exactly three arguments are required, each a
# positive decimal integer without leading zeros, and START must be strictly
# below END. Every failure path goes through usage, which exits with status 1.
if (( $# != 3 )); then
  usage
fi

# Anchored pattern applied to the whole argument. Rejecting a leading zero
# also keeps the values from being read as octal in later arithmetic.
POSITIVE_INT_RE='^[1-9][0-9]*$'

# Each variable stays empty unless its argument is a valid positive integer.
START=''
END=''
WANT=''
[[ $1 =~ $POSITIVE_INT_RE ]] && START=$1
[[ $2 =~ $POSITIVE_INT_RE ]] && END=$2
[[ $3 =~ $POSITIVE_INT_RE ]] && WANT=$3

# size of the block of the range handed to each thread
STEP=100

if [[ -z "$START" ]]; then
  echo "Invalid start value. Must be a positive integer."
  usage
fi
if [[ -z "$END" ]]; then
  echo "Invalid end value. Must be a positive integer."
  usage
fi
if [[ -z "$WANT" ]]; then
  echo "Invalid thread count. Must be a positive integer."
  usage
fi
if (( END <= START )); then
  echo "START must be less than END."
  usage
fi
# command to call for each thread.
THREAD_COMMAND=./bff-thread.sh
# number of threads started by this chunky that are still tracked in CHILDREN
CHILDTHREADS=0
# number of processes pgrep finds for "bff-thread"; this is not limited to
# the threads started by this chunky
RUNNING=$(pgrep bff-thread | wc -l)
# first value of the next block to hand out
CUR=$START
# total number of values in START..END (inclusive)
RANGE=$((END - START + 1))

# check bash version for associative array support
if (( BASH_VERSINFO[0] < 4 )); then
  # diagnostics belong on stderr so they are not mistaken for normal output
  echo "error: chunky needs bash version 4.0 or greater. version $BASH_VERSION detected." >&2
  exit 1
fi

# map from PID to cookie jar
declare -A CHILDREN
# map from cookie jar to PID
declare -A COOKIEJARS
# what is the start of the range for each PID (stored as "start:length")
declare -A thread_range
# and current profile
declare -A thread_current

# main-loop flag: 1 while the scheduler should keep running
KEEPGOING=1
GETINPUT=0
# Start one background thread on the next block of the range.
#
# Globals read:    WANT, STEP, END, THREAD_COMMAND, COOKIEJARS
# Globals written: CUR, WANT, RUNNING, CHILDTHREADS, CHILDREN, COOKIEJARS,
#                  thread_range, thread_current
# Side effects:    runs $THREAD_COMMAND in the background with its output
#                  appended to friendster.<first>-<last>.log, then sleeps 0.1s.
#
# Returns without starting anything when the range is already exhausted
# (WANT is then set to 0) or when no cookie jar below WANT is free (RUNNING
# is then set to WANT so the caller's "RUNNING < WANT" loop terminates).
startchild() {
  local jar jarnum first last span pid

  # CUR can already be past END, e.g. after the range end was lowered while
  # running. Handing out a block then would give an inverted range and move
  # CUR backwards, so treat it as "range finished" instead.
  if (( CUR > END )); then
    WANT=0
    return
  fi

  # find an available cookie jar
  jarnum=$WANT
  for ((jar = 0; jar < WANT; jar++)); do
    if [[ -z "${COOKIEJARS[$jar]}" ]]; then
      jarnum=$jar
      break
    fi
  done
  if (( jarnum >= WANT )); then
    RUNNING=$WANT
    return
  fi

  # calculate range for this child; the last block is clipped to END
  first=$CUR
  CUR=$((CUR + STEP))
  if (( CUR > END )); then
    CUR=$((END + 1))
  fi
  last=$((CUR - 1))
  span=$((last - first + 1))

  # start the child and get the PID
  # THREAD_COMMAND is left unquoted on purpose so it may carry arguments.
  $THREAD_COMMAND "$first" "$last" "cookies$$.${jarnum}.txt" \
    >>"friendster.${first}-${last}.log" 2>&1 &
  pid=$!

  # record the new child
  CHILDREN[$pid]=$jarnum
  COOKIEJARS[$jarnum]=$pid
  thread_range[$pid]=${first}:${span}
  thread_current[$pid]=$first

  # short pause before recounting (presumably to let the new process show up
  # in pgrep; the file header notes process creation is not instantaneous)
  sleep .1
  CHILDTHREADS=${#CHILDREN[@]}
  RUNNING=$(pgrep bff-thread | wc -l)

  # if we hit the end of the range, we don't want to start more children, ever
  if (( CUR > END )); then
    WANT=0
  fi
}
# Refresh the bookkeeping for every thread this chunky started.
#
# For each PID recorded in COOKIEJARS: if the process no longer exists, drop
# it from CHILDREN, COOKIEJARS, thread_range and thread_current (which frees
# its cookie jar); otherwise read its progress from the file bffthread-<PID>
# in the current directory, if that file has any content.
#
# Globals written: CHILDREN, COOKIEJARS, thread_range, thread_current,
#                  CHILDTHREADS, RUNNING
checkchildren() {
  local pid jar progress

  for pid in "${COOKIEJARS[@]}"; do
    if ! kill -0 "$pid" 2>/dev/null; then
      # thread is gone. clear information related to it
      # The subscripts are quoted so "name[123]" is never treated as a glob
      # pattern that could match a file in the working directory.
      jar=${CHILDREN[$pid]}
      unset "CHILDREN[$pid]" "thread_range[$pid]" "thread_current[$pid]"
      if [[ -n "$jar" ]]; then
        unset "COOKIEJARS[$jar]"
      fi
    else
      # thread is alive. get the current status of the thread
      # (a missing status file is expected and silently ignored)
      progress=$(cat "bffthread-${pid}" 2>/dev/null)
      if [[ -n "$progress" ]]; then
        thread_current[$pid]=$progress
      fi
    fi
  done

  CHILDTHREADS=${#CHILDREN[@]}
  RUNNING=$(pgrep bff-thread | wc -l)
}
# Print a status header followed by one progress line per tracked thread.
#
# Globals read: CHILDTHREADS, RUNNING, WANT, COOKIEJARS, thread_range,
#               thread_current
# Outputs:      the report on stdout; no globals are modified.
threadreport() {
  local pid range first span last cur pct

  if (( CHILDTHREADS < RUNNING && CHILDTHREADS < WANT )); then
    echo "running threads (${CHILDTHREADS}/${WANT}) waiting for thread pool to open (${RUNNING} total):"
  else
    echo "running threads (${CHILDTHREADS}/${WANT}):"
  fi

  for pid in "${COOKIEJARS[@]}"; do
    # thread_range entries have the form "start:length"
    range=${thread_range[$pid]}
    span=${range#*:}
    first=${range%:*}
    last=$((first + span - 1))
    cur=${thread_current[$pid]}

    # The progress value comes from a status file, so only do arithmetic on
    # it when it is purely numeric (10# keeps leading zeros from being read
    # as octal), and never divide by an empty span.
    if (( span > 0 )) && [[ $cur =~ ^[0-9]+$ ]]; then
      pct=$((100 * (10#$cur - first) / span))
    else
      pct=0
    fi
    echo " thread covering ${first}-${last}: ${cur} (${pct}%)"
  done
}
# Ask on stdout whether to stop and read one line of reply from stdin.
#
# On "y" or "Y": sets KEEPGOING=0 to stop the loop and creates the file
# STOP.<PID of this script> in the current directory to tell our children to
# stop. Any other reply (including an empty line) changes nothing.
askexit() {
  local answer

  echo
  echo "Do you wish to stop? [y/N]"
  read -r answer
  # The prompt advertises y/N, so accept the affirmative in either case.
  if [[ $answer == [yY] ]]; then
    # stop the loop
    KEEPGOING=0
    # tell our children to stop
    touch "STOP.$$"
  fi
}
# Ask for a new thread count and store it in WANT.
#
# When WANT is already 0 the request is refused with a message. Otherwise one
# line is read from stdin: an empty line keeps the current value, a string of
# decimal digits (0 is allowed) becomes the new WANT, and anything else is
# reported as invalid and ignored.
askthreads() {
  local reply

  echo
  if (( WANT == 0 )); then
    echo "no more blocks to assign. unable to change thread count."
    return
  fi

  echo "How many threads do you want to run? [$WANT]"
  read -r reply
  if [[ -z "$reply" ]]; then
    return 0
  fi

  # verify a valid number
  if [[ $reply =~ ^[0-9]+$ ]]; then
    # Force base 10 so input such as "08" is stored as 8. Left as typed it
    # would be rejected as invalid octal by later arithmetic on WANT, and
    # "010" would mean 8 in arithmetic but 10 in a "[ -lt ]" test.
    WANT=$((10#$reply))
  else
    echo "invalid thread count. staying with $WANT"
  fi
}
# Ask for a new end of the range and, if it is acceptable, store it in END
# and recompute RANGE.
#
# One line is read from stdin. An empty line keeps the current END. The reply
# is accepted only when it is a positive decimal integer without leading
# zeros AND greater than START (the same START < END rule applied to the
# command line); anything else is reported as invalid and ignored.
askrange() {
  local reply

  echo
  echo "What would you like to change the range end to? [$END]"
  read -r reply
  if [[ -z "$reply" ]]; then
    return 0
  fi

  # Validate the reply itself before it is ever assigned to END: garbage in
  # END would break every later arithmetic expression, and an END at or
  # below START would make RANGE zero or negative.
  if [[ $reply =~ ^[1-9][0-9]*$ ]] && (( reply > START )); then
    END=$reply
    RANGE=$((END - START + 1))
  else
    echo "Invalid range end. Staying with $END"
  fi
}
# before we start, remove the STOP file, or bff-thread.sh will stop after 1 profile
rm -f -- STOP "STOP.$$"

# Scheduler loop: reap finished threads, top the pool back up, print
# statistics, then wait up to 30 seconds for a key press.
while (( KEEPGOING == 1 )); do
  # check to see if any children have finished
  checkchildren

  # start new threads to fill voids left by ones that finished
  while (( RUNNING < WANT )); do
    startchild
  done

  # present statistics
  v=$((CUR - START))
  if (( RANGE > 0 )); then
    pct=$((100 * v / RANGE))
  else
    # an empty range has nothing left to assign; also avoids dividing by zero
    pct=100
  fi
  echo
  if (( CUR <= END )); then
    echo "next block starts at $CUR."
  fi
  echo "${pct}% of range ${START}-${END} assigned or completed."
  threadreport
  echo "press e to exit, t to change number of threads or r to change the range end"
  echo "any other key will update stats, or wait 30 seconds"
  if (( CUR <= END )); then
    echo "next block starts at $CUR."
  fi

  # check to see if we should keep going this is before the sleep so that
  # the sleep is almost certainly what gets interrupted with ^c, as everything
  # else is fast
  if (( WANT == 0 && CHILDTHREADS == 0 )); then
    KEEPGOING=0
  fi

  # sleep for a bit: wait up to 30 seconds for a single key press
  read -r -t 30 -n 1 dummy
  rc=$?
  if (( rc == 0 )); then
    case "$dummy" in
      e|E) askexit ;;
      t|T) askthreads ;;
      r|R) askrange ;;
    esac
  elif (( rc <= 128 )); then
    # exit code greater than 128 means timeout; a smaller non-zero status
    # means read failed immediately (e.g. stdin is at end-of-file), so pause
    # explicitly instead of spinning through the loop without any delay
    sleep 30
  fi

  # check for STOP, which may have been created by a disk-space monitor script
  if [[ -f STOP || -f "STOP.$$" ]]; then
    echo "STOP file detected. shutting down."
    KEEPGOING=0
  fi
done

# wait for any running threads to finish (will only happen if we stopped early)
while (( CHILDTHREADS > 0 )); do
  echo "waiting for $CHILDTHREADS threads to finish their current profile"
  threadreport
  sleep 10
  checkchildren
done
echo "done."
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement