SHARE
TWEET

Multi-instance chunky

DoubleJ Jun 11th, 2011 223 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/bin/bash
  2.  
  3. # usage: ./chunky.sh START END THREADS
  4. # will download range START to END in chunks, keeping THREADS downloaders going
  5. # press e to exit, t to change the number of threads or r to change the range end.
  6. # while running, statistics are occasionally output
  7. #
  8. # Updates, 14 Jun 2011 DoubleJ:
  9. # * Recomputes $RANGE when new $END is provided.
  10. # * Cookie and STOP files are tagged with chunky's PID, so multiple chunkys should
  11. #   be able to politely share a directory. Touching STOP with no PID suffix will
  12. #   stop all chunkys in that directory.
  13. #
  14. # Updates, 12 Jun 2011 DoubleJ:
  15. # * While winding down, now checks $CHILDTHREADS instead of $RUNNING so it should
  16. #   now shut itself off properly.
  17. # * Aesthetic changes to $CHILDTHREADS in some places to make display make more sense.
  18. #
  19. # Updates, 11 Jun 2011 DoubleJ:
  20. # * 0 is a valid number of threads to request (allow open threads to coast to a stop)
  21. # * Can change the value of $END
  22. # * Counts all bff-thread instances against $WANT to allow a job to "hibernate" while
  23. #   a previous job completes
  24. #
  25. # Known issues:
  26. # * Since process-creation time is non-zero occasionally chunky will over-spawn
  27. #   bff-threads. Once it catches up to itself it behaves as if the user scaled down
  28. #   the number of threads and allows extant threads to complete without being renewed.
  29. # * Different chunkys can "steal" each other's threads depending on when an old thread
  30. #   completes and when each chunky's 30-second timer ends. Over time this will even
  31. #   out, and people should only be running multiple chunkys when one is close to the
  32. #   end of its range, so I don't consider this a show-stopper.
  33.  
  34. usage()
  35. {
  36.         echo USAGE: $0 START END THREADS
  37.         echo The number of threads can be changed while running by pressing t
  38.         exit 1
  39. }
  40.  
  41. if [ $# -ne 3 ]; then
  42.         usage
  43. fi
  44.  
  45. START=`echo $1|grep -E '^[1-9][0-9]*$'`
  46. END=`echo $2|grep -E '^[1-9][0-9]*$'`
  47. WANT=`echo $3|grep -E '^[1-9][0-9]*$'`
  48. STEP=100
  49.  
  50. if [ -z "$START" ]; then
  51.         echo Invalid start value. Must be a positive integer.
  52.         usage
  53. fi
  54. if [ -z "$END" ]; then
  55.         echo Invalid end value. Must be a positive integer.
  56.         usage
  57. fi
  58. if [ -z "$WANT" ]; then
  59.         echo Invalid thread count. Must be a positive integer.
  60.         usage
  61. fi
  62. if [ $END -le $START ]; then
  63.         echo START must be less than END.
  64.         usage
  65. fi
  66.  
  67.  
  68. # command to call for each thread.
  69. THREAD_COMMAND=./bff-thread.sh
  70.  
  71. CHILDTHREADS=0
  72. RUNNING=`pgrep bff-thread | wc -l`
  73. CUR=$START
  74.  
  75. RANGE=$((END-START+1))
  76.  
  77. # check bash version for associative array support
  78. if [ ${BASH_VERSINFO[0]} -lt 4 ]; then
  79.         echo "error: chunky needs bash version 4.0 or greater. version $BASH_VERSION detected."
  80.         exit 1
  81. fi
  82.  
  83. # map from PID to cookie jar
  84. declare -A CHILDREN
  85.  
  86. # map from cookie jar to PID
  87. declare -A COOKIEJARS
  88.  
  89. # what is the start of the range for each PID
  90. declare -A thread_range
  91. # and current profile
  92. declare -A thread_current
  93.  
  94. KEEPGOING=1
  95. GETINPUT=0
  96.  
  97. startchild()
  98. {
  99.         # find an available cookie jar
  100.         jarnum=$WANT
  101.         for ((jar=0; jar<$WANT; jar++)); do
  102.                 if [ -z "${COOKIEJARS[$jar]}" ]; then
  103.                         jarnum=$jar
  104.                         break
  105.                 fi
  106.         done
  107.         if [ $jarnum -ge $WANT ]; then
  108.                 RUNNING=$WANT
  109.                 return
  110.         fi
  111.         # calculate range for this child
  112.         s=$CUR
  113.         CUR=$((CUR+STEP))
  114.         if [ $CUR -gt $END ]; then
  115.                 CUR=$((END+1))
  116.         fi
  117.         e=$((CUR-1))
  118.         rng=$((e-s+1))
  119.  
  120.         # start the child and get the PID
  121.         $THREAD_COMMAND $s $e cookies$$.${jarnum}.txt >> friendster.${s}-${e}.log 2>&1 &
  122.         cn=$!
  123.  
  124.         # record the new child
  125.         CHILDREN[$cn]=$jarnum
  126.         COOKIEJARS[$jarnum]=$cn
  127.         thread_range[$cn]=${s}:${rng}
  128.         thread_current[$cn]=$s
  129.         sleep .1
  130.         CHILDTHREADS=${#CHILDREN[@]}
  131.         RUNNING=`pgrep bff-thread | wc -l`
  132.  
  133.         # if we hit the end of the range, we don't want to start more children, ever
  134.         if [ $CUR -gt $END ]; then
  135.                 WANT=0
  136.         fi
  137. }
  138.  
  139. checkchildren()
  140. {
  141.         for c in ${COOKIEJARS[@]}; do
  142.                 kill -0 $c 2>/dev/null
  143.                 if [ $? -eq 1 ]; then
  144.                         # thread is gone. clear information related to it
  145.                         jar=${CHILDREN[$c]}
  146.                         unset CHILDREN[$c]
  147.                         unset COOKIEJARS[$jar]
  148.                         unset thread_range[$c]
  149.                         unset thread_current[$c]
  150.                 else
  151.                         # thread is alive. get the current status of the thread
  152.                         cur=`cat bffthread-${c} 2>/dev/null`
  153.                         if [ "$cur" ]; then
  154.                                 thread_current[$c]=$cur
  155.                         fi
  156.                 fi
  157.         done
  158.         CHILDTHREADS=${#CHILDREN[*]}
  159.         RUNNING=`pgrep bff-thread | wc -l`
  160. }
  161.  
  162. threadreport()
  163. {
  164.         if [ $CHILDTHREADS -lt $RUNNING ] && [ $CHILDTHREADS -lt $WANT ]; then
  165.                 echo "running threads (${CHILDTHREADS}/${WANT}) waiting for thread pool to open (${RUNNING} total):"
  166.         else
  167.                 echo "running threads (${CHILDTHREADS}/${WANT}):"
  168.         fi
  169.         for c in ${COOKIEJARS[@]}; do
  170.                 s=${thread_range[$c]}
  171.                 rng=${s#*:}
  172.                 s=${s%:*}
  173.                 e=$((s+rng-1))
  174.                 cur=${thread_current[$c]}
  175.                 v=$((cur-s))
  176.                 pct=$((100 * v / rng))
  177.                 echo " thread covering ${s}-${e}: ${cur} (${pct}%)"
  178.         done
  179. }
  180.  
  181. askexit()
  182. {
  183.         echo
  184.         echo "Do you wish to stop? [y/N]"
  185.         read e
  186.         if [ "$e" == "y" ]; then
  187.                 # stop the loop
  188.                 KEEPGOING=0
  189.                 # tell our children to stop
  190.                 touch STOP.$$
  191.         fi
  192. }
  193.  
  194. askthreads()
  195. {
  196.         echo
  197.         if [ $WANT -eq 0 ]; then
  198.                 echo no more blocks to assign. unable to change thread count.
  199.         else
  200.                 echo "How many threads do you want to run? [$WANT]"
  201.                 read w
  202.                 if [ -n "$w" ]; then
  203.                         # verify a valid number
  204.                         w=`echo $w|grep -E '^[0-9][0-9]*$'`
  205.                         if [ -z "$w" ]; then
  206.                                 echo invalid thread count. staying with $WANT
  207.                         else
  208.                                 WANT=$w
  209.                         fi
  210.                 fi
  211.         fi
  212. }
  213.  
  214. askrange()
  215. {
  216.         echo
  217.         echo "What would you like to change the range end to? [$END]"
  218.         read rend
  219.         if [ -n "$rend" ]; then
  220.                 r=`echo $rend|grep -E '^[1-9][0-9]*$'`
  221.                 if [ -z "$rend" ]; then
  222.                         echo Invalid range end. Staying with $END
  223.                 else
  224.                         END=$rend
  225.                         RANGE=$((END-START+1))
  226.                 fi
  227.         fi
  228. }
  229.  
  230. # before we start, remove the STOP file, or bff-thread.sh will stop after 1 profile
  231. [ -f STOP ] && rm STOP
  232. [ -f STOP.$$ ] && rm STOP.$$
  233.  
  234. while [ $KEEPGOING -eq 1 ]; do
  235.         # check to see if any children have finished
  236.         checkchildren
  237.  
  238.         # start new threads to fill voids left by ones that finished
  239.         while [ $RUNNING -lt $WANT ]; do
  240.                 startchild
  241.         done
  242.  
  243.         # present statistics
  244.         v=$((CUR-START))
  245.         pct=$((100 * v / RANGE))
  246.         echo
  247.         [ $CUR -le $END ] && echo "next block starts at $CUR."
  248.         echo "${pct}% of range ${START}-${END} assigned or completed."
  249.         threadreport
  250.         echo "press e to exit, t to change number of threads or r to change the range end"
  251.         echo "any other key will update stats, or wait 30 seconds"
  252.         [ $CUR -le $END ] && echo "next block starts at $CUR."
  253.  
  254.         # check to see if we should keep going this is before the sleep so that
  255.         # the sleep is almost certainly what gets interrupted with ^c, as everything
  256.         # else is fast
  257.         if [ $WANT -eq 0 ] && [ $CHILDTHREADS -eq 0 ]; then
  258.                 KEEPGOING=0
  259.         fi
  260.  
  261.         # sleep for a bit
  262.         read -t 30 -n 1 dummy
  263.         # exit code greater than 128 means timeout
  264.         if [ $? -le 128 ]; then
  265.                 case "$dummy" in
  266.                 e|E) askexit;;
  267.                 t|T) askthreads;;
  268.                 r|R) askrange;;
  269.                 esac
  270.         fi
  271.  
  272.         # check for STOP, which may have been created by a disk-space monitor script
  273.         if [ -f STOP ] || [ -f STOP.$$ ]; then
  274.                 echo STOP file detected. shutting down.
  275.                 KEEPGOING=0
  276.         fi
  277. done
  278.  
  279. # wait for any running threads to finish (will only happen if we stopped early)
  280. while [ $CHILDTHREADS -gt 0 ]; do
  281.         echo waiting for $CHILDTHREADS threads to finish their current profile
  282.         threadreport
  283.         sleep 10
  284.         checkchildren
  285. done
  286.  
  287. echo done.
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top