DoubleJ

Multi-instance chunky

Jun 11th, 2011
353
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/bin/bash
  2.  
  3. # usage: ./chunky.sh START END THREADS
  4. # will download range START to END in chunks, keeping THREADS downloaders going
  5. # press e to exit, t to change the number of threads or r to change the range end.
  6. # while running, statistics are occasionally output
  7. #
  8. # Updates, 14 Jun 2011 DoubleJ:
  9. # * Recomputes $RANGE when new $END is provided.
  10. # * Cookie and STOP files are tagged with chunky's PID, so multiple chunkys should
  11. #   be able to politely share a directory. Touching STOP with no PID suffix will
  12. #   stop all chunkys in that directory.
  13. #
  14. # Updates, 12 Jun 2011 DoubleJ:
  15. # * While winding down, now checks $CHILDTHREADS instead of $RUNNING so it should
  16. #   now shut itself off properly.
  17. # * Aesthetic changes to $CHILDTHREADS in some places to make display make more sense.
  18. #
  19. # Updates, 11 Jun 2011 DoubleJ:
  20. # * 0 is a valid number of threads to request (allow open threads to coast to a stop)
  21. # * Can change the value of $END
  22. # * Counts all bff-thread instances against $WANT to allow a job to "hibernate" while
  23. #   a previous job completes
  24. #
  25. # Known issues:
  26. # * Since process-creation time is non-zero, chunky will occasionally over-spawn
  27. #   bff-threads. Once it catches up with itself, it behaves as if the user had scaled
  28. #   down the number of threads: extant threads are allowed to complete without being renewed.
  29. # * Different chunkys can "steal" each other's threads depending on when an old thread
  30. #   completes and when each chunky's 30-second timer ends. Over time this will even
  31. #   out, and people should only be running multiple chunkys when one is close to the
  32. #   end of its range, so I don't consider this a show-stopper.
  33.  
  34. usage()
  35. {
  36.         echo USAGE: $0 START END THREADS
  37.         echo The number of threads can be changed while running by pressing t
  38.         exit 1
  39. }
  40.  
  41. if [ $# -ne 3 ]; then
  42.         usage
  43. fi
  44.  
  45. START=`echo $1|grep -E '^[1-9][0-9]*$'`
  46. END=`echo $2|grep -E '^[1-9][0-9]*$'`
  47. WANT=`echo $3|grep -E '^[1-9][0-9]*$'`
  48. STEP=100
  49.  
  50. if [ -z "$START" ]; then
  51.         echo Invalid start value. Must be a positive integer.
  52.         usage
  53. fi
  54. if [ -z "$END" ]; then
  55.         echo Invalid end value. Must be a positive integer.
  56.         usage
  57. fi
  58. if [ -z "$WANT" ]; then
  59.         echo Invalid thread count. Must be a positive integer.
  60.         usage
  61. fi
  62. if [ $END -le $START ]; then
  63.         echo START must be less than END.
  64.         usage
  65. fi
  66.  
  67.  
  68. # command to call for each thread.
  69. THREAD_COMMAND=./bff-thread.sh
  70.  
  71. CHILDTHREADS=0
  72. RUNNING=`pgrep bff-thread | wc -l`
  73. CUR=$START
  74.  
  75. RANGE=$((END-START+1))
  76.  
  77. # check bash version for associative array support
  78. if [ ${BASH_VERSINFO[0]} -lt 4 ]; then
  79.         echo "error: chunky needs bash version 4.0 or greater. version $BASH_VERSION detected."
  80.         exit 1
  81. fi
  82.  
  83. # map from PID to cookie jar
  84. declare -A CHILDREN
  85.  
  86. # map from cookie jar to PID
  87. declare -A COOKIEJARS
  88.  
  89. # what is the start of the range for each PID
  90. declare -A thread_range
  91. # and current profile
  92. declare -A thread_current
  93.  
  94. KEEPGOING=1
  95. GETINPUT=0
  96.  
  97. startchild()
  98. {
  99.         # find an available cookie jar
  100.         jarnum=$WANT
  101.         for ((jar=0; jar<$WANT; jar++)); do
  102.                 if [ -z "${COOKIEJARS[$jar]}" ]; then
  103.                         jarnum=$jar
  104.                         break
  105.                 fi
  106.         done
  107.         if [ $jarnum -ge $WANT ]; then
  108.                 RUNNING=$WANT
  109.                 return
  110.         fi
  111.         # calculate range for this child
  112.         s=$CUR
  113.         CUR=$((CUR+STEP))
  114.         if [ $CUR -gt $END ]; then
  115.                 CUR=$((END+1))
  116.         fi
  117.         e=$((CUR-1))
  118.         rng=$((e-s+1))
  119.  
  120.         # start the child and get the PID
  121.         $THREAD_COMMAND $s $e cookies$$.${jarnum}.txt >> friendster.${s}-${e}.log 2>&1 &
  122.         cn=$!
  123.  
  124.         # record the new child
  125.         CHILDREN[$cn]=$jarnum
  126.         COOKIEJARS[$jarnum]=$cn
  127.         thread_range[$cn]=${s}:${rng}
  128.         thread_current[$cn]=$s
  129.         sleep .1
  130.         CHILDTHREADS=${#CHILDREN[@]}
  131.         RUNNING=`pgrep bff-thread | wc -l`
  132.  
  133.         # if we hit the end of the range, we don't want to start more children, ever
  134.         if [ $CUR -gt $END ]; then
  135.                 WANT=0
  136.         fi
  137. }
  138.  
  139. checkchildren()
  140. {
  141.         for c in ${COOKIEJARS[@]}; do
  142.                 kill -0 $c 2>/dev/null
  143.                 if [ $? -eq 1 ]; then
  144.                         # thread is gone. clear information related to it
  145.                         jar=${CHILDREN[$c]}
  146.                         unset CHILDREN[$c]
  147.                         unset COOKIEJARS[$jar]
  148.                         unset thread_range[$c]
  149.                         unset thread_current[$c]
  150.                 else
  151.                         # thread is alive. get the current status of the thread
  152.                         cur=`cat bffthread-${c} 2>/dev/null`
  153.                         if [ "$cur" ]; then
  154.                                 thread_current[$c]=$cur
  155.                         fi
  156.                 fi
  157.         done
  158.         CHILDTHREADS=${#CHILDREN[*]}
  159.         RUNNING=`pgrep bff-thread | wc -l`
  160. }
  161.  
  162. threadreport()
  163. {
  164.         if [ $CHILDTHREADS -lt $RUNNING ] && [ $CHILDTHREADS -lt $WANT ]; then
  165.                 echo "running threads (${CHILDTHREADS}/${WANT}) waiting for thread pool to open (${RUNNING} total):"
  166.         else
  167.                 echo "running threads (${CHILDTHREADS}/${WANT}):"
  168.         fi
  169.         for c in ${COOKIEJARS[@]}; do
  170.                 s=${thread_range[$c]}
  171.                 rng=${s#*:}
  172.                 s=${s%:*}
  173.                 e=$((s+rng-1))
  174.                 cur=${thread_current[$c]}
  175.                 v=$((cur-s))
  176.                 pct=$((100 * v / rng))
  177.                 echo " thread covering ${s}-${e}: ${cur} (${pct}%)"
  178.         done
  179. }
  180.  
  181. askexit()
  182. {
  183.         echo
  184.         echo "Do you wish to stop? [y/N]"
  185.         read e
  186.         if [ "$e" == "y" ]; then
  187.                 # stop the loop
  188.                 KEEPGOING=0
  189.                 # tell our children to stop
  190.                 touch STOP.$$
  191.         fi
  192. }
  193.  
  194. askthreads()
  195. {
  196.         echo
  197.         if [ $WANT -eq 0 ]; then
  198.                 echo no more blocks to assign. unable to change thread count.
  199.         else
  200.                 echo "How many threads do you want to run? [$WANT]"
  201.                 read w
  202.                 if [ -n "$w" ]; then
  203.                         # verify a valid number
  204.                         w=`echo $w|grep -E '^[0-9][0-9]*$'`
  205.                         if [ -z "$w" ]; then
  206.                                 echo invalid thread count. staying with $WANT
  207.                         else
  208.                                 WANT=$w
  209.                         fi
  210.                 fi
  211.         fi
  212. }
  213.  
  214. askrange()
  215. {
  216.         echo
  217.         echo "What would you like to change the range end to? [$END]"
  218.         read rend
  219.         if [ -n "$rend" ]; then
  220.                 r=`echo $rend|grep -E '^[1-9][0-9]*$'`
  221.                 if [ -z "$rend" ]; then
  222.                         echo Invalid range end. Staying with $END
  223.                 else
  224.                         END=$rend
  225.             RANGE=$((END-START+1))
  226.                 fi
  227.         fi
  228. }
  229.  
  230. # before we start, remove the STOP file, or bff-thread.sh will stop after 1 profile
  231. [ -f STOP ] && rm STOP
  232. [ -f STOP.$$ ] && rm STOP.$$
  233.  
  234. while [ $KEEPGOING -eq 1 ]; do
  235.         # check to see if any children have finished
  236.         checkchildren
  237.  
  238.         # start new threads to fill voids left by ones that finished
  239.         while [ $RUNNING -lt $WANT ]; do
  240.                 startchild
  241.         done
  242.  
  243.         # present statistics
  244.         v=$((CUR-START))
  245.         pct=$((100 * v / RANGE))
  246.         echo
  247.         [ $CUR -le $END ] && echo "next block starts at $CUR."
  248.         echo "${pct}% of range ${START}-${END} assigned or completed."
  249.         threadreport
  250.         echo "press e to exit, t to change number of threads or r to change the range end"
  251.         echo "any other key will update stats, or wait 30 seconds"
  252.         [ $CUR -le $END ] && echo "next block starts at $CUR."
  253.  
  254.         # check to see if we should keep going this is before the sleep so that
  255.         # the sleep is almost certainly what gets interrupted with ^c, as everything
  256.         # else is fast
  257.         if [ $WANT -eq 0 ] && [ $CHILDTHREADS -eq 0 ]; then
  258.                 KEEPGOING=0
  259.         fi
  260.  
  261.         # sleep for a bit
  262.         read -t 30 -n 1 dummy
  263.         # exit code greater than 128 means timeout
  264.         if [ $? -le 128 ]; then
  265.                 case "$dummy" in
  266.                 e|E) askexit;;
  267.                 t|T) askthreads;;
  268.                 r|R) askrange;;
  269.                 esac
  270.         fi
  271.  
  272.         # check for STOP, which may have been created by a disk-space monitor script
  273.         if [ -f STOP ] || [ -f STOP.$$ ]; then
  274.                 echo STOP file detected. shutting down.
  275.                 KEEPGOING=0
  276.         fi
  277. done
  278.  
  279. # wait for any running threads to finish (will only happen if we stopped early)
  280. while [ $CHILDTHREADS -gt 0 ]; do
  281.         echo waiting for $CHILDTHREADS threads to finish their current profile
  282.         threadreport
  283.         sleep 10
  284.         checkchildren
  285. done
  286.  
  287. echo done.
RAW Paste Data