Advertisement
MestreLion

toprated - sort input by count, with total and percentages

Apr 15th, 2012
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 5.59 KB | None | 0 0
  1. #!/bin/bash
  2. #
  3. # toprated - sort input by count, showing totals and percentages
  4. #
  5. #    Copyright (C) 2012 Rodrigo Silva (MestreLion) <linux@rodrigosilva.com>
  6. #
  7. #    This program is free software: you can redistribute it and/or modify
  8. #    it under the terms of the GNU General Public License as published by
  9. #    the Free Software Foundation, either version 3 of the License, or
  10. #    (at your option) any later version.
  11. #
  12. #    This program is distributed in the hope that it will be useful,
  13. #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15. #    GNU General Public License for more details.
  16. #
  17. #    You should have received a copy of the GNU General Public License
  18. #    along with this program. If not, see <http://www.gnu.org/licenses/gpl.html>
  19. #
  20. # Think of it as a sort | uniq -c | sort -rn on steroids ;)
  21. #
  22. # TODO: allow <total> to be printed as last line instead of first
  23. # TODO: allow <other> to be printed in its list position instead of last
  24.  
  25. #Defauls:
  26. mincount=0
  27. minperc=0
  28. showtotal=1
  29. showperc=1
  30. showother=1
  31. precision=0
  32. totallabel="Total"
  33. otherlabel="Other"
  34.  
  35. fatal()   { [[ "$1" ]] && echo "$myname: error: $1" >&2 ; exit ${2:-1} ; }
  36. argerr()  { echo "$myname: ${1:-error}" >&2 ; usage 1 ; }
  37. invalid() { argerr "invalid option: $1" ; }
  38. missing() { argerr "missing ${2:+$2 }operand${1:+ from $1}." ; }
  39. integer() { [[ "$1" != *[!0-9]* ]] || argerr "'$1'${2:+ in $2} is not an integer." ; }
  40.  
  41. usage() {
  42.     cat <<- USAGE
  43.     Usage: $myname [options] [FILE...]
  44.     USAGE
  45.     if [[ "$1" ]] ; then
  46.         cat >&2 <<- USAGE
  47.         Try '$myname --help' for more information.
  48.         USAGE
  49.         exit 1
  50.     fi
  51.     cat <<-USAGE
  52.  
  53.     Sort input by count, printing totals and percentages. Think of it as
  54.     sort | uniq -c | sort -rn on steroids.
  55.  
  56.     If FILE is not given, read from standard input. For numeric input
  57.     options, NUM must be a positive integer (digits only). All options
  58.     requiring arguments accept both --option=ARG or --option ARG forms
  59.  
  60.     Options:
  61.       -h|--help            show this page.
  62.  
  63.       --min-count=NUM      only print lines with count >= NUM
  64.       --min-perc=NUM       only print lines with count percent >= NUM%
  65.  
  66.       All lines with count less than any of the above options will be
  67.       grouped together as a single <other> line, printed last by default
  68.  
  69.       --precision=NUM      use NUM decimal digits for the percentages,
  70.                            default $precision
  71.  
  72.       --label-total=LABEL  use LABEL for <total> line, default "$totallabel"
  73.       --label-other=LABEL  use LABEL for <other> line, default "$otherlabel"
  74.  
  75.       --no-perc            do not print percentages
  76.       --no-total           do not print <total> line
  77.       --no-other           do not print <other> line
  78.  
  79.       --total-last         print <total> line last instead of first *
  80.       --sort-other         print <other> line in sorted position *
  81.  
  82.       * (above options not yet implemented)
  83.  
  84.     Examples:
  85.  
  86.     # Group all lines with count = 1 as "Other"
  87.     $myname --min-count=2
  88.  
  89.     # Ignore lines with count < 10%
  90.     $myname --min-perc=10 --no-other
  91.  
  92.     # Behaves (almost*) exactly like sort | uniq -c | sort -nr
  93.     $myname --no-total --no-perc
  94.  
  95.     * (it still pads counts as if total was present)
  96.  
  97.     Copyright (C) 2012 Rodrigo Silva (MestreLion) <linux@rodrigosilva.com>
  98.     License: GPLv3 or later. See <http://www.gnu.org/licenses/gpl.html>
  99.     USAGE
  100.     exit 0
  101. }
  102.  
  103. myname="${0##*/}"
  104. files=()
  105. for arg in "$@"; do [[ "$arg" == "-h" || "$arg" == "--help" ]] && usage ; done
  106. while (( $# )); do
  107.     case "$1" in
  108.     --min-count=*   ) mincount="${1#*=}"             ;;
  109.     --min-perc=*    ) minperc="${1#*=}"              ;;
  110.     --precision=*   ) precision="${1#*=}"            ;;
  111.     --label-total=* ) totallabel="${1#*=}"           ;;
  112.     --label-other=* ) otherlabel="${1#*=}"           ;;
  113.     --min-count     ) shift ; mincount="$1"          ;;
  114.     --min-perc      ) shift ; minperc="$1"           ;;
  115.     --precision     ) shift ; precision="$1"         ;;
  116.     --label-total   ) shift ; totallabel="$1"        ;;
  117.     --label-other   ) shift ; otherlabel="$1"        ;;
  118.     --no-total      ) showtotal=0                    ;;
  119.     --no-perc       ) showperc=0                     ;;
  120.     --no-other      ) showother=0                    ;;
  121.     --              ) shift ; files=( "$@" ) ; break ;;
  122.     -*              ) invalid "$1"                   ;;
  123.     *               ) files+=( "$1" )                ;;
  124.     esac
  125.     shift
  126. done
  127.  
  128. [[ "$totallabel" ]] || missing "--label-total" "LABEL"
  129. [[ "$otherlabel" ]] || missing "--label-other" "LABEL"
  130. [[ "$mincount"   ]] || missing "--min-count"   "NUM"
  131. [[ "$minperc"    ]] || missing "--min-perc"    "NUM"
  132. [[ "$precision"  ]] || missing "--precision"   "NUM"
  133.  
  134. integer "$mincount"  "--min-count"
  135. integer "$minperc"   "--min-perc"
  136. integer "$precision" "--precision"
  137.  
  138. sort "${files[@]}" | uniq -c |
  139. awk -F' ' -v label="$totallabel" '
  140.         {total+=$1; print}
  141.     END {print " ", total, label}' |
  142. sort -nr |
  143. awk -v showtotal=$showtotal -v showperc=$showperc -v showother=$showother \
  144.     -v mincount=$mincount -v minperc=$minperc \
  145.     -v precision=$precision -v label="$otherlabel" -F ' ' --posix '
  146.     function printitem(count, perc, item) {
  147.         printf("%*d %*.*f%% %s\n",cpad,count,ppad,prescision,perc,item)
  148.     }
  149.     BEGIN {
  150.         ppad = 3
  151.         if (precision > 0) ppad += precision+1
  152.     }
  153.     FNR==1 {
  154.         total = $1
  155.         cpad = length($1)+2
  156.     }
  157.     {
  158.         perc = 100*$1/total
  159.         abovemin = (perc >= minperc + 0) && ($1 >= mincount + 0)
  160.         if (abovemin && (FNR>1 || showtotal))
  161.             printitem($1,perc,$2)
  162.         if (!abovemin && showother && FNR>1)
  163.             other += $1
  164.     }
  165.     END {
  166.         if (other > 0) {
  167.             perc = 100*other/total
  168.             printitem(other,perc,label)
  169.         }
  170.     }
  171. '
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement