Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# toprated - sort input by count, showing totals and percentages
#
# Copyright (C) 2012 Rodrigo Silva (MestreLion) <linux@rodrigosilva.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/gpl.html>
#
# Think of it as a sort | uniq -c | sort -rn on steroids ;)
#
# TODO: allow <total> to be printed as last line instead of first
# TODO: allow <other> to be printed in its list position instead of last
# Defaults. Each value may be overridden by its command-line option.

# Filtering thresholds: rows below either one are folded into the <other> row.
mincount=0
minperc=0

# Number of decimal digits used when printing percentages.
precision=0

# Labels for the synthetic <total> and <other> rows.
totallabel='Total'
otherlabel='Other'

# Output toggles (1 = print, 0 = suppress).
showtotal=1
showperc=1
showother=1
# fatal [MESSAGE] [STATUS]
# Abort the script. When MESSAGE is non-empty it is written to stderr as
# "<myname>: error: MESSAGE". Exits with STATUS (default 1).
# Globals: myname (read)
fatal() {
  local message=${1-}
  local status=${2:-1}

  if [[ -n "$message" ]]; then
    printf '%s: error: %s\n' "$myname" "$message" >&2
  fi
  # Quoted so an odd STATUS cannot be word-split into several arguments.
  exit "$status"
}
# argerr [MESSAGE]
# Report a command-line error on stderr as "<myname>: MESSAGE" (MESSAGE
# defaults to "error"), then hand over to `usage 1`.
# Globals: myname (read)
argerr() {
  local message=${1:-error}

  printf '%s: %s\n' "$myname" "$message" >&2
  usage 1
}
# invalid OPTION
# Report OPTION as an unrecognised command-line option via argerr.
invalid() {
  local option=$1

  argerr "invalid option: ${option}"
}
# missing [OPTION] [KIND]
# Report a missing operand via argerr. The message reads
# "missing [KIND ]operand[ from OPTION]." with the bracketed parts included
# only when the corresponding argument is non-empty.
missing() {
  local option=$1
  local kind=$2
  local message='missing '

  if [[ -n "$kind" ]]; then
    message+="$kind "
  fi
  message+='operand'
  if [[ -n "$option" ]]; then
    message+=" from $option"
  fi

  argerr "${message}."
}
# integer VALUE [OPTION]
# Validate that VALUE is a non-negative integer written with digits only.
# On failure, reports "'VALUE'[ in OPTION] is not an integer." via argerr.
# Returns 0 when VALUE is valid.
integer() {
  local value=${1-}
  local option=${2-}

  # An empty string contains no non-digit characters, so it must be rejected
  # explicitly; otherwise "" would be accepted as a number.
  if [[ -z "$value" || "$value" == *[!0-9]* ]]; then
    argerr "'$value'${option:+ in $option} is not an integer."
  fi
}
# usage [ERROR]
# Print usage information and exit.
#   - With a non-empty ERROR argument: write the short usage line and a hint
#     to stderr (so it never pollutes piped stdout) and exit with status 1.
#   - Without arguments: write the full help page to stdout and exit 0.
# Globals: myname, precision, totallabel, otherlabel (read, shown in the text)
usage() {
  if [[ -n "${1-}" ]]; then
    cat >&2 <<USAGE
Usage: $myname [options] [FILE...]
Try '$myname --help' for more information.
USAGE
    exit 1
  fi

  # Plain heredoc (not <<-): the text must not depend on tab indentation.
  cat <<USAGE
Usage: $myname [options] [FILE...]

Sort input by count, printing totals and percentages. Think of it as
sort | uniq -c | sort -rn on steroids.

If FILE is not given, read from standard input. For numeric input
options, NUM must be a non-negative integer (digits only). All options
requiring arguments accept both --option=ARG or --option ARG forms.

Options:
  -h|--help            show this page.

  --min-count=NUM      only print lines with count >= NUM
  --min-perc=NUM       only print lines with count percent >= NUM%
      All lines with count less than any of the above options will be
      grouped together as a single <other> line, printed last by default

  --precision=NUM      use NUM decimal digits for the percentages,
                       default $precision

  --label-total=LABEL  use LABEL for <total> line, default "$totallabel"
  --label-other=LABEL  use LABEL for <other> line, default "$otherlabel"

  --no-perc            do not print percentages
  --no-total           do not print <total> line
  --no-other           do not print <other> line

  --total-last         print <total> line last instead of first *
  --sort-other         print <other> line in sorted position *
      * (above options not yet implemented)

Examples:
  # Group all lines with count = 1 as "Other"
  $myname --min-count=2

  # Ignore lines with count < 10%
  $myname --min-perc=10 --no-other

  # Behaves (almost*) exactly like sort | uniq -c | sort -nr
  $myname --no-total --no-perc
      * (it still pads counts as if total was present)

Copyright (C) 2012 Rodrigo Silva (MestreLion) <linux@rodrigosilva.com>
License: GPLv3 or later. See <http://www.gnu.org/licenses/gpl.html>
USAGE
  exit 0
}
myname="${0##*/}"
files=()

# parse_args [ARG...]
# Parse the command line into the option globals and the `files` array, then
# validate the result.
#   - -h/--help anywhere before a literal "--" shows the help page at once.
#   - Options taking an operand accept --option=ARG and --option ARG.
#   - Everything after "--" is taken as a file name, even if it starts
#     with "-".
# Globals written: mincount minperc precision totallabel otherlabel
#                  showtotal showperc showother files
# Errors are reported through invalid/missing/integer, which exit the script.
parse_args() {
  local arg consumed

  # Help takes precedence over every other option, but words after "--" are
  # file names and must not trigger it.
  for arg in "$@"; do
    case "$arg" in
      --) break ;;
      -h | --help) usage ;;
    esac
  done

  while (( $# )); do
    consumed=1
    case "$1" in
      --min-count=*)   mincount="${1#*=}" ;;
      --min-perc=*)    minperc="${1#*=}" ;;
      --precision=*)   precision="${1#*=}" ;;
      --label-total=*) totallabel="${1#*=}" ;;
      --label-other=*) otherlabel="${1#*=}" ;;
      # Separate-operand forms: a missing operand yields an empty value,
      # which the validation below reports as "missing ... operand".
      --min-count)     mincount="${2-}";   consumed=2 ;;
      --min-perc)      minperc="${2-}";    consumed=2 ;;
      --precision)     precision="${2-}";  consumed=2 ;;
      --label-total)   totallabel="${2-}"; consumed=2 ;;
      --label-other)   otherlabel="${2-}"; consumed=2 ;;
      --no-total)      showtotal=0 ;;
      --no-perc)       showperc=0 ;;
      --no-other)      showother=0 ;;
      # Append (not assign) so files given before "--" are kept.
      --)              shift; files+=( "$@" ); break ;;
      -*)              invalid "$1" ;;
      *)               files+=( "$1" ) ;;
    esac
    # Never shift past the end when an option's operand is missing.
    (( consumed > $# )) && consumed=$#
    shift "$consumed"
  done

  [[ "$totallabel" ]] || missing "--label-total" "LABEL"
  [[ "$otherlabel" ]] || missing "--label-other" "LABEL"
  [[ "$mincount"   ]] || missing "--min-count"   "NUM"
  [[ "$minperc"    ]] || missing "--min-perc"    "NUM"
  [[ "$precision"  ]] || missing "--precision"   "NUM"

  integer "$mincount"  "--min-count"
  integer "$minperc"   "--min-perc"
  integer "$precision" "--precision"
}

parse_args "$@"
# toprated_report [FILE...]
# Count identical lines of FILE... (or stdin), and print them by descending
# count as "<count> [<percent>%] <line>", preceded by a <total> row and
# followed by an <other> row that aggregates everything below the thresholds.
# Empty input produces no output.
#
# Globals read (defaults apply when unset or empty):
#   mincount, minperc   rows below either threshold go to the <other> row
#   precision           decimal digits for percentages
#   showtotal/showperc/showother   1 = print, 0 = suppress
#   totallabel, otherlabel         text for the synthetic rows
#
# Notes:
#   - All distinct lines are buffered in awk so the total is known before the
#     first row is printed; memory use grows with the number of distinct lines.
#   - Labels travel through the environment rather than `awk -v`, because -v
#     would interpret backslash escapes in them.
#   - Format strings are built by concatenation instead of "%*.*f" so the
#     program runs on any POSIX awk, not only gawk.
toprated_report() {
  sort -- "$@" | uniq -c | sort -nr |
    TOPRATED_TOTAL_LABEL="${totallabel:-Total}" \
    TOPRATED_OTHER_LABEL="${otherlabel:-Other}" \
    awk \
      -v showtotal="${showtotal:-1}" \
      -v showperc="${showperc:-1}" \
      -v showother="${showother:-1}" \
      -v mincount="${mincount:-0}" \
      -v minperc="${minperc:-0}" \
      -v precision="${precision:-0}" '
    # Print one row, with or without the percentage column.
    function emit(n, pct, label) {
      if (showperc)
        printf(fmt_perc, n, pct, label)
      else
        printf(fmt_plain, n, label)
    }
    BEGIN {
      total_label = ENVIRON["TOPRATED_TOTAL_LABEL"]
      other_label = ENVIRON["TOPRATED_OTHER_LABEL"]
      mincount  += 0
      minperc   += 0
      precision += 0
      # Percentage column: 3 integer digits, plus "." and the decimals.
      ppad = 3
      if (precision > 0) ppad += precision + 1
    }
    {
      # uniq -c emits "<padding><count> <line>"; keep the whole line,
      # including any embedded whitespace, not just its first word.
      count[NR] = $1 + 0
      text = $0
      sub(/^[ \t]*[0-9]+ ?/, "", text)
      item[NR] = text
      total += count[NR]
    }
    END {
      if (total > 0) {
        # Counts are padded to the width of the total, plus 2.
        cpad = length(total "") + 2
        fmt_plain = "%" cpad "d %s\n"
        fmt_perc  = "%" cpad "d %" ppad "." precision "f%% %s\n"

        if (showtotal && 100 >= minperc && total >= mincount)
          emit(total, 100, total_label)

        for (i = 1; i <= NR; i++) {
          perc = 100 * count[i] / total
          if (perc >= minperc && count[i] >= mincount)
            emit(count[i], perc, item[i])
          else if (showother)
            other += count[i]
        }

        if (other > 0)
          emit(other, 100 * other / total, other_label)
      }
    }
    '
}

toprated_report "${files[@]}"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement