Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# check - rank the capitalized words in a transcript that spell(1) does not
#         recognize, by how many lines of the transcript mention them.
#
# pdftotext does not seem to work in batch mode, so convert the PDF to text
# separately before running this script; we pick up from that step.
#
# Steps:
#   1. Use spell to write out the unusual English words in the text file.
#   2. Keep only the words that start with a capital letter (possessive
#      forms containing 's are dropped).
#   3. For each such word, count the lines of the text file containing it.
#   4. Sort by count and list the top 15, most frequent first.
#
# Usage: ./check '081618 Toscas Transcript_Redacted.txt'
#
# Files written next to the input (input name with its extension removed):
#   <name>.lst  candidate words, one per line
#   <name>.dat  "<word> <line-count>" for every candidate
#   <name>.out  the 15 highest counts from <name>.dat, descending
#
# Exit status: 0 on success, 2 on missing argument, 1 if the input file is
# not readable, 127 if spell is not installed.

main() {
  if (( $# < 1 )); then
    printf 'Usage: %s <text-file>\n' "${0##*/}" >&2
    return 2
  fi

  local input=$1
  if [[ ! -f "$input" || ! -r "$input" ]]; then
    printf '%s: cannot read %s\n' "${0##*/}" "$input" >&2
    return 1
  fi
  if ! command -v spell >/dev/null 2>&1; then
    printf '%s: spell is required but was not found\n' "${0##*/}" >&2
    return 127
  fi

  # Strip the extension from the file name only. A plain ${input%.*} would
  # also cut at a dot inside the directory part (./file, dir.d/file).
  local dir='' base stem prefix
  base=${input##*/}
  if [[ "$input" == */* ]]; then
    dir=${input%/*}/
  fi
  stem=${base%.*}
  if [[ -z "$stem" ]]; then
    stem=$base   # name such as ".txt": keep it whole rather than use ""
  fi
  prefix=${dir}${stem}

  printf '%s\n' "$input"

  # [[:upper:]] rather than [A-Z]: the range form can match lowercase
  # letters in some locales.
  spell "$input" \
    | sort -u \
    | grep '^[[:upper:]]' \
    | grep -v "'s" > "${prefix}.lst"

  local word count
  while IFS= read -r word; do
    # -F: the word is literal text, not a regex ("A.B" must not match "AxB").
    # -c counts matching lines; its exit status is 1 when the count is 0,
    # which is not an error here.
    count=$(grep -c -F -- "$word" "$input")
    printf '%s %d\n' "$word" "$count"
  done < "${prefix}.lst" > "${prefix}.dat"

  # Ascending numeric sort, keep the last 15, then reverse: highest first.
  sort -k 2,2n "${prefix}.dat" | tail -15 | tac > "${prefix}.out"
}

main "$@"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement