oyouareatubeo

tlist.sh

Jul 19th, 2012
30
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 1.29 KB | None | 0 0
  #!/bin/bash
  #############################################################
  # tlist.sh -- build a wordlist from Twitter search results.
  #
  # Searches Twitter for a given search term and turns the text
  # of the returned tweets into a lowercase wordlist, one word
  # per line, with stop words filtered out.
  #
  # USAGE: ./tlist.sh {keyword}
  # EXAMPLE: counterculture.txt, http://pastebin.com/XuCwMJW6
  #          cyberculture.txt, http://pastebin.com/Le2G44yf
  # EXTERNALS: stop-words.txt, http://pastebin.com/ihkVeWeX
  #            Read from /usr/local/lib/stop-words.txt unless the
  #            TLIST_STOP_WORDS environment variable names another
  #            file (one extended regex per line).
  # EXIT STATUS: non-zero on missing argument, unreadable
  #              stop-words file, failed download, or when no
  #              words survive the filters.
  #############################################################

  # Let a curl failure surface in the pipeline's exit status instead
  # of being masked by the later filter stages.
  set -o pipefail

  readonly SEARCH_URL='http://search.twitter.com/search.json'
  readonly STOP_WORDS="${TLIST_STOP_WORDS:-/usr/local/lib/stop-words.txt}"

  # No search term: print usage on stderr (so it can never end up in a
  # redirected wordlist) and signal the error through the exit status.
  if [[ -z "${1:-}" ]]; then
    printf '\n::::::\n\ntlist creates a list of words, one word per line.\nUse a keyword as a seed to your wordlist.\n\nUSAGE :: tlist {search-term}\n\n::::::\n\n' >&2
    exit 1
  fi

  # Fail early with a clear message rather than letting the last grep
  # of the pipeline complain after the download has already happened.
  if [[ ! -r "$STOP_WORDS" ]]; then
    printf 'tlist: cannot read stop-words file: %s\n' "$STOP_WORDS" >&2
    exit 1
  fi

  ### THE MEAT:: ###
  # NOTE(review): the JSON is picked apart with line-oriented text tools,
  # so tweet text containing commas is truncated at the first comma.
  # A real JSON parser would be more robust but is not a dependency here.
  # NOTE(review): confirm that the search endpoint above is still served.
  curl -sS -f -G \
    --data-urlencode "q=$1" \
    --data-urlencode 'rpp=500' \
    "$SEARCH_URL" |             # Ask Twitter for words (term is URL-encoded).
    tr ',' '\n' |               # Split the JSON into one key-value pair per line.
    grep '^"text' |             # Keep the tweet text entries only.
    cut -d'"' -f4- |            # Drop the JSON key from each line.
    tr ' ' '\n' |               # Put every word on its own line.
    sed -e 's/"//g' \
        -e 's/^#//' \
        -e 's/^@//' |           # Strip double quotes, hashtag and mention markers.
    grep -Ev '^https?:' |       # Don't include URLs (http or https).
    grep -v '\\' |              # Drop words containing backslash escapes.
    tr -cs 'A-Za-z' '\n' |      # Turn every run of non-letters into a line break.
    tr 'A-Z' 'a-z' |            # Lowercase everything.
    grep -v '^$' |              # Drop the blank line tr leaves for leading non-letters.
    grep -Ev -f "$STOP_WORDS"   # Filter out stop words.
Advertisement
Add Comment
Please, Sign In to add comment