Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
#!/bin/bash
# Adapted from http://automateeverything.tumblr.com/post/22700447516/blekko-keyword-suggest-scraper
#
# Keyword-suggestion scraper.
#
# Reads one seed keyword per line from input.txt. For every keyword it queries
# several autocomplete endpoints, first with the bare keyword and then with the
# keyword followed by each of a-z and 0-9 (in random order). The raw,
# per-engine results are appended to tmp.txt (left in place afterwards as a
# log); the de-duplicated, sorted suggestions are written to output.txt.
#
# Requires: bash 4+ (mapfile), curl, shuf.
#
# 'set -e' / 'pipefail' are intentionally NOT enabled: an endpoint that fails
# or a grep that matches nothing is a normal, non-fatal event here.

inp=input.txt
out=output.txt
tmp=tmp.txt

#######################################
# Percent-encode a string for use inside a URL query component.
# Everything except the RFC 3986 "unreserved" characters is encoded, byte by
# byte, so keywords containing '&', '+', '#', '%', ... no longer corrupt the
# query string (the previous version only replaced spaces).
# Arguments: $1 - raw string
# Outputs:   encoded string on stdout (no trailing newline)
#######################################
urlencode() {
  # C locale => ${#raw} and ${raw:i:1} operate on bytes, not characters.
  local LC_ALL=C
  local raw=$1 encoded='' ch hex i
  for (( i = 0; i < ${#raw}; i++ )); do
    ch=${raw:i:1}
    case "$ch" in
      [a-zA-Z0-9.~_-])
        encoded+=$ch
        ;;
      *)
        # "'X" makes printf yield the numeric value of X.
        printf -v hex '%%%02X' "'$ch"
        encoded+=$hex
        ;;
    esac
  done
  printf '%s' "$encoded"
}

#######################################
# Query every suggestion endpoint once and print the raw results.
# Each engine's results are preceded by a '---ENGINE: label' header line and
# the whole record is terminated by a line of dashes; collate() relies on
# those markers containing '---'.
# Arguments: $1 - human-readable label (keyword plus optional suffix)
#            $2 - URL-encoded keyword
#            $3 - suffix appended to the query (may be empty; [a-z0-9])
# Outputs:   raw log record on stdout
#######################################
scrape_query() {
  local label=$1 q=$2 suffix=$3

  printf '%s\n' "---GOOGLE AUSTRALIA: ${label}"
  # Strip the escaped <b>/</b> markers, put each '[' on its own line, keep the
  # second double-quote-delimited field and skip the first three lines.
  curl -s "http://clients1.google.com.au/complete/search?hl=en&q=${q}%20${suffix}&client=hp" \
    | sed 's/\\u003Cb\\u003E//g;s/\\u003C\\\/b\\u003E//g;s/\[/\n\[/g' \
    | cut -d'"' -f2 \
    | tail -n +4

  printf '%s\n' "---AMAZON: ${label}"
  # The bracket expression drops lines containing '[', ']' or '{'. It replaces
  # the former alternation '\[\|\]\|\{\|\]', whose '\{' is interval syntax in
  # basic regular expressions.
  curl -s "http://t1-completion.amazon.com/search/complete?method=completion&q=${q}%20${suffix}&search-alias=aps&client=amazon-search-ui&mkt=1&x=updateISSCompletion&sc=1" \
    | sed 's/,\[{".*//g;s/,/\n/g' \
    | cut -d'"' -f2 \
    | grep -v '[][{]' \
    | tail -n +3

  printf '%s\n' "---YAHOO PREFIX: ${label}"
  curl -s "http://sugg.us.search.yahoo.net/gossip-us-ura?droprotated=1&output=sd1&command=${q}%20${suffix}&nresults=10" \
    | sed 's/{/\n{/g' \
    | grep '"k"' \
    | cut -d'"' -f4

  printf '%s\n' "---YAHOO SUFFIX: ${label}"
  # Same endpoint with droprotated=0: uses the keyword as a suffix rather than
  # a prefix. Duplicates with the previous query are removed by collate().
  curl -s "http://sugg.us.search.yahoo.net/gossip-us-ura?droprotated=0&output=sd1&command=${q}%20${suffix}&nresults=10" \
    | sed 's/{/\n{/g' \
    | grep '"k"' \
    | cut -d'"' -f4

  printf '%s\n' "---BING: ${label}"
  # NOTE(review): cp=13 is hard-coded regardless of query length -- confirm.
  curl -s "http://api.bing.com/qsonhs.aspx?FORM=ASAPIW&mkt=en-US&type=cb&cb=sa_inst.apiCB&q=${q}%20${suffix}&cp=13&bq=${q}" \
    | sed 's/{/\n{/g' \
    | grep '"Txt"' \
    | cut -d'"' -f4

  printf '%s\n' "---BLEKKO: ${label}"
  curl -s "http://blekko.com/autocomplete?query=${q}%20${suffix}" \
    | sed 's/.*\[//g;s/\].*//g;s/","/\n/g;s/"//g'
  printf '\n'

  printf '%s\n' "---THEFIND: ${label}"
  # NOTE(review): the stray '?' after 'spellcheck=0' is kept from the original
  # URL -- confirm whether the endpoint needs it.
  curl -s "https://www.thefind.com/search/suggest.js?spellcheck=0?&q=${q}+${suffix}&_output=js" \
    | sed 's/.*\["//g;s/"\].*//g;s/","/\n/g'
  printf '\n'

  printf '%s\n' "-----------"
}

#######################################
# Reduce a raw log to a sorted list of unique suggestions.
# Drops blank lines and every line containing '---' (engine headers and
# record separators), then sorts byte-wise and removes duplicates.
# Arguments: $1 - raw log file to read
#            $2 - file to (over)write with the result
#######################################
collate() {
  local raw=$1 dest=$2
  LC_ALL=C grep -v -e '^$' -e '---' -- "$raw" | LC_ALL=C sort -u > "$dest"
}

#######################################
# Entry point: scrape every keyword in $inp and write the result to $out.
# Globals: inp (read), out, tmp (written)
# Returns: 0 on success, 1 if a required tool or the input file is missing
#######################################
main() {
  local tool keyword q suffix label
  local -a suffixes=()

  for tool in curl shuf; do
    if ! command -v "$tool" > /dev/null; then
      printf '%s: required tool not found: %s\n' "${0##*/}" "$tool" >&2
      return 1
    fi
  done

  # Fail before truncating anything so a previous result is not clobbered.
  if [[ ! -r "$inp" ]]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "$inp" >&2
    return 1
  fi

  : > "$tmp" || return 1
  : > "$out" || return 1

  # 'read -r' keeps backslashes intact; the '|| [[ -n ... ]]' clause also
  # processes a final line that lacks a trailing newline.
  while read -r keyword || [[ -n "$keyword" ]]; do
    keyword=${keyword%$'\r'}            # tolerate CRLF input files
    [[ -n "$keyword" ]] || continue     # skip blank lines

    q=$(urlencode "$keyword")
    # Fresh random order of the 36 single-character suffixes per keyword.
    mapfile -t suffixes < <(printf '%s\n' {a..z} {0..9} | shuf)

    for suffix in '' "${suffixes[@]}"; do
      label="${keyword}${suffix:+ ${suffix}}"
      printf '%s\n' "$label"            # progress indicator
      scrape_query "$label" "$q" "$suffix" >> "$tmp"
      sleep 1                           # be polite to the endpoints
    done
  done < "$inp"

  collate "$tmp" "$out"
}

main "$@"
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement