Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# As-is, 100% free, late-night software.
#
# DO NOT RUN AS ROOT!
#
# Description: By default, tries to download 300 google
# image search results for a search string into a folder named 'img'.
#
# Usage: run without arguments to be prompted for the search term,
# or pass the search term as command-line arguments.
#
# Requires: tidy and wget
# apt-get install wget tidy (Ubuntu/Debian)
# or yum install wget tidy (RedHat/CentOS/Fedora)
#

# Treat use of an unset variable as an error. 'set -e' is deliberately not
# used: individual image downloads are best-effort and are expected to fail.
set -u

# max number of search results to walk through
maxsearches=300
# number of results the start offset advances per request
pagesize=20
# download path
downloadpath="img"
# useragent (google search does not like the wget useragent)
useragent='Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Percent-encode $1 for use as a URL query value (spaces become '+').
# Works byte-wise under the C locale so multi-byte characters are encoded
# one byte at a time.
urlencode() {
  local LC_ALL=C
  local raw=$1 encoded='' ch hex i
  for (( i = 0; i < ${#raw}; i++ )); do
    ch=${raw:i:1}
    case "$ch" in
      [a-zA-Z0-9.~_-]) encoded+=$ch ;;
      ' ') encoded+='+' ;;
      *)
        printf -v hex '%%%02X' "'$ch"
        encoded+=$hex
        ;;
    esac
  done
  printf '%s' "$encoded"
}

# fail early if a required tool is missing
for tool in wget tidy; do
  command -v "$tool" >/dev/null || die "missing required tool: $tool"
done

# read your search term (from the arguments if given, otherwise prompt)
search=''
if (( $# > 0 )); then
  search="$*"
else
  # read returns non-zero on EOF even when it captured text, so the
  # emptiness check below is what decides whether input was usable
  read -r -p "enter search term: " search || true
fi
[[ -n "$search" ]] || die "no search term given"
search=$(urlencode "$search")

# tmp dir (removed on every exit path by the trap)
tmpdir=$(mktemp -d) || die "could not create temporary directory"
trap 'rm -rf -- "$tmpdir"' EXIT

# make folder for downloads and work inside it so wget saves files there
mkdir -p -- "$downloadpath" || exit 1
cd -- "$downloadpath" || exit 1

num=0
while (( num < maxsearches )); do
  prev=$num
  num=$(( num + pagesize ))
  # poor man's progress bar
  printf '%s through %s...' "$prev" "$num"
  # google url for search; the page starts at $prev so that the request
  # matches the range just printed and the first page (start=0) is included
  googleurl="https://www.google.com/search?q=${search}&um=1&hl=en&safe=off&start=${prev}&sa=N&tbm=isch&sout=1"
  # grab google results
  if ! wget --quiet -U "$useragent" -O "${tmpdir}/google" "$googleurl"; then
    printf 'could not fetch results page, skipping.\n' >&2
    continue
  fi
  # clean up google output; tidy's exit status is ignored on purpose because
  # it also returns non-zero when it merely emits warnings
  tidy -q "${tmpdir}/google" 2>/dev/null > "${tmpdir}/google.tidy"
  # grab urls: take each imgurl=<value> up to the next '&', '"' or whitespace
  grep -o 'imgurl=[^&"[:space:]]*' "${tmpdir}/google.tidy" \
    | sed -e 's/^imgurl=//' > "${tmpdir}/urls"
  # download all urls, one per line, without word-splitting or globbing
  while IFS= read -r url; do
    [[ -n "$url" ]] || continue
    wget --timeout=2 --dns-timeout=3 --connect-timeout=2 --read-timeout=3 \
      --quiet -U "$useragent" -A jpg,png,gif -- "$url"
    # progress bar
    printf '.'
  done < "${tmpdir}/urls"
  printf 'done.\n'
done
exit 0
Add Comment
Please, Sign In to add comment