Advertisement
s3c

Crawl www.samair.ru

s3c
May 19th, 2011
1,880
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 1.83 KB | None | 0 0
  #!/bin/bash
  #
  # Crawl www.samair.ru/proxy/proxy-NN.htm (NN = 01..99) for a proxy list, then
  # probe every proxy that was found.
  # http://s3cu14r.wordpress.com/2011/05/19/crawling-websites-using-curl-and-bash/
  #
  # Files written to the current directory:
  #   proxylist.txt         sorted, de-duplicated "ip:port" entries
  #   proxylist_active.txt  the entries through which google.com was reachable
  #
  # `set -e` is deliberately not used: individual pages and proxies are expected
  # to fail, and one failure must not abort the whole run.

  # Extended regex matching a dotted-quad IPv4 address.
  ip_regex='([0-9]{1,3}\.){3}[0-9]{1,3}'

  # Only the first ten "<char>=<digit>" pairs of a page are used for decoding.
  # NOTE(review): presumably one pair per digit 0-9 -- confirm against the site.
  max_pairs=10

  # Scratch file collecting the raw entries of all pages; removed on exit.
  raw_list=''

  # Removes the scratch file, if one was created.
  cleanup() {
    [[ -z "$raw_list" ]] || rm -f -- "$raw_list"
  }

  #######################################
  # Scrapes one listing page and decodes its obfuscated port numbers.
  # Globals:   ip_regex (read), max_pairs (read)
  # Arguments: $1 - page number (1..99); zero-padded to two digits in the URL
  # Outputs:   one "ip:port" line per decodable table row, on stdout
  # Returns:   0, also when the page could not be fetched or holds no rows
  #######################################
  scrape_page() {
    local padded page pair row ip port i
    # Fresh tables on every call so pairs of one page never leak into another.
    local -a from=() to=()

    printf -v padded '%02d' "$1"

    # Flatten the page to a single line: grep matches line by line, and a table
    # row may span several lines in the raw HTML.
    page=$(curl -s "http://www.samair.ru/proxy/proxy-${padded}.htm" \
      | tr -s '[:space:]' ' ')
    [[ -n "$page" ]] || return 0

    # Build the substitution table from the "<char>=<digit>" pairs on the page.
    while IFS= read -r pair; do
      from+=("${pair:0:1}")
      to+=("${pair:2:1}")
      (( ${#from[@]} < max_pairs )) || break
    done < <(grep -o -E '.=[0-9]' <<<"$page")

    # A row candidate runs from "<tr>" up to the next ")".
    while IFS= read -r row; do
      [[ "$row" =~ $ip_regex ]] || continue
      ip=${BASH_REMATCH[0]}

      # The obfuscated port is everything after the first "+", minus the "+"
      # separators. Without a "+" there is no port to decode.
      [[ "$row" == *+* ]] || continue
      port=${row#*+}
      port=${port//+/}

      # The quoted pattern is matched literally, so a captured "." or "/"
      # cannot act as a metacharacter.
      for (( i = 0; i < ${#from[@]}; i++ )); do
        port=${port//"${from[i]}"/${to[i]}}
      done

      [[ -n "$port" ]] || continue
      printf '%s:%s\n' "$ip" "$port"
    done < <(grep -o -E '<tr>[^)]*' <<<"$page")
  }

  #######################################
  # Tests whether google.com can be fetched through a proxy within 5 seconds.
  # Arguments: $1 - proxy as "ip:port"
  # Returns:   0 if the response contains "google.com", non-zero otherwise
  #######################################
  proxy_works() {
    # stdin is detached so curl cannot consume the list the caller is reading.
    curl -s -m 5 -x "$1" google.com < /dev/null | grep -q -F 'google.com'
  }

  #######################################
  # Crawls all pages, writes proxylist.txt, then writes proxylist_active.txt.
  # Globals:   raw_list (written)
  # Outputs:   progress and per-proxy status on stdout
  # Returns:   0 on success, 1 if no scratch file could be created
  #######################################
  main() {
    local pagenum proxy

    raw_list=$(mktemp) || {
      printf 'cannot create temporary file\n' >&2
      return 1
    }
    trap cleanup EXIT

    printf 'Crawling www.samair.ru for proxy list \nDoing Page: '
    for pagenum in {1..99}; do
      printf '%s ' "$pagenum"
      scrape_page "$pagenum" >> "$raw_list"
    done

    # sort -u keeps exactly one copy of each entry; `uniq -u` would instead
    # throw away every proxy that was listed more than once.
    sort -u -- "$raw_list" > proxylist.txt
    printf '\nCreated proxylist.txt\n'

    # Start from an empty result file so it exists even if no proxy is active.
    : > proxylist_active.txt
    while IFS= read -r proxy; do
      [[ -n "$proxy" ]] || continue
      if proxy_works "$proxy"; then
        printf '\e[00;32mActive: %s \e[00m\n' "$proxy"
        printf '%s\n' "$proxy" >> proxylist_active.txt
      else
        printf '\e[00;31mInactive: %s \e[00m\n' "$proxy"
      fi
    done < proxylist.txt

    echo "Created proxylist_active.txt"
  }

  main "$@"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement