Guest User

Untitled

a guest
Aug 26th, 2012
32
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.76 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import sys,os
  3. import httplib2, urllib,time
  4.  
  5. http = httplib2.Http()
  6.  
  7. count = int(sys.argv[1])
  8.  
  9. retries = 0
  10. max_retries = int(sys.argv[2])
  11.  
  12. images_path = "/var/www/4chanbanners/images/"
  13.  
  14. if len(sys.argv) == 3:
  15.     while count > 0 and retries <= 20:
  16.         time.sleep(3)
  17.         status, response = http.request('http://boards.4chan.org/b/')
  18.         soup = BeautifulSoup(response).img["src"]
  19.         filename = soup.split('/')[-1].split('#')[0].split('?')[0]
  20.        
  21.         if os.path.exists(images_path+filename):
  22.             retries+= 1
  23.             print "File " + filename + " already exists!"
  24.         else:
  25.             urllib.urlretrieve("http:"+soup,images_path+filename)
  26.             print "Got " + filename
  27.             retries = 0
  28.             count -= 1
  29. else:
  30.     print "Usage: parse.py IMAGE_COUNT MAX_RETRIES"
Advertisement
Add Comment
Please, Sign In to add comment