jmunsch

get_item_pages

Mar 8th, 2014
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.22 KB | None | 0 0
  1. import time, os, random
  2. import httplib
  3. import urllib2
  4. httplib.HTTPConnection.debuglevel = 1
  5.  
  6. # create the html page directory if it doesn't exist
  7. folder = 'item_number_pages'
  8.  
  9. try:
  10.     os.mkdir(folder)
  11. except Exception,e:print e
  12.  
  13. list = []
  14. # create a list of item numbers from manifest
  15. with open('XXX_item_numbers','r') as the_file:
  16.         for line in the_file:
  17.                 for characters in line.split():
  18.                         # append characters 1 thru 6 ... 4XX123
  19.                         list.append(characters[1:6])
  20.  
  21. # from the unique set of list create the filepath(fp) and create the request
  22. for fnames in set(list):
  23.     fp = os.path.join(folder,fnames)
  24.     print "Trying:"+fp+"\n"
  25.     try:
  26.         request = urllib2.Request('http://www.XXXXXXXXXXX.com/product/'+fnames.upper()+'.uts')
  27.         # add custom header to identify user
  28.         request.add_header('User-Agent','jmunsch_thnx_v2.0 +http://jamesmunsch.com/')
  29.         opener = urllib2.build_opener()
  30.         data = opener.open(request).read()
  31.         with open(fp,'w+') as f:
  32.            f.write(data)
  33.         # sleep a random amount of time (1 + (random range from 0.00 to 1.00))
  34.         time.sleep(1+random.random())
  35.     except Exception,e:print e
Advertisement
Add Comment
Please, Sign In to add comment