Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Download the product page for every item number listed in the manifest
# file 'XXX_item_numbers' and save each page as a file inside the
# 'item_number_pages' directory.  Written for Python 2 (httplib / urllib2);
# the syntax used here is accepted by Python 2.6+.
import httplib
import os
import random
import time
import urllib2

# Raise the class-wide default debug level for httplib connections.
# NOTE(review): urllib2's handlers may set their own debug level on each
# connection they create -- confirm this line still has the intended effect.
httplib.HTTPConnection.debuglevel = 1

# Create the html page directory if it doesn't exist.
folder = 'item_number_pages'
if not os.path.isdir(folder):
    try:
        os.mkdir(folder)
    except OSError as e:
        # Best effort: report the problem and carry on; every failed write
        # further down is reported as well.
        print(e)

# Build the set of unique item numbers from the manifest.
item_numbers = set()
with open('XXX_item_numbers', 'r') as the_file:
    for line in the_file:
        for token in line.split():
            # Keep the characters at indices 1 through 5: the slice drops the
            # first character and anything after the sixth,
            # e.g. '4XX123' -> 'XX123'.
            item_number = token[1:6]
            # A token shorter than two characters yields an empty string,
            # which would only produce a bogus URL and an unwritable path.
            if item_number:
                item_numbers.add(item_number)

# The opener does not depend on the item, so build it once for the whole run.
opener = urllib2.build_opener()

# For each unique item number create the filepath (fp) and the request.
for item_number in item_numbers:
    fp = os.path.join(folder, item_number)
    print("Trying:" + fp + "\n")
    try:
        request = urllib2.Request(
            'http://www.XXXXXXXXXXX.com/product/' + item_number.upper() + '.uts')
        # Add custom header to identify user.
        request.add_header('User-Agent',
                           'jmunsch_thnx_v2.0 +http://jamesmunsch.com/')
        response = opener.open(request)
        try:
            data = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
        # Binary mode stores the bytes exactly as they were received.
        with open(fp, 'wb') as f:
            f.write(data)
    except Exception as e:
        # Top-level boundary of the script: one failed item must not abort
        # the remaining downloads, so report it and move on.
        print(e)
    # Sleep between 1 and 2 seconds (1 + random value in [0.0, 1.0)) after
    # every attempt, successful or not, so failures do not turn into
    # back-to-back requests against the server.
    time.sleep(1 + random.random())
Advertisement
Add Comment
Please, Sign In to add comment