Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def download_item_info():
    """Download the product page for every item number in the item list.

    Item numbers are read, one per line, from 'item_number_list_3-5-14.txt'.
    Each page is saved as 'XXXXXXXX_pages/<item_number>'; an item whose file
    already exists is skipped.  A failure to fetch or to save a page is
    printed and appended to 'download_error_log' and does not stop the run.
    The function pauses 4 seconds after every download attempt.

    Expects ``os``, ``time`` and ``urllib2`` to be imported by the enclosing
    module (the import lines are not part of this excerpt).
    """
    path = 'XXXXXXXX_pages'
    # Context managers guarantee both files are closed even if the loop raises.
    with open('download_error_log', 'a+') as errors:
        with open('item_number_list_3-5-14.txt', 'r') as item_list:
            # make the web page directory if it doesn't exist
            try:
                os.mkdir(path)
            except OSError as e:
                # Usually "File exists"; report it and carry on.
                print(e)

            for item_number in item_list:
                # format item_number: strip \r and \n
                item_number = item_number.rstrip("\r\n")
                if not item_number:
                    # A blank line would request an empty familyid.
                    continue

                fp = os.path.join(path, item_number)
                print("### Current:" + fp)

                # An existing file counts as already downloaded.
                if os.path.isfile(fp):
                    print("Skipped:" + fp)
                    continue

                try:
                    f = urllib2.urlopen(
                        'http://www.XXXXXXXXX.com/product/?&familyid='
                        + item_number)
                    try:
                        # Read the entire page and store into data
                        data = f.read()
                    finally:
                        f.close()
                except Exception as e:
                    error = "Error opening/reading page:\n" + str(e)
                    print(error)
                    errors.write(error + "\n" + str(fp) + "\n")
                else:
                    # Only write when the fetch succeeded; otherwise an empty
                    # or stale file would be created and then skipped forever
                    # by the existence check above.
                    try:
                        # Binary mode stores the response bytes unmodified.
                        with open(fp, 'wb') as jpage:
                            jpage.write(data)
                        print("Got page for:" + str(item_number))
                    except Exception as e:
                        print(e)
                        errors.write(str(e) + "\n" + str(fp) + "\n")

                # Be polite to the server between requests.
                time.sleep(4)
Advertisement
Add Comment
Please, Sign In to add comment