Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #Original Author: http://www.malos-ojos.com/?p=445
- #Much Thanks!
- #Forked from: http://cl.ly/3Jo7
- #updated by gremlin 13 MAR 2013
- #recognizing, no error handling, ungraceful parsing. Have fun!
import csv
import re
import string
import sys
import urllib

try:  # Python 3
    from urllib.parse import quote
    from urllib.request import ProxyHandler, build_opener
except ImportError:  # Python 2
    from urllib import quote
    from urllib2 import ProxyHandler, build_opener

# NOTE(review): the pasted script had lost its indentation; the loop nesting
# below is reconstructed from what each statement needs in scope -- confirm
# against the original paste if one is available.

# URL template requested once per word read from the word list.
RSS_URL = "http://www.mailinator.com/rss.jsp?email=%s"

# Patterns are compiled once here instead of on every loop iteration.
_ITEM_LINK_RE = re.compile(r'item\srdf:about="(.*)">')
_DOMAIN_RE = re.compile(r'@([^&]*)')
_TO_RE = re.compile(
    r'<td\sclass="label">To:</td>\s*'
    r'<td\sclass="data"\swidth="300px">(.*)\s*</td>'
)
_FROM_RE = re.compile(
    r'<td\sclass="label">From:</td>\s*<td\sclass="data">(.*)\s*</td>'
)
_SUBJECT_RE = re.compile(
    r'<td\sclass="label">Subject:</td>\s*<td\sclass="data">(.*)\s*</td>'
)


def _fetch(url):
    """Return the body of *url* as text, or None when the request fails.

    An empty proxy mapping is passed on purpose so that no proxy is used.
    Failures are reported on stderr and skipped so one bad URL does not
    abort the whole run.
    """
    opener = build_opener(ProxyHandler({}))
    try:
        response = opener.open(url)
        try:
            body = response.read()
        finally:
            response.close()
    except IOError as err:  # URLError/HTTPError derive from IOError/OSError
        sys.stderr.write("could not fetch %s: %s\n" % (url, err))
        return None
    if not isinstance(body, str):
        # Python 3 hands back bytes; the regexes below work on text.
        body = body.decode('utf-8', 'replace')
    return body


def _clean_link(link):
    """Drop 'amp;' and the '@domain' part (up to the next '&') from *link*."""
    link = link.replace('amp;', '')
    domains = _DOMAIN_RE.findall(link)
    if domains:  # links without '@' are kept as they are
        link = link.replace('@' + domains[0], '')
    return link


def _parse_message(page):
    """Return [to, from, subject] found in *page*, or None if any is missing."""
    matches = [
        pattern.search(page) for pattern in (_TO_RE, _FROM_RE, _SUBJECT_RE)
    ]
    if not all(matches):
        return None
    # The greedy capture can include trailing blanks or '\r'; trim them.
    return [match.group(1).strip() for match in matches]


def _open_csv(path):
    """Open *path* for csv writing in the mode the running Python expects."""
    if sys.version_info[0] >= 3:
        return open(path, 'w', newline='', encoding='utf-8')
    return open(path, 'wb')


def main(words_path='words.txt', csv_path='data.csv', fetch=None):
    """Look up every word's feed, follow the item links and save a CSV.

    Args:
        words_path: text file with one word per line; blank lines are
            skipped and surrounding whitespace is removed.
        csv_path: output file; each row is to, from, subject, link.
        fetch: callable taking a URL and returning the page text, or None
            on failure. Defaults to a plain HTTP GET without proxies.
    """
    if fetch is None:
        fetch = _fetch

    with open(words_path, 'r') as words_file:
        stripped = (line.strip() for line in words_file)
        words = [word for word in stripped if word]
    print("\n")
    print("imported %i word(s)" % len(words))

    mails = []
    for word in words:
        # Quote the word so stray characters cannot corrupt the URL.
        feed = fetch(RSS_URL % quote(word, safe=''))
        if feed is None:
            continue
        mails.extend(
            _clean_link(link) for link in _ITEM_LINK_RE.findall(feed)
        )
    print("mined %i e-mail(s)" % len(mails))

    pieces = 0
    with _open_csv(csv_path) as csv_file:
        out_writer = csv.writer(csv_file, delimiter=',')
        for link in mails:
            print(link)
            page = fetch(link)
            if page is None:
                continue
            fields = _parse_message(page)
            if fields is None:
                continue
            out_writer.writerow(fields + [link])
            pieces += 3  # to, from and subject count as three pieces
    print("wrote %i pieces of data to csv" % pieces)
    print("\n")


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement