Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import email
import getpass
import imaplib
import os
import sys
from xml.sax.saxutils import escape

from django.http import HttpResponse

from models import Email
- def index(request):
def _load_word_list(path):
    """Return the non-blank, whitespace-stripped lines of the file at *path*.

    The file is closed as soon as it has been read.  Blank lines are dropped
    so that the resulting list never contains ``''``; an empty entry would
    match every empty token produced when a message body is split, inflating
    the category counts.
    """
    with open(path, "r") as handle:
        stripped = (line.strip() for line in handle)
        return [word for word in stripped if word]


# One vocabulary per linguistic category; each file holds one word per line.
certain_list = _load_word_list("lists/list_certain.txt")
negation_list = _load_word_list("lists/list_negation.txt")
warm_list = _load_word_list("lists/list_warm.txt")
cold_list = _load_word_list("lists/list_cold.txt")
firstPOV_list = _load_word_list("lists/list_1POV.txt")
secondPOV_list = _load_word_list("lists/list_2POV.txt")
thirdPOV_list = _load_word_list("lists/list_3POV.txt")
class NGramSummer(object):
    """Accumulate token counts from many messages sent by one person.

    Attributes:
        from_person: identifier of the sender the totals belong to.
        ngrams: plain ``dict`` mapping each token to its running total.
    """

    def __init__(self, from_person):
        """Start an empty tally tagged with *from_person*."""
        self.from_person = from_person
        self.ngrams = dict()

    def add_ngrams(self, new_ngrams):
        """Add every count in the mapping *new_ngrams* to the running totals.

        ``None`` or an empty mapping is accepted and leaves the totals
        unchanged.
        """
        if not new_ngrams:
            return
        for word, count in new_ngrams.items():
            self.ngrams[word] = self.ngrams.get(word, 0) + count

    def get_ngrams(self):
        """Return the dict of accumulated totals (the live object, not a copy)."""
        return self.ngrams

    def __repr__(self):
        return "%s(%r)" % (type(self).__name__, self.from_person)
class NGramCounter(object):
    """Count how often each whitespace-separated token occurs in a text.

    Despite the name, only single tokens are counted: the keys of the
    resulting dictionary are the token strings themselves and the values are
    the number of times each one occurs.

    Attributes:
        text: the string to analyse.
        ngrams: ``dict`` of token -> count; empty until :meth:`parse` runs.
    """

    def __init__(self, text):
        """Store *text*; nothing is counted until :meth:`parse` is called."""
        self.text = text
        self.ngrams = dict()

    def tokenize(self):
        """Split the text into tokens on any run of whitespace.

        Splitting on runs of whitespace (rather than on a single space)
        means empty text yields no tokens, repeated spaces never produce
        empty-string tokens, and newlines or tabs also separate words.
        """
        return self.text.split()

    def parse(self):
        """Count every token of the text and store the result in ``ngrams``.

        The tally is rebuilt from scratch on each call, so calling
        ``parse()`` more than once does not double the counts.
        """
        counts = dict()
        for word in self.tokenize():
            counts[word] = counts.get(word, 0) + 1
        self.ngrams = counts

    def get_ngrams(self):
        """Return the token -> count dictionary built by :meth:`parse`."""
        return self.ngrams
# Log in to Gmail over IMAP and open the mailbox that will be searched.
#
# NOTE(review): the account name and password were hard-coded here.  They can
# now be supplied through the environment; the original literals remain only
# as fallbacks so existing deployments keep working.  Remove the fallbacks
# once the environment variables are configured.
imap_user = os.environ.get("MAILERZ_IMAP_USER", "wilo@gmail.com")
imap_password = os.environ.get("MAILERZ_IMAP_PASSWORD", "pass")

M = imaplib.IMAP4_SSL('imap.gmail.com')
M.login(imap_user, imap_password)
# Open read-only: fetching RFC822 from a read-write mailbox marks every
# fetched message as seen, which an analysis pass should not do.
# Use "[Gmail]/Sent Mail" instead to analyse only sent messages.
M.select("[Gmail]/All Mail", readonly=True)
def get_first_text_part(msg):
    """Return the body of the first ``text/*`` part of *msg*, or ``None``.

    The message tree is searched depth-first, so a text part nested inside
    another multipart container (for example ``multipart/alternative``
    inside ``multipart/mixed``) is found as well.  The content transfer
    encoding (base64, quoted-printable) is undone before the body is
    returned.

    Args:
        msg: an ``email.message.Message`` instance.

    Returns:
        The text of the first part whose main content type is ``text``, or
        ``None`` when the message contains no such part.
    """
    for part in msg.walk():
        if part.get_content_maintype() != 'text':
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            # Nothing could be decoded; fall back to the raw payload.
            return part.get_payload()
        if not isinstance(payload, str):
            # Python 3 hands back bytes; turn them into text using the
            # part's declared charset, tolerating unknown or wrong charsets.
            charset = part.get_content_charset() or 'utf-8'
            try:
                payload = payload.decode(charset, 'replace')
            except LookupError:
                payload = payload.decode('utf-8', 'replace')
        return payload
    return None
# For every known sender: fetch all of their messages, count category words
# and tokens per message, store one Email row per message, and write the
# per-message records plus a per-sender token summary to "<sender>_from.xml".
#
# NOTE(review): the nesting below is reconstructed from a paste that had lost
# its indentation; confirm it against the deployed view.

# One record per message; every placeholder is filled with escaped text.
xml_template = "<email><from>{sender}</from><to>{to}</to><date>{date}</date><subject>{subject}</subject><body>{body}</body><certain>{certain}</certain><negation>{negation}</negation><warm>{warm}</warm><cold>{cold}</cold><firstPOV>{firstPOV}</firstPOV><secondPOV>{secondPOV}</secondPOV><thirdPOV>{thirdPOV}</thirdPOV><ngrams>{ngrams}</ngrams></email>"
# One summary per sender, holding the summed token counts.
person_summary = "<Summary><ngramSUM>{summery}</ngramSUM></Summary>"
# Senders whose mail is analysed.
theperson = ["alinjen@bellsouth.net", "ajarnp@gmail.com", "alinjen@me.com", "dlbergman@gmail.com", "donjen@bellsouth.net", "hljen@bellsouth.net", "trixietree@hotmail.com", "wendycantdrive@hotmail.com", "avery@averymax.com", "chris.laniosz@gmail.com", "frannie.hall@gmail.com", "ian.oliver@flawlessfuture.com", "jasonaston@gmail.com", "seatubers@gmail.com", "seatubers@gmail.com", "idralcar@hotmail.com", "matthew.d.rader@gmail.com", "rader@matthewrader.com", "mslyssa@gmail.com", "guerrajmichael@gmail.com", "pamela@pamelareed.com", "justkeepgoing@excite.com", "studio@reedandrader.com", "ryan@letsneverdie.net", "4stepan@gmail.com", "cmae.oliver@gmail.com", "conniemae.olive@gmail.com", "larissa_bemis@yahoo.com", "larissarbemis@gmail.com", "lbemis@apple.com", "sushionthego@gmail.com", "acm466@nyu.edu", "lia.martinez@nyu.edu", "lia@potiondesign.com", "kiwi@smirkyplop.com", "roisin.stack@gmail.com", "ohannamarie@gmail.com", "bryan.baxter@gmail.com", "davidestici@hotmail.com", "genny.hoffman@gmail.com", "sheenamcneal@gmail.com", "daniel.shiffman@gmail.com", "daniel.shiffman@nyu.edu", "dan.osullivan@nyu.edu", "edward.gordon@nyu.edu", "marianne.petit@nyu.edu", "midori.yasuda@nyu.edu", "nh19@nyu.edu", "rob.ryan@nyu.edu", "EFarnon@wsgc.com", "lori@scoreatthetop.com"]


def _xml_text(value):
    """Render *value* as text that is safe inside an XML element."""
    return escape(str(value))


def _count_matches(words, vocabulary):
    """Return how many entries of *words* are contained in *vocabulary*."""
    return sum(1 for word in words if word in vocabulary)


try:
    handled = set()
    for person in theperson:
        # The address list contains a duplicate; processing a sender twice
        # would store every one of their messages a second time.
        if person in handled:
            continue
        handled.add(person)

        print("Searching for: %r" % (person,))
        status, found = M.search(None, 'FROM', person)  # all messages from this sender
        summer = NGramSummer(person)  # sums the token counts of all messages

        # The context manager closes the file even if a message fails.
        with open(person + "_from.xml", 'w') as new:
            for num in found[0].split():
                status, fetched = M.fetch(num, '(RFC822)')
                msg = email.message_from_string(fetched[0][1])
                _from = msg['from']
                _to = msg['to']
                _subject = msg['subject']
                _date = msg['date']
                # A message without a text part yields None; treat it as empty.
                _body = get_first_text_part(msg) or ""

                # Split on any whitespace so words at line ends still match.
                words = [word.lower() for word in _body.split()]
                certain_sum = _count_matches(words, certain_list)
                negation_sum = _count_matches(words, negation_list)
                warm_sum = _count_matches(words, warm_list)
                cold_sum = _count_matches(words, cold_list)
                firstPOV_sum = _count_matches(words, firstPOV_list)
                secondPOV_sum = _count_matches(words, secondPOV_list)
                thirdPOV_sum = _count_matches(words, thirdPOV_list)

                # Reset per message so an empty body never reuses the
                # previous message's counts.
                _feed = {}
                if _body:
                    ngrams = NGramCounter(" ".join(_body.strip(">").split()))
                    ngrams.parse()
                    _feed = ngrams.get_ngrams()
                    summer.add_ngrams(_feed)

                email_obj = Email()
                # `from` is a reserved word, so plain attribute assignment
                # cannot compile.
                # NOTE(review): confirm the real sender field name on the
                # Email model and assign to it directly.
                setattr(email_obj, "from", _from)
                email_obj.to = _to
                email_obj.subject = _subject
                email_obj.date = _date
                email_obj.body = _body
                email_obj.certain = certain_sum
                email_obj.negation = negation_sum  # the count, not the word list
                email_obj.warm = warm_sum
                email_obj.cold = cold_sum
                email_obj.firstPOV = firstPOV_sum
                email_obj.secondPOV = secondPOV_sum
                email_obj.thirdPOV = thirdPOV_sum
                email_obj.save()

                # Header and body text is escaped so '<' or '&' in a message
                # cannot break the generated XML.
                my_message = xml_template.format(
                    sender=_xml_text(_from),
                    to=_xml_text(_to),
                    date=_xml_text(_date),
                    subject=_xml_text(_subject),
                    ngrams=_xml_text(_feed),
                    body=_xml_text(_body),
                    certain=certain_sum,
                    negation=negation_sum,
                    warm=warm_sum,
                    cold=cold_sum,
                    firstPOV=firstPOV_sum,
                    secondPOV=secondPOV_sum,
                    thirdPOV=thirdPOV_sum,
                )
                new.write(my_message)
                print(repr(my_message))

            the_summary = summer.get_ngrams()
            sum_persons = person_summary.format(summery=_xml_text(the_summary))
            new.write(sum_persons)
            print(repr(the_summary))
finally:
    # Always release the mailbox and the connection, even after an error.
    try:
        M.close()
    finally:
        M.logout()
- return HttpResponse("Hello, world. You're at the mailerz index")
Add Comment
Please, Sign In to add comment