Guest User

Untitled

a guest
Sep 27th, 2018
771
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.85 KB | None | 0 0
  1. import getpass, imaplib, email, sys
  2.  
  3. class NGramSummer(object): #for tallying total ngram count from someone
  4. def __init__(self, from_person): #constructor constructor... whats your...
  5. self.from_person = from_person #self is tagging with the "who" it's from
  6. self.ngrams = dict()
  7.  
  8. def add_ngrams(self, new_ngrams): #this is where the counting is going on
  9. for word in new_ngrams:
  10. if word in self.ngrams:
  11. self.ngrams[word] += new_ngrams[word]
  12. else:
  13. self.ngrams[word] = new_ngrams[word]
  14.  
  15. def get_ngrams(self):
  16. return self.ngrams
  17.  
  18. # NGramCounter builds a dictionary relating ngrams (as tuples) to the number
  19. # of times that ngram occurs in a text (as integers)
  20. class NGramCounter(object):
  21.  
  22. # parameter n is the 'order' (length) of the desired n-gram
  23. def __init__(self, text):
  24. self.text = text
  25. self.ngrams = dict()
  26.  
  27. # feed method calls tokenize to break the given string up into units
  28. def tokenize(self):
  29. return self.text.split(" ")
  30.  
  31. # feed method takes text, tokenizes it, and visits every group of n tokens
  32. # in turn, adding the group to self.ngrams or incrementing count in same
  33. def parse(self):
  34.  
  35. tokens = self.tokenize()
  36. #Moves through every individual word in the text, increments counter if already found
  37. #else sets count to 1
  38. for word in tokens:
  39. if word in self.ngrams:
  40. self.ngrams[word] += 1
  41. else:
  42. self.ngrams[word] = 1
  43.  
  44. def get_ngrams(self):
  45. return self.ngrams
  46.  
  47. #loading profile for login
  48. M = imaplib.IMAP4_SSL('imap.gmail.com')
  49. M.login("willimite@gmail.com", "PASSWORD")
  50. # M.select("[Gmail]/Sent Mail")
  51. M.select("inbox")
  52.  
  53. def get_first_text_part(msg): #setup to cleanup all text
  54. maintype = msg.get_content_maintype()
  55. if maintype == 'multipart':
  56. for part in msg.get_payload():
  57. if part.get_content_maintype() == 'text':
  58. return part.get_payload()
  59. elif maintype == 'text':
  60. return msg.get_payload()
  61.  
  62. xml_template = "<email><from>{sender}</from><to>{to}</to><date>{date}</date><subject>{subject}</subject><body>{body}</body><ngrams>{ngrams}</ngrams></email>"
  63.  
  64. # my_message = xml_template.format(sender="sushionthego@gmail.com", to="willimite@gmail.com", date="4/13/2012", subject="Did you drink all the milk or did you throw it out b/c Avery is moving in?", body="see subject", ngrams="somengramsgohere")
  65.  
  66. theperson = ["alinjen@bellsouth.net", "ajarnp@gmail.com", "alinjen@me.com", "dlbergman@gmail.com", "donjen@bellsouth.net", "hljen@bellsouth.net", "trixietree@hotmail.com", "wendycantdrive@hotmail.com", "chris.laniosz@gmail.com", "frannie.hall@gmail.com", "ian.oliver@flawlessfuture.com", "jasonaston@gmail.com", "seatubers@gmail.com", "seatubers@gmail.com", "idralcar@hotmail.com", "matthew.d.rader@gmail.com", "rader@matthewrader.com", "mslyssa@gmail.com", "guerrajmichael@gmail.com", "pamela@pamelareed.com", "studio@reedandrader.com", "ryan@letsneverdie.net", "4stepan@gmail.com", "cmae.oliver@gmail.com", "conniemae.olive@gmail.com", "larissa_bemis@yahoo.com", "larissarbemis@gmail.com", "lbemis@apple.com", "sushionthego@gmail.com", "acm466@nyu.edu", "lia.martinez@nyu.edu", "lia@potiondesign.com", "kiwi@smirkyplop.com", "roisin.stack@gmail.com", "ohannamarie@gmail.com", "bryan.baxter@gmail.com", "davidestici@hotmail.com", "genny.hoffman@gmail.com", "sheenamcneal@gmail.com", "daniel.shiffman@gmail.com", "daniel.shiffman@nyu.edu", "dan.osullivan@nyu.edu", "edward.gordon@nyu.edu", "marianne.petit@nyu.edu", "midori.yasuda@nyu.edu", "nh19@nyu.edu", "rob.ryan@nyu.edu", "EFarnon@wsgc.com", "lori@scoreatthetop.com"]
  67.  
  68. for person in theperson:
  69. print "Searching for:", person
  70. type, data = M.search(None, 'FROM', person) #Gets ALL messages
  71. summer = NGramSummer(person) #a NGramSummer object,this will sum all the ngrams together
  72. new = open(person+".xml", 'w')
  73. for num in data[0].split(): #Loops through all messages
  74. yp, data = M.fetch(num, '(RFC822)') #Pulls Message
  75. msg = email.message_from_string(data[0][1]) #Puts message into easy to use python objects
  76. _from = msg['from'] #pull from
  77. _to = msg['to'] #pull to
  78. _subject = msg['subject'] #pull subject
  79. _date = msg['date']
  80. _body = get_first_text_part(msg) #pull body
  81. if _body:
  82. ngrams = NGramCounter(" ".join(_body.strip(">").split()))
  83. ngrams.parse()
  84. _feed = ngrams.get_ngrams()
  85. #print _feed
  86. summer.add_ngrams(_feed)
  87.  
  88. my_message = xml_template.format(sender=_from, to=_to, date=_date, subject=_subject, ngrams=_feed, body=_body)
  89. new.write(my_message)
  90. print my_message
  91.  
  92. # print 'Content-Type:',msg.get_content_type()
  93. # last_message = xml_template.format(sender="_from", to=_to, date=_date, subject=_subject, body=_body, ngrams=_summer.get_ngrams())
  94.  
  95. # new.write('------summmary----------')
  96. # new.write(str(summer.get_ngrams()))
  97. # print '------summmary----------'
  98. print summer.get_ngrams()
  99.  
  100. new.close()
  101. M.close()
  102. M.logout()
Add Comment
Please, Sign In to add comment