Guest User

Untitled

a guest
Nov 17th, 2017
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.19 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding:utf-8 -*-
  3.  
  4. from __future__ import division
  5. from operator import itemgetter
  6.  
  7. import re
  8. import pymongo
  9.  
  10.  
  11. def connect_database():
  12. client = pymongo.MongoClient('mongodb://localhost:27017/')
  13. return client.hashes
  14.  
  15.  
  16. def main():
  17. db = connect_database()
  18. collection = db.mail_address
  19. stat_dict = {}
  20.  
  21. mail_address_list = collection.find({})
  22.  
  23. for mail_address in mail_address_list:
  24. mail_address = mail_address['mail']
  25. m = re.match(r'\b[\w.+-]+?@(\w*[-\.\w+?]*\b)', mail_address)
  26.  
  27. try:
  28. mail_address = m.group(1).lower()
  29.  
  30. if not mail_address in stat_dict:
  31. stat_dict[mail_address] = 1
  32. else:
  33. stat_dict[mail_address] = stat_dict[mail_address] + 1
  34. except AttributeError as e:
  35. pass
  36.  
  37. sorted_list = []
  38. vol_amount_all = 0
  39.  
  40. for item in sorted(stat_dict.items(), key=lambda x: x[1], reverse=True):
  41. vol_amount_all = item[1] + vol_amount_all
  42. sorted_list.append(item)
  43.  
  44. for i in sorted_list:
  45. calc = round(i[1] / vol_amount_all * 100, 2)
  46.  
  47. if calc > 0:
  48. print calc, i[0]
  49.  
  50.  
  51. if __name__ == '__main__':
  52. main()
Add Comment
Please, Sign In to add comment