Advertisement
Guest User

Untitled

a guest
Mar 27th, 2018
796
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.52 KB | None | 0 0
  1. import smtplib
  2. def mail(message):
  3. gmail_user = 'jesper.holmstrom92@gmail.com'
  4. gmail_password = 'Jesperhol12!'
  5.  
  6.  
  7. server = smtplib.SMTP('smtp.gmail.com', 587)
  8. server.starttls()
  9. server.login("jesper.holmstrom92@gmail.com", "Jesperhol12")
  10. server.sendmail("jesper.holmstrom92@gmail.com", "bry.holmstrom@gmail.com", message)
  11. server.quit()
  12.  
  13. import xlwt
  14. import ast
  15. import time
  16. import re
  17. import os
  18. import bitlydatahandler
  19. import Queue
  20. import collections
  21. from collections import defaultdict
  22.  
  23.  
  24. newsEnding24h = re.compile('.*news24h.txt$')
  25.  
  26. newsEnding1h = re.compile('.*news01h\.txt$')
  27. path = ""
  28.  
  29. def findNewsFiles(path):
  30. file_paths = []
  31. for file_name in os.listdir(path):
  32. if (newsEnding1h.match(file_name) or newsEnding24h.match(file_name)):
  33. file_path = path + file_name
  34. file_paths.append(file_path)
  35. return file_paths
  36.  
  37.  
  38. def findUpdateFiles(path, origin):
  39. file_paths = Queue.PriorityQueue()
  40. timestampAndNews = origin[(len(origin) - 29):(len(origin) - 4)]
  41. updateForm = re.compile(timestampAndNews + '[_]{1}.*')
  42. for file_name in os.listdir(path):
  43. if (updateForm.match(file_name)):
  44. file_path = path + file_name
  45. file_paths.put(file_path, int(file_name[(len(file_name) - 11):(len(file_name) - 5)]))
  46. return file_paths
  47. urlDict = defaultdict(dict)
  48.  
  49.  
  50.  
  51. def main():
  52. classPath = "./data/news/classifications.txt"
  53. with open(classPath, 'r') as classFile:
  54. classData = classFile.read().split('\n')
  55. dictionaryOfURLandClass = {}
  56. for line in classData:
  57. if line != '':
  58. dictionaryOfURLandClass[line.split(';')[2]] = line.split(';')[1]
  59. for item in dictionaryOfURLandClass:
  60. print item
  61. paths = findNewsFiles("./data/")
  62. book = xlwt.Workbook(encoding="utf-8")
  63. sheet1 = book.add_sheet("Clicks history real")
  64. sheet2 = book.add_sheet("Clicks history fake")
  65. rowReal = 0
  66. rowFake = 0
  67. sheet1.write(rowReal, 0, "URL")
  68. sheet1.write(rowReal, 1, "Class")
  69. sheet1.write(rowReal, 2, "followers")
  70. sheet1.write(rowReal, 3, u'Antal click vid tiden: ')
  71. for path in paths:
  72. colTime = 2
  73. with open(path, 'r') as file:
  74. data = file.read().split('\n')
  75. print("path: " + path)
  76. timestamp =path[7:len(path) - 12]
  77. timestamp=timestamp.replace("-","")
  78. timestamp=timestamp.replace("_","")
  79.  
  80.  
  81. rowReal = rowReal + 1
  82. counterReal = 0
  83. print "File " + path + " is start file no. %d" % (paths.index(path) + 1)
  84.  
  85. for line in data[:-1]:
  86. clicks =[]
  87. if line != '':
  88. dataDict = ast.literal_eval(line)
  89. url = str(dataDict["long_url"])
  90. clicks= dataDict["global_clicks"]
  91. urlDict[url][timestamp]= clicks
  92.  
  93. updatePaths = findUpdateFiles("./data/", path)
  94. updateArray = []
  95. while updatePaths.empty() is False:
  96. updatePath = updatePaths.get()
  97. #print("updatepath: " + updatePath)
  98. updatePath1=(updatePath[len(updatePath) - 22:len(updatePath) - 5])
  99. updatePath1=updatePath1.replace("-","")
  100. updatePath1=updatePath1.replace("_","")
  101.  
  102. with open(updatePath, 'r') as file:
  103. updateArray = ast.literal_eval(file.read())
  104. for sampleUpdate in updateArray:
  105. clicks = sampleUpdate["global_clicks"]
  106. url = sampleUpdate["long_url"]
  107. urlDict[url][updatePath1]=clicks
  108.  
  109. for url in urlDict:
  110. #print url
  111. #print type(urlDict[url])
  112. #print urlDict[url]
  113. #urlDict[url] = sorted(urlDict[url])
  114. urlDict[url] = collections.OrderedDict(sorted(urlDict[url].items()))
  115. print len(urlDict[url])
  116. #for url in sorted(sak):
  117. #print()
  118. #print "%s: %s" % (url, sak[url])
  119. #urlDict[url]= urlDict[url]
  120. #od=collections.OrderedDict(sorted(urlDict[url].items()))
  121. #od=collections.OrderedDict(sorted(urlDict.items()))
  122. #od = {}
  123. #for key in sorted
  124. row = 1
  125. col = 1
  126. for url in urlDict:
  127. try:
  128. classification = dictionaryOfURLandClass[url]
  129. sheet1.write(row,0,url)
  130. sheet1.write(row,1, classification)
  131. except KeyError as e:
  132. print "fel"
  133.  
  134. row+=1
  135. print urlDict
  136. #print(od)
  137. book.save("ResultsBitly.xls")
  138. print "Done updating, done saving excel."
  139.  
  140.  
  141. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement