Advertisement
Guest User

Untitled

a guest
Apr 3rd, 2018
1,238
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.34 KB | None | 0 0
  1. import smtplib
  2. def mail(message):
  3. gmail_user = 'jesper.holmstrom92@gmail.com'
  4. gmail_password = 'Jesperhol12!'
  5.  
  6.  
  7. server = smtplib.SMTP('smtp.gmail.com', 587)
  8. server.starttls()
  9. server.login("jesper.holmstrom92@gmail.com", "Jesperhol12")
  10. server.sendmail("jesper.holmstrom92@gmail.com", "bry.holmstrom@gmail.com", message)
  11. server.quit()
  12.  
  13. import xlwt
  14. import ast
  15. import time
  16. import re
  17. import os
  18. import bitlydatahandler
  19. import Queue
  20. import collections
  21. from collections import defaultdict
  22.  
  23.  
  24. newsEnding24h = re.compile('.*news24h.txt$')
  25.  
  26. newsEnding1h = re.compile('.*news01h\.txt$')
  27. path = ""
  28.  
  29. def findNewsFiles(path):
  30. file_paths = []
  31. for file_name in os.listdir(path):
  32. if (newsEnding1h.match(file_name) or newsEnding24h.match(file_name)):
  33. file_path = path + file_name
  34. file_paths.append(file_path)
  35. return file_paths
  36.  
  37.  
  38. def findUpdateFiles(path, origin):
  39. file_paths = Queue.PriorityQueue()
  40. timestampAndNews = origin[(len(origin) - 29):(len(origin) - 4)]
  41. updateForm = re.compile(timestampAndNews + '[_]{1}.*')
  42. for file_name in os.listdir(path):
  43. if (updateForm.match(file_name)):
  44. file_path = path + file_name
  45. file_paths.put(file_path, int(file_name[(len(file_name) - 11):(len(file_name) - 5)]))
  46. return file_paths
  47. urlDict = defaultdict(dict)
  48.  
  49.  
  50.  
  51. def main():
  52. classPath = "./data/news/classifications.txt"
  53. with open(classPath, 'r') as classFile:
  54. classData = classFile.read().split('\n')
  55. dictionaryOfURLandClass = {}
  56. for line in classData:
  57. if line != '':
  58. dictionaryOfURLandClass[line.split(';')[2]] = line.split(';')[1]
  59.  
  60. paths = findNewsFiles("./data/")
  61. book = xlwt.Workbook(encoding="utf-8")
  62. sheet1 = book.add_sheet("Clicks history real")
  63. initrow = 0
  64. sheet1.write(initrow, 0, "URL")
  65. sheet1.write(initrow, 1, "Class")
  66. sheet1.write(initrow, 2, "Followers")
  67. sheet1.write(initrow, 3, "Retweets")
  68. sheet1.write(initrow, 4,"Startdate")
  69. for i in range(5, 66):
  70. sheet1.write(initrow, i, "T= " + str((i-5)*2))
  71.  
  72. for path in paths:
  73. with open(path, 'r') as file:
  74. data = file.read().split('\n')
  75. #print("path: " + path)
  76. timestamp =path[7:len(path) - 12]
  77. timestamp=timestamp.replace("-","")
  78. timestamp=timestamp.replace("_","")
  79. initrow = initrow + 1
  80.  
  81. #print "File " + path + " is start file no. %d" % (paths.index(path) + 1)
  82. for line in data[:-1]:
  83. if line != '':
  84. dataDict = ast.literal_eval(line)
  85. url = str(dataDict["long_url"])
  86. clicks= dataDict["global_clicks"]
  87. followers = dataDict["followers"]
  88. urlDict[url]['followers'] = followers
  89. retweets = dataDict["retweets"]
  90. urlDict[url]['retweets'] = retweets
  91. urlDict[url][timestamp]= clicks
  92.  
  93. updatePaths = findUpdateFiles("./data/", path)
  94. updateArray = []
  95. while updatePaths.empty() is False:
  96. updatePath = updatePaths.get()
  97. updatePath1=(updatePath[len(updatePath) - 22:len(updatePath) - 5])
  98. updatePath1=updatePath1.replace("-","")
  99. updatePath1=updatePath1.replace("_","")
  100.  
  101. with open(updatePath, 'r') as file:
  102. updateArray = ast.literal_eval(file.read())
  103. for sampleUpdate in updateArray:
  104. clicks = sampleUpdate["global_clicks"]
  105. url = sampleUpdate["long_url"]
  106. urlDict[url][updatePath1]=clicks
  107.  
  108. for url in urlDict:
  109. urlDict[url] = collections.OrderedDict(sorted(urlDict[url].items()))
  110.  
  111. row = 1
  112. for url in urlDict:
  113. firstRound = True
  114. startDate = True
  115. try:
  116. classification = dictionaryOfURLandClass[url]
  117. sheet1.write(row,0, url)
  118. sheet1.write(row,1, classification)
  119. sheet1.write(row,2, urlDict[url]['followers'])
  120. sheet1.write(row, 3, urlDict[url]['retweets'])
  121. col = 5
  122. print(len(urlDict[url]))
  123. for timestamp in urlDict[url]:
  124. if(startDate):
  125. sheet1.write(row,4,timestamp)
  126. startDate= False
  127. if(timestamp!="followers" and timestamp!="retweets"):
  128. if(len(urlDict[url])==14 and col >11):
  129.  
  130. if(firstRound):
  131. for i in range(1, 6):
  132. sheet1.write(row, col, "-")
  133. col += 1
  134. sheet1.write(row, col, urlDict[url][timestamp])
  135. firstRound = False
  136.  
  137. else:
  138. for i in range(1, 12):
  139. sheet1.write(row, col, "-")
  140. col += 1
  141. sheet1.write(row, col, urlDict[url][timestamp])
  142. else:
  143. sheet1.write(row, col, urlDict[url][timestamp])
  144. col +=1
  145. except KeyError as e:
  146. print "fel"
  147. row+=1
  148. #print urlDict
  149. #print(od)
  150. book.save("ResultsBitly.xls")
  151. print "Done updating, done saving excel."
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement