Advertisement
Guest User

Untitled

a guest
Apr 6th, 2018
301
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.80 KB | None | 0 0
  1. import smtplib
  def mail(message):
      """Send ``message`` through Gmail's SMTP server to the fixed recipient.

      The account password is read from the ``GMAIL_PASSWORD`` environment
      variable rather than being embedded in the source; the sender address
      may be overridden with ``GMAIL_USER``.

      NOTE(review): a password was previously hardcoded in this function and
      published with the file. Treat it as leaked and rotate it.

      Args:
          message: Message text passed unchanged to ``SMTP.sendmail``.

      Raises:
          RuntimeError: If ``GMAIL_PASSWORD`` is unset or empty. Raised before
              any network connection is attempted.

      Connection and SMTP errors raised by ``smtplib`` propagate to the caller.
      """
      # Local import so the function does not depend on where in the module
      # ``os`` gets imported.
      import os

      sender = os.environ.get("GMAIL_USER", "jesper.holmstrom92@gmail.com")
      password = os.environ.get("GMAIL_PASSWORD")
      if not password:
          raise RuntimeError(
              "GMAIL_PASSWORD environment variable is not set; cannot send mail"
          )

      server = smtplib.SMTP('smtp.gmail.com', 587)
      try:
          server.starttls()
          server.login(sender, password)
          server.sendmail(sender, "bry.holmstrom@gmail.com", message)
      finally:
          # Always end the session, even when login or sending failed.
          try:
              server.quit()
          except smtplib.SMTPServerDisconnected:
              # The connection is already gone; there is nothing left to close.
              pass
  12.  
  13. import xlwt
  14. import ast
  15. import time
  16. import re
  17. import os
  18. import bitlydatahandler
  19. import Queue
  20. import collections
  21. from collections import defaultdict
  22.  
  23.  
  # File-name patterns for the start files picked up by findNewsFiles().
  # The dot before "txt" is escaped in both patterns so that it only matches
  # a literal '.', not an arbitrary character.
  newsEnding24h = re.compile(r'.*news24h\.txt$')

  newsEnding1h = re.compile(r'.*news01h\.txt$')
  # Module-level default path. The functions visible in this file take or
  # define their own ``path``, so this value is not read by them.
  path = ""
  28.  
  def findNewsFiles(path):
      """Return the paths of all start files found directly inside ``path``.

      A start file is any directory entry whose name matches ``newsEnding1h``
      or ``newsEnding24h``.

      Args:
          path: Directory to scan. A trailing separator is optional; when it
              is present (e.g. ``"./data/"``) the returned paths are exactly
              ``path + file_name``.

      Returns:
          List of ``path`` joined with each matching file name, in sorted
          file-name order so the result does not depend on the arbitrary
          order of ``os.listdir``.
      """
      return [
          os.path.join(path, file_name)
          for file_name in sorted(os.listdir(path))
          if newsEnding1h.match(file_name) or newsEnding24h.match(file_name)
      ]
  36.  
  37.  
  def findUpdateFiles(path, origin):
      """Collect the update files in ``path`` that belong to ``origin``.

      The 25 characters that precede the 4-character extension of ``origin``
      are used as a stem; every directory entry whose name starts with that
      stem followed by ``'_'`` is treated as an update file.
      (Presumably the stem is "<timestamp>_news24h" / "<timestamp>_news01h";
      confirm against the files in the data directory.)

      Args:
          path: Directory to scan; a trailing separator is optional.
          origin: Path (or name) of the start file the updates belong to.

      Returns:
          A ``Queue.PriorityQueue`` holding the matching paths as plain
          strings. ``get()`` yields them in ascending string order.

      The previous code passed a number parsed from the file name as the
      second positional argument of ``put``. That parameter is ``block``, not
      a priority, so it never influenced the ordering. The queue has always
      been ordered by the path string itself, which is now stated explicitly.
      """
      file_paths = Queue.PriorityQueue()
      timestampAndNews = origin[(len(origin) - 29):(len(origin) - 4)]
      # Literal prefix test: equivalent to matching "<stem>_.*" at the start
      # of the name, but safe if the stem ever contains regex metacharacters.
      prefix = timestampAndNews + '_'
      for file_name in os.listdir(path):
          if file_name.startswith(prefix):
              file_paths.put(os.path.join(path, file_name))
      return file_paths
  # Samples collected by main(), keyed by URL. Each entry maps the metadata
  # keys 'random', 'is24h', 'followers' and 'retweets', plus one key per sample
  # timestamp (with '-' and '_' removed), to the recorded value.
  urlDict = defaultdict(dict)

  # Keys of a urlDict entry that hold metadata rather than click samples.
  _METADATA_KEYS = ("followers", "retweets", "random", "is24h")


  def _writeUrlRow(sheet, row, url, classification, samples):
      """Write the spreadsheet row for one URL.

      Args:
          sheet: Worksheet object providing ``write(row, col, value)``.
          row: Row index to write to.
          url: URL written to column 0.
          classification: Class label written to column 1.
          samples: Key-sorted mapping for this URL (see ``urlDict``).

      Raises:
          KeyError: If one of the metadata keys is missing from ``samples``.
              Columns written before the failing lookup stay in the sheet.
      """
      sheet.write(row, 0, url)
      sheet.write(row, 1, classification)
      sheet.write(row, 2, samples['random'])
      sheet.write(row, 3, samples['is24h'])
      sheet.write(row, 4, samples['followers'])
      sheet.write(row, 5, samples['retweets'])
      print(len(samples))

      col = 6
      firstRound = True
      startDate = True
      for timestamp in samples:
          if startDate:
              # The first key in sorted order goes into the "Startdate"
              # column; sample values start in the column after it.
              sheet.write(row, 6, timestamp)
              startDate = False
              col = 7
          if timestamp in _METADATA_KEYS:
              continue
          if len(samples) == 16 and col > 13:
              # Entries with exactly 16 keys get "-" padding before each
              # sample once column 13 has been passed: 5 cells the first
              # time, 11 cells afterwards.
              # NOTE(review): presumably this aligns sparser late samples
              # with the "T= ..." header columns; confirm the intent.
              padding = 5 if firstRound else 11
              firstRound = False
              for _ in range(padding):
                  sheet.write(row, col, "-")
                  col += 1
          sheet.write(row, col, samples[timestamp])
          col += 1


  def main():
      """Build ``ResultsBitly.xls`` from the click data under ``./data/``.

      Steps:
        1. Read ``./data/news/classifications.txt``; each non-empty line is
           split on ';' and field 2 (URL) is mapped to field 1 (class).
        2. For every start file returned by ``findNewsFiles("./data/")``,
           parse each non-empty line as a Python literal and record its
           metadata and click count in ``urlDict`` under the timestamp taken
           from the file name.
        3. For every update file returned by ``findUpdateFiles``, parse the
           whole file as a Python literal (an iterable of records) and record
           each click count under the update file's timestamp.
        4. Sort every URL's entry by key and write one row per URL to the
           sheet "Clicks history real", then save the workbook.

      A URL without a classification, or whose entry lacks a metadata key,
      prints "fel" and still consumes its row, as before.
      """
      classPath = "./data/news/classifications.txt"
      with open(classPath, 'r') as classFile:
          classData = classFile.read().split('\n')
      dictionaryOfURLandClass = {}
      for line in classData:
          if line != '':
              fields = line.split(';')
              dictionaryOfURLandClass[fields[2]] = fields[1]

      paths = findNewsFiles("./data/")
      book = xlwt.Workbook(encoding="utf-8")
      sheet1 = book.add_sheet("Clicks history real")
      headers = ("URL", "Class", "Random", "is24h", "Followers", "Retweets",
                 "Startdate")
      for headerCol, title in enumerate(headers):
          sheet1.write(0, headerCol, title)
      for i in range(7, 68):
          sheet1.write(0, i, "T= " + str((i - 7) * 2))

      for path in paths:
          with open(path, 'r') as startFile:
              data = startFile.read().split('\n')
          # Drop the 7-character "./data/" prefix and the 12-character
          # "_newsXXh.txt" suffix, leaving the timestamp part of the name.
          timestamp = path[7:len(path) - 12]
          timestamp = timestamp.replace("-", "").replace("_", "")

          # Every non-empty line is a record. The previous ``data[:-1]``
          # silently dropped the last record when the file did not end with
          # a newline; empty lines are filtered explicitly instead.
          for line in data:
              if line == '':
                  continue
              dataDict = ast.literal_eval(line)
              url = str(dataDict["long_url"])
              clicks = dataDict["global_clicks"]
              followers = dataDict["followers"]
              random = dataDict["random"]
              is24h = dataDict["is24h"]
              retweets = dataDict["retweets"]
              entry = urlDict[url]
              entry['random'] = random
              entry['is24h'] = is24h
              entry['followers'] = followers
              entry['retweets'] = retweets
              entry[timestamp] = clicks

          updatePaths = findUpdateFiles("./data/", path)
          while not updatePaths.empty():
              updatePath = updatePaths.get()
              # The 17 characters before the 5-character file ending hold
              # the update's timestamp.
              updateStamp = updatePath[len(updatePath) - 22:len(updatePath) - 5]
              updateStamp = updateStamp.replace("-", "").replace("_", "")

              with open(updatePath, 'r') as updateFile:
                  updateArray = ast.literal_eval(updateFile.read())
              for sampleUpdate in updateArray:
                  clicks = sampleUpdate["global_clicks"]
                  url = sampleUpdate["long_url"]
                  urlDict[url][updateStamp] = clicks

      # Sort each entry by key. Digit-only timestamps sort before the
      # alphabetic metadata keys, so samples come first in time order.
      for url in urlDict:
          urlDict[url] = collections.OrderedDict(sorted(urlDict[url].items()))

      row = 1
      for url in urlDict:
          try:
              _writeUrlRow(sheet1, row, url, dictionaryOfURLandClass[url],
                           urlDict[url])
          except KeyError:
              # "fel" is Swedish for "error": the URL has no classification
              # or its entry lacks a metadata key.
              print("fel")
          row += 1
      book.save("ResultsBitly.xls")
      print("Done updating, done saving excel.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement