Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import smtplib
def mail(message):
    """Send *message* by e-mail through Gmail's SMTP server.

    The account password is read from the ``GMAIL_PASSWORD`` environment
    variable instead of being stored in the source file.  ``GMAIL_USER`` and
    ``MAIL_RECIPIENT`` may override the sender and recipient; when unset they
    fall back to the addresses this function always used.

    NOTE(review): the password that used to be hard-coded here was published
    together with this file and should be treated as compromised and rotated.

    Args:
        message: The raw message string handed to ``SMTP.sendmail``.

    Raises:
        RuntimeError: If ``GMAIL_PASSWORD`` is not set.
        smtplib.SMTPException: If the login or the delivery fails.
    """
    # Local import so the function does not depend on where ``import os``
    # sits relative to this definition in the module.
    import os

    gmail_user = os.environ.get('GMAIL_USER', 'jesper.holmstrom92@gmail.com')
    gmail_password = os.environ.get('GMAIL_PASSWORD')
    recipient = os.environ.get('MAIL_RECIPIENT', 'bry.holmstrom@gmail.com')
    if not gmail_password:
        # Fail before opening a connection rather than with a confusing
        # authentication error from the server.
        raise RuntimeError(
            "GMAIL_PASSWORD environment variable is not set; cannot log in")

    server = smtplib.SMTP('smtp.gmail.com', 587)
    try:
        server.starttls()
        server.login(gmail_user, gmail_password)
        server.sendmail(gmail_user, recipient, message)
        server.quit()
    finally:
        # Releases the socket even when one of the calls above raised;
        # harmless after a successful quit().
        server.close()
- import xlwt
- import ast
- import time
- import re
- import os
- import bitlydatahandler
- import Queue
- import collections
- from collections import defaultdict
# File-name patterns for the two kinds of start files picked up by
# findNewsFiles.  The dot before "txt" is escaped in both so that only a
# literal ".txt" suffix matches.
newsEnding24h = re.compile(r'.*news24h\.txt$')
newsEnding1h = re.compile(r'.*news01h\.txt$')
# Module-level placeholder; the functions visible in this file all work on
# their own ``path`` parameter or local instead of reading this value.
path = ""
def findNewsFiles(path):
    """Return the paths of the news start files found directly in *path*.

    A file qualifies when its name matches ``newsEnding1h`` or
    ``newsEnding24h``.  Entries are returned in ``os.listdir`` order.

    Args:
        path: Directory to scan.  It may be given with or without a trailing
            separator; for a value such as ``"./data/"`` the resulting paths
            are exactly ``path + file_name``.

    Returns:
        A list of path strings, one per matching file.
    """
    return [
        os.path.join(path, file_name)
        for file_name in os.listdir(path)
        if newsEnding1h.match(file_name) or newsEnding24h.match(file_name)
    ]
def findUpdateFiles(path, origin):
    """Collect the update files in *path* that belong to the start file *origin*.

    The 25 characters in front of the 4-character extension of *origin* are
    taken as prefix; every file in *path* whose name starts with that prefix
    followed by ``_`` counts as one of its update files.

    Args:
        path: Directory to scan, with or without a trailing separator.
        origin: Path (or name) of the start file.

    Returns:
        A ``Queue.PriorityQueue`` holding the matching paths as plain
        strings, so ``get()`` yields them in ascending string order.

    Note:
        The previous version passed a number parsed from the file name as
        second argument to ``put``.  That parameter is ``block``, not a
        priority, so it never influenced the ordering; it has been dropped,
        which leaves the order of the returned paths unchanged.
    """
    file_paths = Queue.PriorityQueue()
    # Same slice as before: the text between position -29 and the extension.
    prefix = origin[(len(origin) - 29):(len(origin) - 4)] + '_'
    for file_name in os.listdir(path):
        # Plain prefix comparison: the prefix is data taken from a file
        # name and must not be interpreted as a regular expression.
        if file_name.startswith(prefix):
            file_paths.put(os.path.join(path, file_name))
    return file_paths
# Shared store filled by main(): maps a URL to a dict of its recorded values;
# a missing URL gets an empty dict on first access.
urlDict = collections.defaultdict(dict)
def main():
    """Build ``ResultsBitly.xls`` from the click samples stored under ``./data/``.

    Steps:
      1. Read ``./data/news/classifications.txt``; each non-empty line is
         split on ``;`` and field 2 is mapped to field 1 (URL -> class).
      2. For every start file returned by ``findNewsFiles``, store the
         followers, retweets and global click count of each sample in the
         module-level ``urlDict``, keyed by a timestamp cut out of the file
         name.
      3. Add the click counts from the matching update files
         (``findUpdateFiles``) under their own timestamps.
      4. Write one spreadsheet row per URL that has a classification and
         save the workbook.

    URLs without a classification (or without follower/retweet data) print
    ``fel`` and still consume a row number.
    """
    class_path = "./data/news/classifications.txt"
    with open(class_path, 'r') as class_file:
        class_lines = class_file.read().split('\n')

    class_by_url = {}
    for entry in class_lines:
        if entry == '':
            continue
        fields = entry.split(';')
        class_by_url[fields[2]] = fields[1]

    news_paths = findNewsFiles("./data/")

    book = xlwt.Workbook(encoding="utf-8")
    sheet = book.add_sheet("Clicks history real")

    # Header row: five fixed columns followed by T= 0, 2, ..., 120.
    fixed_titles = ("URL", "Class", "Followers", "Retweets", "Startdate")
    for column, title in enumerate(fixed_titles):
        sheet.write(0, column, title)
    for step in range(61):
        sheet.write(0, 5 + step, "T= " + str(step * 2))

    for news_path in news_paths:
        with open(news_path, 'r') as news_file:
            news_lines = news_file.read().split('\n')

        # Drop the 7 leading and 12 trailing characters of the path and
        # strip the separators to get the key for this start sample.
        start_stamp = news_path[7:len(news_path) - 12]
        start_stamp = start_stamp.replace("-", "").replace("_", "")

        # The final element of the split is never parsed.
        for raw in news_lines[:-1]:
            if raw == '':
                continue
            sample = ast.literal_eval(raw)
            url = str(sample["long_url"])
            clicks = sample["global_clicks"]
            followers = sample["followers"]
            urlDict[url]['followers'] = followers
            urlDict[url]['retweets'] = sample["retweets"]
            urlDict[url][start_stamp] = clicks

        pending = findUpdateFiles("./data/", news_path)
        while not pending.empty():
            update_path = pending.get()
            # 17 characters in front of the 5-character extension.
            update_stamp = update_path[len(update_path) - 22:len(update_path) - 5]
            update_stamp = update_stamp.replace("-", "").replace("_", "")
            with open(update_path, 'r') as update_file:
                update_samples = ast.literal_eval(update_file.read())
            for update in update_samples:
                clicks = update["global_clicks"]
                url = update["long_url"]
                urlDict[url][update_stamp] = clicks

    # Order every URL's entries by key so they are written in key order.
    for url in list(urlDict):
        urlDict[url] = collections.OrderedDict(sorted(urlDict[url].items()))

    row = 1
    for url in urlDict:
        padded_once = False
        try:
            classification = class_by_url[url]
            series = urlDict[url]
            sheet.write(row, 0, url)
            sheet.write(row, 1, classification)
            sheet.write(row, 2, series['followers'])
            sheet.write(row, 3, series['retweets'])
            col = 5
            print(len(series))
            for position, key in enumerate(series):
                if position == 0:
                    # The first key in sorted order fills "Startdate".
                    sheet.write(row, 4, key)
                if key == "followers" or key == "retweets":
                    continue
                if len(series) == 14 and col > 11:
                    # Series with exactly 14 entries get "-" filler cells in
                    # front of every value from column 12 on: 5 the first
                    # time, 11 afterwards.  Presumably these samples were
                    # taken at a coarser interval -- confirm with the data.
                    gap = 11 if padded_once else 5
                    for _ in range(gap):
                        sheet.write(row, col, "-")
                        col += 1
                    padded_once = True
                sheet.write(row, col, series[key])
                col += 1
        except KeyError:
            print("fel")
        row += 1

    book.save("ResultsBitly.xls")
    print("Done updating, done saving excel.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement