Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import smtplib
def mail(message):
    """Send *message* by e-mail through Gmail's SMTP server.

    The sender address comes from the ``GMAIL_USER`` environment variable
    (falling back to the address this script previously hard-coded) and the
    password from ``GMAIL_PASSWORD``.  The password is deliberately not kept
    in the source: the earlier version embedded two different literals for
    it, only one of which was actually passed to ``login``.

    Args:
        message: Message text, handed unchanged to ``SMTP.sendmail``.

    Raises:
        RuntimeError: If ``GMAIL_PASSWORD`` is unset or empty.  Raised before
            any network connection is attempted.
        smtplib.SMTPException: Propagated from the SMTP session.
    """
    gmail_user = os.environ.get("GMAIL_USER", "jesper.holmstrom92@gmail.com")
    gmail_password = os.environ.get("GMAIL_PASSWORD")
    if not gmail_password:
        raise RuntimeError(
            "GMAIL_PASSWORD environment variable must be set to send mail")
    recipient = "bry.holmstrom@gmail.com"

    server = smtplib.SMTP('smtp.gmail.com', 587)
    try:
        server.starttls()
        server.login(gmail_user, gmail_password)
        server.sendmail(gmail_user, recipient, message)
    finally:
        # Always release the socket, even when a step above failed.  If the
        # server already dropped the connection, QUIT cannot be sent, so just
        # close our side instead of masking the original error.
        try:
            server.quit()
        except smtplib.SMTPServerDisconnected:
            server.close()
- import xlwt
- import ast
- import time
- import re
- import os
- import bitlydatahandler
- import Queue
- import collections
- from collections import defaultdict
# File-name patterns for the two kinds of news snapshot files.  Both dots are
# escaped so that only a literal ".txt" extension matches.
newsEnding24h = re.compile(r'.*news24h\.txt$')
newsEnding1h = re.compile(r'.*news01h\.txt$')
# Module-level placeholder; the functions visible in this file take or bind
# their own `path` and do not read this value.
path = ""
def findNewsFiles(path):
    """Return the paths of the news snapshot files directly inside *path*.

    A file qualifies when its name matches ``newsEnding1h`` or
    ``newsEnding24h``.

    Args:
        path: Directory to list.

    Returns:
        A list with *path* joined to every matching file name, in the order
        ``os.listdir`` reports the names.
    """
    # os.path.join only inserts a separator when *path* lacks one, so a
    # caller passing "./data/" gets exactly the strings plain concatenation
    # produced, while "./data" now works as well.
    return [
        os.path.join(path, file_name)
        for file_name in os.listdir(path)
        if newsEnding1h.match(file_name) or newsEnding24h.match(file_name)
    ]
def findUpdateFiles(path, origin):
    """Collect the update files in *path* that belong to news file *origin*.

    The 25 characters that precede the last 4 characters of *origin* are used
    as a prefix.  A file in *path* counts as an update file when its name
    starts with that prefix followed by an underscore.

    Args:
        path: Directory to list.
        origin: Path of the news file whose updates are wanted.

    Returns:
        A ``Queue.PriorityQueue`` holding the matching file paths as plain
        strings, so ``get()`` yields them in ascending string order.
    """
    file_paths = Queue.PriorityQueue()
    end = len(origin) - 4
    timestampAndNews = origin[end - 25:end]
    # Escape the prefix so characters such as '.' are matched literally.
    updateForm = re.compile(re.escape(timestampAndNews) + '_')
    for file_name in os.listdir(path):
        if updateForm.match(file_name):
            # The second positional parameter of PriorityQueue.put is
            # `block`, not a priority, so the number formerly passed there
            # never influenced ordering; parsing it could only fail on names
            # without a numeric tail.  Ordering is by the path string itself.
            file_paths.put(os.path.join(path, file_name))
    return file_paths
# Maps each URL to a dict of {compact timestamp string: click count};
# populated and then sorted per URL by main().
urlDict = defaultdict(dict)


def _loadClassifications(classPath):
    """Parse the ';'-separated classification file into a dict.

    For every non-empty line, field index 2 becomes the key and field
    index 1 the value.
    """
    with open(classPath, 'r') as classFile:
        classData = classFile.read().split('\n')
    classByUrl = {}
    for line in classData:
        if line != '':
            fields = line.split(';')
            classByUrl[fields[2]] = fields[1]
    return classByUrl


def _compactTimestamp(text):
    """Return *text* with every '-' and '_' removed."""
    return text.replace("-", "").replace("_", "")


def main():
    """Gather click counts per URL and save URL/class rows to an .xls file.

    Steps:
      1. Load the classification table from
         ``./data/news/classifications.txt`` and print its keys.
      2. For every news file returned by ``findNewsFiles("./data/")``, record
         each record's ``global_clicks`` under its ``long_url`` in
         ``urlDict``, then do the same for every update file returned by
         ``findUpdateFiles``.
      3. Sort each URL's history by timestamp key.
      4. Write URL and class to the first sheet and save
         ``ResultsBitly.xls``.

    Only the URL and class columns are written; the click histories collected
    in ``urlDict`` are printed but not stored in the workbook.

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting below is the most plausible reading and should
    be confirmed against the original script.
    """
    dictionaryOfURLandClass = _loadClassifications(
        "./data/news/classifications.txt")
    for item in dictionaryOfURLandClass:
        print(item)

    paths = findNewsFiles("./data/")
    book = xlwt.Workbook(encoding="utf-8")
    sheet1 = book.add_sheet("Clicks history real")
    # The second sheet is created but nothing is ever written to it.
    book.add_sheet("Clicks history fake")
    sheet1.write(0, 0, "URL")
    sheet1.write(0, 1, "Class")
    sheet1.write(0, 2, "followers")
    sheet1.write(0, 3, u'Antal click vid tiden: ')

    for fileNumber, path in enumerate(paths, 1):
        with open(path, 'r') as newsFile:
            data = newsFile.read().split('\n')
        print("path: " + path)
        # Drop the first 7 characters (the length of "./data/") and the last
        # 12 characters of the path; what remains is used as the timestamp.
        timestamp = _compactTimestamp(path[7:len(path) - 12])
        print("File " + path + " is start file no. %d" % fileNumber)
        # The final split element is skipped; blank lines are ignored.
        for line in data[:-1]:
            if line != '':
                dataDict = ast.literal_eval(line)
                url = str(dataDict["long_url"])
                clicks = dataDict["global_clicks"]
                urlDict[url][timestamp] = clicks

        updatePaths = findUpdateFiles("./data/", path)
        while not updatePaths.empty():
            updatePath = updatePaths.get()
            # 17 characters ending 5 characters before the end of the path.
            updateStamp = _compactTimestamp(
                updatePath[len(updatePath) - 22:len(updatePath) - 5])
            with open(updatePath, 'r') as updateFile:
                updateArray = ast.literal_eval(updateFile.read())
            for sampleUpdate in updateArray:
                clicks = sampleUpdate["global_clicks"]
                url = sampleUpdate["long_url"]
                urlDict[url][updateStamp] = clicks

    # Rebinding existing keys while iterating is safe: the key set is fixed.
    for url in urlDict:
        urlDict[url] = collections.OrderedDict(sorted(urlDict[url].items()))
        print(len(urlDict[url]))

    row = 1
    for url in urlDict:
        try:
            classification = dictionaryOfURLandClass[url]
        except KeyError:
            print("fel")
        else:
            sheet1.write(row, 0, url)
            sheet1.write(row, 1, classification)
        # NOTE(review): the row advances for unclassified URLs too, which
        # leaves an empty row in the sheet -- confirm this is intended.
        row += 1

    print(urlDict)
    book.save("ResultsBitly.xls")
    print("Done updating, done saving excel.")
# Run the export only when executed as a script, so importing this module
# (e.g. to reuse findNewsFiles) does not trigger file and spreadsheet I/O.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement