Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- from facepy import GraphAPI
- import io
- import pyodbc
- import dateutil.parser as dateparser
- from sixopy.db import Odbc
- from datetime import timedelta
- from datetime import datetime
- import pandas as pd
# NOTE(review): SECURITY — live credentials are hardcoded in source and have
# been published (this file was pasted publicly). The Facebook access token
# and the SQL password should be rotated immediately and loaded from
# environment variables or a secrets manager instead of being committed.
atoken = 'EAACEdEose0cBAJUpFujsg2ajzzJvNPLyTC9SogquPN36YFNgCkiPxWbxR67Brdh9f7LZANL0SYn4naqjmnINmwCkz6Jk2tev6cQYmB5GMsWTIPCu2HZALsAnYYUO4o4z85Kb8ECnIZBHnSpXQcF96RXJnwtRLlTvPGbsP9t9rZCmwy3Bo7eivnojs6g5OCtgtLBGzo2gPQZDZD'  # Facebook Graph API access token
server = 'devsixdbserv01.database.secure.windows.net'  # Azure SQL server host
database = 'devsix'  # target database name
username = 'sixers_kkhitalishvili'  # SQL login
password = 'Xl#uq%60j48r9F2'  # SQL password (NOTE(review): rotate — leaked)
driver= '{ODBC Driver 13 for SQL Server}'  # ODBC driver string for pyodbc
def get_field(dic, field):
    """Return ``dic[field]``, or ``None`` when the key is absent.

    Parameters
    ----------
    dic : dict
        Mapping to read from.
    field : hashable
        Key to look up.
    """
    # dict.get performs a single lookup instead of the original
    # `field in dic` membership test followed by `dic[field]`.
    return dic.get(field)
def construct_dict(dic, fields):
    """Build a dict restricted to *fields*, using ``None`` for missing keys.

    Parameters
    ----------
    dic : dict
        Source mapping (e.g. a Graph API response).
    fields : iterable of hashable
        Keys the result must contain, in iteration order.
    """
    # Dict comprehension replaces the dict([(k, v) for ...]) anti-idiom;
    # dic.get(f) is exactly the original get_field(dic, f) behavior.
    return {f: dic.get(f) for f in fields}
def main():
    """Pull a Facebook page's posts and insight metrics via the Graph API,
    write them to a tab-separated backup file, and export a filtered CSV.

    NOTE(review): the bare ``return`` after the CSV export makes the Azure
    SQL insert section below it unreachable (dead code); that section also
    references an ``azure`` object whose creation is commented out at the
    top of the function, so it would raise NameError if it ever ran.
    """
    # # Connect to azure and get the date of last post
    # azure = Odbc(server,database,username,password)
    # since = azure.select("SELECT TOP 100 [created time],id FROM sixers.facebook_posts ORDER BY [created time] DESC")
    # # save id and date decreased by two days of last post
    # sinceids = [s[1] for s in since]
    # since = dateparser.parse(since[0][0]) - timedelta(days = 1)
    # since = int(since.timestamp()) # convert datetime to unix timestamp
    # Hardcoded start date replaces the DB-driven "last post" lookup above.
    # NOTE(review): datetime(...).timestamp() interprets the naive datetime in
    # the *local* timezone — confirm whether UTC was intended.
    since = int(datetime(2017,9,17).timestamp())
    # Get all pages of the feed published since `since`.
    graph = GraphAPI(atoken)
    pages = graph.get("52974817515/feed", page = True, since = since)
    # Flatten to post ids, oldest first (pageid_postid, ex.g. 52974817515_10156185473452516)
    postids = list(reversed([p['id'] for page in pages for p in page['data']]))
    # postids = ['52974817515_10156205319582516','52974817515_10156204804522516']
    # remove duplicate post ids (disabled along with the DB lookup above)
    # postids = list(filter(lambda x: x not in sinceids,postids))
    # Plain post fields, insight metric names, and metrics that arrive inside
    # nested responses; `varss` is the full ordered list of output columns.
    fields = "id,created_time,caption,description,link,message,type,status_type,place,permalink_url"
    insights = "post_impressions,post_impressions_unique,post_impressions_paid,post_impressions_paid_unique,post_impressions_organic,post_impressions_organic_unique,post_consumptions,post_consumptions_unique,post_engaged_users,post_video_avg_time_watched,post_video_complete_views_organic,post_video_complete_views_paid,post_video_views,post_video_views_unique,post_video_views_10s,post_video_views_10s_unique,post_video_length,post_video_view_time"
    nestedresp = "like,share,comment,hide_all_clicks,hide_clicks"
    varss = str(fields+","+insights+","+nestedresp).split(",")
    vartitles = {} # variable titles defined by facebook
    posts = []   # one dict per successfully fetched post
    count = 1    # progress counter for printing
    missed = 0   # posts skipped because any Graph call for them raised
    for d in postids:
        try:
            # get post fields
            pinfo = graph.get(str(d) + "?fields=" + fields) # dict with the fields
            # get post insights; keep the Facebook-provided titles, then
            # collapse each metric to its first value
            pinsights = graph.get(str(d) + "/insights?metric=" + insights)
            vartitles.update(dict([(p['name'],p['title']) for p in pinsights['data']]))
            pinsights = dict([(p['name'], p['values'][0]['value']) for p in pinsights['data']])
            # get insights from nested responses (action-type and
            # negative-feedback breakdowns come back as inner dicts)
            paction = graph.get(str(d) + "/insights?metric=" + "post_stories_by_action_type")['data'][0]['values'][0]['value']
            paction.update(graph.get(str(d) + "/insights?metric=" + "post_negative_feedback_by_type")['data'][0]['values'][0]['value'])
            pinfo.update(pinsights) # combine post fields and insights
            pinfo.update(paction)
            # construct dictionary with post data (None for anything missing)
            dic = construct_dict(pinfo,varss)
            # nested-response metrics default to 0 instead of None
            for i in nestedresp.split(','):
                if dic[i] is None:
                    dic[i] = 0
            # flatten the place dict to its display name
            if dic['place'] is not None: dic['place'] = dic['place']['name']
            posts.append(dic) # append to list
            print(count,"/",len(postids))
            count+=1
        except Exception as e:
            # NOTE(review): broad catch silently skips a post on *any* error
            # (network failure, missing key, ...); only a count is kept.
            print(e)
            missed += 1
    # Tab-separated backup (relative path — presumably backup/ must already
    # exist; verify). NOTE(review): posts[0] raises IndexError when every
    # post was missed.
    with io.open('backup/fb_posts.txt','w', encoding='utf-8') as file:
        file.write("\t".join(posts[0].keys()))
        file.write('\n')
        for row in posts:
            file.write("\t".join([str(val).replace('\n',' ') for val in row.values()]))
            file.write('\n')
    print("Missed " + str(missed) + "/" + str(len(postids)))
    #print(vartitles)
    # Hardcoded snapshot of Facebook's metric titles, overriding the
    # `vartitles` collected in the loop above.
    vartitles = {'post_impressions': 'Lifetime Post Total Impressions', 'post_impressions_unique': 'Lifetime Post Total Reach', 'post_impressions_paid': 'Lifetime Post Paid Impressions', 'post_impressions_paid_unique': 'Lifetime Post Paid Reach', 'post_impressions_organic': 'Lifetime Post Organic Impressions', 'post_impressions_organic_unique': 'Lifetime Post organic reach', 'post_consumptions': 'Lifetime Post Consumptions', 'post_consumptions_unique': 'Lifetime Post Consumers', 'post_engaged_users': 'Lifetime Engaged Users', 'post_video_avg_time_watched': 'Lifetime Average time video viewed', 'post_video_length': 'Lifetime Video length', 'post_video_complete_views_organic': 'Lifetime Organic watches at 95%', 'post_video_complete_views_paid': 'Lifetime Paid watches at 95%', 'post_video_views': 'Lifetime Total Video Views', 'post_video_views_unique': 'Lifetime Unique Video Views','post_video_views_10s':'Lifetime Total 10-Second Views','post_video_views_10s_unique':'Lifetime Unique 10-Second Views','post_video_view_time':'Lifetime Total Video View Time (in MS)'}
    vartitles.update(dict(zip(fields.split(','),fields.replace('_',' ').split(',')))) # add field titles with underscore replaced by space
    vartitles.update({'like':'Likes','share':'Shares','comment':'Comments','hide_all_clicks': "Hide all clicks",'hide_clicks':'Hide clicks'}) # add nested response titles
    posts = [{vartitles[k]: v for k, v in p.items()} for p in posts] # rename keys to human-readable titles
    print("Posts ORIG: ", len(posts))
    # filter by date: keep posts created on or before 2017-10-01
    # ('created time' is the renamed 'created_time' key)
    posts = [d for d in posts if datetime.strptime(d['created time'],'%Y-%m-%dT%H:%M:%S+0000') <= datetime.strptime('10/1/2017', '%m/%d/%Y')]
    print("Posts FILTERED: ",len(posts))
    # Second backup write (disabled).
    # with io.open(r'C:\Users\kkhitalishvili\Desktop\fb-content\data-mining\backup\fb_posts.txt','w', encoding='utf-8') as file:
    #     file.write("\t".join(posts[0].keys()))
    #     file.write('\n')
    #     for row in posts:
    #         file.write("\t".join([str(val).replace('\n',' ') for val in row.values()]))
    #         file.write('\n')
    # Final CSV export (hardcoded absolute Windows path).
    df = pd.DataFrame(posts)
    df.to_csv(r'C:\Users\kkhitalishvili\Desktop\social\social-content-report\sixers_fb2.csv')
    return
    # ---- DEAD CODE: unreachable because of the `return` above, and `azure`
    # is never defined (its creation is commented out at the top). ----
    count = 1
    cursor = azure.connect_to_azure()
    for row in posts:
        print()
        # Column order must match the dict value order; the tuple is bound
        # positionally to the 33 `?` placeholders.
        cursor.execute('''INSERT INTO [sixers].[facebook_posts]
            ( id,
            [created time],
            caption,
            description,
            link,
            message,
            type,
            [status type],
            place,
            [permalink url],
            [Lifetime Post Total Impressions],
            [Lifetime Post Total Reach],
            [Lifetime Post Paid Impressions],
            [Lifetime Post Paid Reach],
            [Lifetime Post Organic Impressions],
            [Lifetime Post organic reach],
            [Lifetime Post Consumptions],
            [Lifetime Post Consumers],
            [Lifetime Engaged Users],
            [Lifetime Average time video viewed],
            [Lifetime Organic views to 95%],
            [Lifetime Paid views to 95%],
            [Lifetime Total Video Views],
            [Lifetime Unique Video Views],
            [Lifetime Total 10-Second Views],
            [Lifetime Unique 10-Second Views],
            [Lifetime Video length],
            [Lifetime Total Video View Time (in MS)],
            Likes,
            Shares,
            Comments,
            [Hide all clicks],
            [Hide clicks]
            ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
            ''', tuple(row.values()))
        print("Inserted " + str(count) + "/" + str(len(posts)) + " rows")
        count += 1
    cursor.commit()
    cursor.close()
- if __name__ == "__main__":
- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement