Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- from facepy import GraphAPI
- import io
- import pyodbc
- import dateutil.parser as dateparser
- from sixopy.db import Odbc
- from datetime import timedelta
- from datetime import datetime
- import pandas as pd
# NOTE(review): SECURITY — live credentials are hardcoded in source and have
# been published (this file was pasted publicly). The Facebook access token
# and the SQL password should be rotated immediately and loaded from
# environment variables or a secrets manager instead of being committed.
atoken = 'EAACEdEose0cBAJUpFujsg2ajzzJvNPLyTC9SogquPN36YFNgCkiPxWbxR67Brdh9f7LZANL0SYn4naqjmnINmwCkz6Jk2tev6cQYmB5GMsWTIPCu2HZALsAnYYUO4o4z85Kb8ECnIZBHnSpXQcF96RXJnwtRLlTvPGbsP9t9rZCmwy3Bo7eivnojs6g5OCtgtLBGzo2gPQZDZD'  # Facebook Graph API access token
server = 'devsixdbserv01.database.secure.windows.net'  # Azure SQL server host
database = 'devsix'  # target database name
username = 'sixers_kkhitalishvili'  # SQL login
password = 'Xl#uq%60j48r9F2'  # SQL password (NOTE(review): rotate — leaked)
driver= '{ODBC Driver 13 for SQL Server}'  # ODBC driver string for pyodbc
def get_field(dic, field):
    """Return ``dic[field]``, or ``None`` when the key is absent.

    Parameters
    ----------
    dic : dict
        Mapping to read from.
    field : hashable
        Key to look up.
    """
    # dict.get performs a single lookup instead of the original
    # `field in dic` membership test followed by `dic[field]`.
    return dic.get(field)
def construct_dict(dic, fields):
    """Build a dict restricted to *fields*, using ``None`` for missing keys.

    Parameters
    ----------
    dic : dict
        Source mapping (e.g. a Graph API response).
    fields : iterable of hashable
        Keys the result must contain, in iteration order.
    """
    # Dict comprehension replaces the dict([(k, v) for ...]) anti-idiom;
    # dic.get(f) is exactly the original get_field(dic, f) behavior.
    return {f: dic.get(f) for f in fields}
def main():
    """Pull a Facebook page's posts and insight metrics via the Graph API,
    write them to a tab-separated backup file, and export a filtered CSV.

    NOTE(review): the bare ``return`` after the CSV export makes the Azure
    SQL insert section below it unreachable (dead code); that section also
    references an ``azure`` object whose creation is commented out at the
    top of the function, so it would raise NameError if it ever ran.
    """
    # # Connect to azure and get the date of last post
    # azure = Odbc(server,database,username,password)
    # since = azure.select("SELECT TOP 100 [created time],id FROM sixers.facebook_posts ORDER BY [created time] DESC")
    # # save id and date decreased by two days of last post
    # sinceids = [s[1] for s in since]
    # since = dateparser.parse(since[0][0]) - timedelta(days = 1)
    # since = int(since.timestamp()) # convert datetime to unix timestamp
    # Hardcoded start date replaces the DB-driven "last post" lookup above.
    # NOTE(review): datetime(...).timestamp() interprets the naive datetime in
    # the *local* timezone — confirm whether UTC was intended.
    since = int(datetime(2017,9,17).timestamp())
    # Get all pages of the feed published since `since`.
    graph = GraphAPI(atoken)
    pages = graph.get("52974817515/feed", page = True, since = since)
    # Flatten to post ids, oldest first (pageid_postid, ex.g. 52974817515_10156185473452516)
    postids = list(reversed([p['id'] for page in pages for p in page['data']]))
    # postids = ['52974817515_10156205319582516','52974817515_10156204804522516']
    # remove duplicate post ids (disabled along with the DB lookup above)
    # postids = list(filter(lambda x: x not in sinceids,postids))
    # Plain post fields, insight metric names, and metrics that arrive inside
    # nested responses; `varss` is the full ordered list of output columns.
    fields = "id,created_time,caption,description,link,message,type,status_type,place,permalink_url"
    insights = "post_impressions,post_impressions_unique,post_impressions_paid,post_impressions_paid_unique,post_impressions_organic,post_impressions_organic_unique,post_consumptions,post_consumptions_unique,post_engaged_users,post_video_avg_time_watched,post_video_complete_views_organic,post_video_complete_views_paid,post_video_views,post_video_views_unique,post_video_views_10s,post_video_views_10s_unique,post_video_length,post_video_view_time"
    nestedresp = "like,share,comment,hide_all_clicks,hide_clicks"
    varss = str(fields+","+insights+","+nestedresp).split(",")
    vartitles = {} # variable titles defined by facebook
    posts = []   # one dict per successfully fetched post
    count = 1    # progress counter for printing
    missed = 0   # posts skipped because any Graph call for them raised
    for d in postids:
        try:
            # get post fields
            pinfo = graph.get(str(d) + "?fields=" + fields) # dict with the fields
            # get post insights; keep the Facebook-provided titles, then
            # collapse each metric to its first value
            pinsights = graph.get(str(d) + "/insights?metric=" + insights)
            vartitles.update(dict([(p['name'],p['title']) for p in pinsights['data']]))
            pinsights = dict([(p['name'], p['values'][0]['value']) for p in pinsights['data']])
            # get insights from nested responses (action-type and
            # negative-feedback breakdowns come back as inner dicts)
            paction = graph.get(str(d) + "/insights?metric=" + "post_stories_by_action_type")['data'][0]['values'][0]['value']
            paction.update(graph.get(str(d) + "/insights?metric=" + "post_negative_feedback_by_type")['data'][0]['values'][0]['value'])
            pinfo.update(pinsights) # combine post fields and insights
            pinfo.update(paction)
            # construct dictionary with post data (None for anything missing)
            dic = construct_dict(pinfo,varss)
            # nested-response metrics default to 0 instead of None
            for i in nestedresp.split(','):
                if dic[i] is None:
                    dic[i] = 0
            # flatten the place dict to its display name
            if dic['place'] is not None: dic['place'] = dic['place']['name']
            posts.append(dic) # append to list
            print(count,"/",len(postids))
            count+=1
        except Exception as e:
            # NOTE(review): broad catch silently skips a post on *any* error
            # (network failure, missing key, ...); only a count is kept.
            print(e)
            missed += 1
    # Tab-separated backup (relative path — presumably backup/ must already
    # exist; verify). NOTE(review): posts[0] raises IndexError when every
    # post was missed.
    with io.open('backup/fb_posts.txt','w', encoding='utf-8') as file:
        file.write("\t".join(posts[0].keys()))
        file.write('\n')
        for row in posts:
            file.write("\t".join([str(val).replace('\n',' ') for val in row.values()]))
            file.write('\n')
    print("Missed " + str(missed) + "/" + str(len(postids)))
    #print(vartitles)
    # Hardcoded snapshot of Facebook's metric titles, overriding the
    # `vartitles` collected in the loop above.
    vartitles = {'post_impressions': 'Lifetime Post Total Impressions', 'post_impressions_unique': 'Lifetime Post Total Reach', 'post_impressions_paid': 'Lifetime Post Paid Impressions', 'post_impressions_paid_unique': 'Lifetime Post Paid Reach', 'post_impressions_organic': 'Lifetime Post Organic Impressions', 'post_impressions_organic_unique': 'Lifetime Post organic reach', 'post_consumptions': 'Lifetime Post Consumptions', 'post_consumptions_unique': 'Lifetime Post Consumers', 'post_engaged_users': 'Lifetime Engaged Users', 'post_video_avg_time_watched': 'Lifetime Average time video viewed', 'post_video_length': 'Lifetime Video length', 'post_video_complete_views_organic': 'Lifetime Organic watches at 95%', 'post_video_complete_views_paid': 'Lifetime Paid watches at 95%', 'post_video_views': 'Lifetime Total Video Views', 'post_video_views_unique': 'Lifetime Unique Video Views','post_video_views_10s':'Lifetime Total 10-Second Views','post_video_views_10s_unique':'Lifetime Unique 10-Second Views','post_video_view_time':'Lifetime Total Video View Time (in MS)'}
    vartitles.update(dict(zip(fields.split(','),fields.replace('_',' ').split(',')))) # add field titles with underscore replaced by space
    vartitles.update({'like':'Likes','share':'Shares','comment':'Comments','hide_all_clicks': "Hide all clicks",'hide_clicks':'Hide clicks'}) # add nested response titles
    posts = [{vartitles[k]: v for k, v in p.items()} for p in posts] # rename keys to human-readable titles
    print("Posts ORIG: ", len(posts))
    # filter by date: keep posts created on or before 2017-10-01
    # ('created time' is the renamed 'created_time' key)
    posts = [d for d in posts if datetime.strptime(d['created time'],'%Y-%m-%dT%H:%M:%S+0000') <= datetime.strptime('10/1/2017', '%m/%d/%Y')]
    print("Posts FILTERED: ",len(posts))
    # Second backup write (disabled).
    # with io.open(r'C:\Users\kkhitalishvili\Desktop\fb-content\data-mining\backup\fb_posts.txt','w', encoding='utf-8') as file:
    #     file.write("\t".join(posts[0].keys()))
    #     file.write('\n')
    #     for row in posts:
    #         file.write("\t".join([str(val).replace('\n',' ') for val in row.values()]))
    #         file.write('\n')
    # Final CSV export (hardcoded absolute Windows path).
    df = pd.DataFrame(posts)
    df.to_csv(r'C:\Users\kkhitalishvili\Desktop\social\social-content-report\sixers_fb2.csv')
    return
    # ---- DEAD CODE: unreachable because of the `return` above, and `azure`
    # is never defined (its creation is commented out at the top). ----
    count = 1
    cursor = azure.connect_to_azure()
    for row in posts:
        print()
        # Column order must match the dict value order; the tuple is bound
        # positionally to the 33 `?` placeholders.
        cursor.execute('''INSERT INTO [sixers].[facebook_posts]
            ( id,
            [created time],
            caption,
            description,
            link,
            message,
            type,
            [status type],
            place,
            [permalink url],
            [Lifetime Post Total Impressions],
            [Lifetime Post Total Reach],
            [Lifetime Post Paid Impressions],
            [Lifetime Post Paid Reach],
            [Lifetime Post Organic Impressions],
            [Lifetime Post organic reach],
            [Lifetime Post Consumptions],
            [Lifetime Post Consumers],
            [Lifetime Engaged Users],
            [Lifetime Average time video viewed],
            [Lifetime Organic views to 95%],
            [Lifetime Paid views to 95%],
            [Lifetime Total Video Views],
            [Lifetime Unique Video Views],
            [Lifetime Total 10-Second Views],
            [Lifetime Unique 10-Second Views],
            [Lifetime Video length],
            [Lifetime Total Video View Time (in MS)],
            Likes,
            Shares,
            Comments,
            [Hide all clicks],
            [Hide clicks]
            ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
            ''', tuple(row.values()))
        print("Inserted " + str(count) + "/" + str(len(posts)) + " rows")
        count += 1
    cursor.commit()
    cursor.close()
- if __name__ == "__main__":
- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement