from facepy import GraphAPI
import io
import pyodbc  # presumably used inside sixopy's Odbc wrapper, not called directly here
import dateutil.parser as dateparser  # only needed by the commented-out Azure branch below
from sixopy.db import Odbc
from datetime import datetime, timedelta

import pandas as pd

atoken = 'EAACEdEose0cBAJUpFujsg2ajzzJvNPLyTC9SogquPN36YFNgCkiPxWbxR67Brdh9f7LZANL0SYn4naqjmnINmwCkz6Jk2tev6cQYmB5GMsWTIPCu2HZALsAnYYUO4o4z85Kb8ECnIZBHnSpXQcF96RXJnwtRLlTvPGbsP9t9rZCmwy3Bo7eivnojs6g5OCtgtLBGzo2gPQZDZD'

server = 'devsixdbserv01.database.secure.windows.net'
database = 'devsix'
username = 'sixers_kkhitalishvili'
password = 'Xl#uq%60j48r9F2'
driver = '{ODBC Driver 13 for SQL Server}'  # defined but never referenced below; Odbc presumably builds its own connection string

def get_field(dic, field):
    # Return dic[field] if present, else None (the Graph API omits fields a post lacks).
    return dic.get(field)


def construct_dict(dic, fields):
    # Project dic onto a fixed list of keys, filling absent ones with None.
    return {f: get_field(dic, f) for f in fields}
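
# Illustrative usage (not part of the original script):
#   construct_dict({'id': '1', 'message': 'hi'}, ['id', 'message', 'caption'])
#   -> {'id': '1', 'message': 'hi', 'caption': None}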


def main():
    # # Connect to Azure and get the date of the last stored post
    # azure = Odbc(server, database, username, password)
    # since = azure.select("SELECT TOP 100 [created time],id FROM sixers.facebook_posts ORDER BY [created time] DESC")
    # # save the stored ids, and the date of the last post minus one day
    # sinceids = [s[1] for s in since]
    # since = dateparser.parse(since[0][0]) - timedelta(days=1)
    # since = int(since.timestamp())  # convert datetime to a unix timestamp
    since = int(datetime(2017, 9, 17).timestamp())  # hard-coded start date instead
    # Get all posts since that date (paged)
    graph = GraphAPI(atoken)
    pages = graph.get("52974817515/feed", page=True, since=since)
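    # With page=True facepy returns an iterator of result pages; each page is a
    # dict whose 'data' list holds the post objects, which is what the
    # comprehension below unpacks.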

    # Get just the post ids (pageid_postid, e.g. 52974817515_10156185473452516), oldest first
    postids = list(reversed([p['id'] for page in pages for p in page['data']]))
    # postids = ['52974817515_10156205319582516', '52974817515_10156204804522516']

    # remove post ids already stored in the database
    # postids = list(filter(lambda x: x not in sinceids, postids))

    # Post fields, insights metrics, and metrics nested inside other responses
    fields = "id,created_time,caption,description,link,message,type,status_type,place,permalink_url"
    insights = "post_impressions,post_impressions_unique,post_impressions_paid,post_impressions_paid_unique,post_impressions_organic,post_impressions_organic_unique,post_consumptions,post_consumptions_unique,post_engaged_users,post_video_avg_time_watched,post_video_complete_views_organic,post_video_complete_views_paid,post_video_views,post_video_views_unique,post_video_views_10s,post_video_views_10s_unique,post_video_length,post_video_view_time"
    nestedresp = "like,share,comment,hide_all_clicks,hide_clicks"
    varss = (fields + "," + insights + "," + nestedresp).split(",")
    vartitles = {}  # human-readable variable titles defined by Facebook
    posts = []
    count = 1
    missed = 0
    for d in postids:
        try:
            # get post fields
            pinfo = graph.get(str(d) + "?fields=" + fields)  # dict with the requested fields

            # get post insights; collect both the machine names and Facebook's titles
            pinsights = graph.get(str(d) + "/insights?metric=" + insights)
            vartitles.update({p['name']: p['title'] for p in pinsights['data']})
            pinsights = {p['name']: p['values'][0]['value'] for p in pinsights['data']}

            # get metrics that arrive as nested responses
            paction = graph.get(str(d) + "/insights?metric=" + "post_stories_by_action_type")['data'][0]['values'][0]['value']
            paction.update(graph.get(str(d) + "/insights?metric=" + "post_negative_feedback_by_type")['data'][0]['values'][0]['value'])
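            # The value of these two metrics is itself a dict keyed by action
            # type, e.g. (illustrative) {'like': 120, 'share': 14, 'comment': 9}
            # and {'hide_clicks': 2, 'hide_all_clicks': 1}, so paction merges
            # both into one flat dict of counts.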

            pinfo.update(pinsights)  # combine post fields and insights
            pinfo.update(paction)

            # construct a dictionary with exactly the columns we track
            dic = construct_dict(pinfo, varss)
            for i in nestedresp.split(','):
                if dic[i] is None:  # absent means zero of that action
                    dic[i] = 0

            # keep just the location name
            if dic['place'] is not None:
                dic['place'] = dic['place']['name']

            posts.append(dic)
            print(count, "/", len(postids))
            count += 1
        except Exception as e:
            print(e)
            missed += 1

    # tab-separated backup: a header row of keys, then one row per post with
    # embedded newlines flattened to spaces
    with io.open('backup/fb_posts.txt', 'w', encoding='utf-8') as file:
        file.write("\t".join(posts[0].keys()))
        file.write('\n')
        for row in posts:
            file.write("\t".join([str(val).replace('\n', ' ') for val in row.values()]))
            file.write('\n')

    print("Missed " + str(missed) + "/" + str(len(postids)))

    # print(vartitles)
    # Facebook's titles for the insights metrics (hard-coded; overrides the
    # titles collected from the API responses above)
    vartitles = {
        'post_impressions': 'Lifetime Post Total Impressions',
        'post_impressions_unique': 'Lifetime Post Total Reach',
        'post_impressions_paid': 'Lifetime Post Paid Impressions',
        'post_impressions_paid_unique': 'Lifetime Post Paid Reach',
        'post_impressions_organic': 'Lifetime Post Organic Impressions',
        'post_impressions_organic_unique': 'Lifetime Post organic reach',
        'post_consumptions': 'Lifetime Post Consumptions',
        'post_consumptions_unique': 'Lifetime Post Consumers',
        'post_engaged_users': 'Lifetime Engaged Users',
        'post_video_avg_time_watched': 'Lifetime Average time video viewed',
        'post_video_length': 'Lifetime Video length',
        'post_video_complete_views_organic': 'Lifetime Organic watches at 95%',
        'post_video_complete_views_paid': 'Lifetime Paid watches at 95%',
        'post_video_views': 'Lifetime Total Video Views',
        'post_video_views_unique': 'Lifetime Unique Video Views',
        'post_video_views_10s': 'Lifetime Total 10-Second Views',
        'post_video_views_10s_unique': 'Lifetime Unique 10-Second Views',
        'post_video_view_time': 'Lifetime Total Video View Time (in MS)',
    }

    # add field titles (underscores replaced by spaces) and nested-response titles
    vartitles.update(dict(zip(fields.split(','), fields.replace('_', ' ').split(','))))
    vartitles.update({'like': 'Likes', 'share': 'Shares', 'comment': 'Comments',
                      'hide_all_clicks': 'Hide all clicks', 'hide_clicks': 'Hide clicks'})
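    # Illustrative effect of the renames below: 'status_type' -> 'status type',
    # 'post_impressions' -> 'Lifetime Post Total Impressions', 'like' -> 'Likes',
    # so the keys line up with the SQL column names in the INSERT further down.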

    posts = [{vartitles[k]: v for k, v in p.items()} for p in posts]  # rename keys to their titles
    print("Posts ORIG: ", len(posts))

    # keep only posts created on or before Oct 1, 2017
    posts = [d for d in posts if datetime.strptime(d['created time'], '%Y-%m-%dT%H:%M:%S+0000') <= datetime.strptime('10/1/2017', '%m/%d/%Y')]
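    # Graph API timestamps look like '2017-09-17T19:30:00+0000'; the literal
    # '+0000' in the format string above matches that fixed UTC offset.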
    print("Posts FILTERED: ", len(posts))

    # with io.open(r'C:\Users\kkhitalishvili\Desktop\fb-content\data-mining\backup\fb_posts.txt', 'w', encoding='utf-8') as file:
    #     file.write("\t".join(posts[0].keys()))
    #     file.write('\n')
    #     for row in posts:
    #         file.write("\t".join([str(val).replace('\n', ' ') for val in row.values()]))
    #         file.write('\n')

    df = pd.DataFrame(posts)
    df.to_csv(r'C:\Users\kkhitalishvili\Desktop\social\social-content-report\sixers_fb2.csv')
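    # Note: pandas writes the DataFrame index as an unnamed first column by
    # default; pass index=False if the report should not include it.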
    return

    # Everything below is unreachable because of the return above, and it also
    # depends on the commented-out azure = Odbc(...) connection at the top of main().
    count = 1
    cursor = azure.connect_to_azure()
    for row in posts:
        print()
        cursor.execute('''INSERT INTO [sixers].[facebook_posts]
            (id,
             [created time],
             caption,
             description,
             link,
             message,
             type,
             [status type],
             place,
             [permalink url],
             [Lifetime Post Total Impressions],
             [Lifetime Post Total Reach],
             [Lifetime Post Paid Impressions],
             [Lifetime Post Paid Reach],
             [Lifetime Post Organic Impressions],
             [Lifetime Post organic reach],
             [Lifetime Post Consumptions],
             [Lifetime Post Consumers],
             [Lifetime Engaged Users],
             [Lifetime Average time video viewed],
             [Lifetime Organic views to 95%],
             [Lifetime Paid views to 95%],
             [Lifetime Total Video Views],
             [Lifetime Unique Video Views],
             [Lifetime Total 10-Second Views],
             [Lifetime Unique 10-Second Views],
             [Lifetime Video length],
             [Lifetime Total Video View Time (in MS)],
             Likes,
             Shares,
             Comments,
             [Hide all clicks],
             [Hide clicks]
            ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
            ''', tuple(row.values()))
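        # tuple(row.values()) binds positionally: it relies on dict insertion
        # order (guaranteed since Python 3.7, an implementation detail in
        # CPython 3.6) matching the column list above. Column names are never
        # matched against the keys, which is why e.g. the column
        # [Lifetime Organic views to 95%] receives the value keyed
        # 'Lifetime Organic watches at 95%'.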
        print("Inserted " + str(count) + "/" + str(len(posts)) + " rows")
        count += 1
        cursor.commit()

    cursor.close()


if __name__ == "__main__":
    main()