import traceback
import schedule
import time
import requests
import pickle
import warnings  # used by _merge_account_questions_stats
from lucidyalib.lucidya_stats import LucidyaStats as lucidyaLibStats
from lucidyalib.InstagramOfflineStats import InstagramOfflineStats
from lucidyalib.FacebookOfflineStats import FacebookOfflineStats
from lucidyalib.ExceptionHandler import ExceptionHandler
from lucidyalib.PostgresClient import PostgresCLient
from lucidyalib.rabbitmqClient import rabbitmqPublisher
from lucidyalib.helper import helper
from offline_stats.quick_inspect_worker import QuickInspector
from accountanalysis.Stats import Stats as AAStats
#from lucidyalib.topic_discovery.TopicDiscovery import TopicDiscovery
from lucidyalib.topic_discovery.TopicDiscovery_V2 import TopicDiscovery
import sys
sys.path.append('/opt/lucidya/config')
import config as Config
import os
import collections
from datetime import datetime, timedelta
import threading
import json
import pytimeparse
import math
import copy

try:
    from lucidyalib.CassandraClient import CassandraClient
except Exception:  # ImportError
    from CassandraClient import CassandraClient


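# FilteredStats computes per-day "filtered" metrics for each tracker
# (Twitter, Instagram and Facebook), pickles them to disk under
# Config.monitors_metrics_filtered_path, and later re-merges those daily
# pickles on demand for yesterday / last-week / last-month / date-range views.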
class FilteredStats:

    NUM_OF_STATS = 10

    def __init__(self, number_of_days, monitor_id=None, skip_qi=False):
        self.skip_qi = skip_qi
        self.NUM_OF_THREADS = 5
        self.specific_monitor_id = monitor_id
        for i in range(number_of_days, 0, -1):
            date = datetime.today() - timedelta(days=i)
            start_date = date.replace(hour=0, minute=0, second=0, microsecond=0)
            end_date = date.replace(hour=23, minute=59, second=59, microsecond=999999)
            self.retrieve_trackers_tweets(start_date, end_date, 'daily')
            self.retrieve_trackers_instagram_media(start_date, end_date, 'daily')
            self.retrieve_trackers_facebook_posts(start_date, end_date, 'daily')

        #schedule.every().day.at("00:10").do(self.save_daily_stats)
        #while True:
        #    schedule.run_pending()
        #    time.sleep(1)

    def save_daily_stats(self):
        start_date = datetime.today() - timedelta(days=1)
        self.retrieve_trackers_tweets(start_date, start_date, 'daily')
        self.retrieve_trackers_instagram_media(start_date, start_date, 'daily')
        print(str(datetime.now()) + " ---> Finished daily materialized metrics for " + datetime.today().strftime("%Y-%m-%d"))

    @staticmethod
    def retrieve_trackers_tweets_tracker(tracker_id, start_date, end_date, time_zone, file_name_prefix, skip_qi=False):
        psql = PostgresCLient()
        # yesterday metrics
        limit_filter = dict()
        #limit_filter['reverse'] = True
        #tweets = psql.search(tracker_id, start_date.strftime("%Y-%m-%d 00:00:00"), end_date.strftime("%Y-%m-%d 23:59:59"), None, None, limit_filter, 'tweets')
        local_start_date = (start_date - timedelta(hours=time_zone)).strftime("%Y-%m-%d %H:%M:%S")
        local_end_date = (end_date - timedelta(hours=time_zone)).strftime("%Y-%m-%d %H:%M:%S")
        helper.process_tweets(tracker_id, dict(), local_start_date, local_end_date, psql, useStream=True)
        # get filtered stats
        advanced_filters = dict()
        advanced_filters['start_date'] = local_start_date
        statsObj = lucidyaLibStats(tracker_id, time_zone=time_zone, postgresClient=psql, advanced_filters=advanced_filters, start_date=local_start_date)
        stats = statsObj.get_metrics()

        # serialize stats result
        file_path = Config.monitors_metrics_filtered_path + "/%s/%s/" % (file_name_prefix, str(tracker_id))
        os.makedirs(file_path, exist_ok=True)
        with open(file_path + "%s.pickle" % start_date.strftime("%Y-%m-%d"), 'wb') as f:
            pickle.dump(stats, f)
        psql.cleanup()

        if not skip_qi:
            #FilteredStats(31, new_tracker_id)
            publisher = rabbitmqPublisher()
            publisher_channel = publisher.getChannel()
            eventName = 'calculate_qi_stats'

            eventData = dict()
            eventData['tracker_id'] = tracker_id
            eventData['start_date'] = local_start_date
            eventData['end_date'] = local_end_date
            eventData['time_zone'] = time_zone
            eventData['inspect_type'] = 'predefined'
            eventData['advanced_filters'] = advanced_filters
            eventData['stats_metrics'] = stats
            eventData['complete_file_path'] = 'predefinedqi' + "/" + str(tracker_id) + '/' + local_end_date.split(" ")[0]

            message = json.dumps({'eventName': eventName, 'eventData': eventData}, ensure_ascii=False).encode('utf8')
            publisher.publish(publisher_channel, eventName, message)
            publisher.cleanup()

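    # Daily Twitter metrics are pickled per tracker as
    #   <monitors_metrics_filtered_path>/<prefix>/<tracker_id>/<YYYY-MM-DD>.pickle
    # so any date range can be re-assembled later by loading one pickle per
    # day (see get_filtered_stats and get_filtered_stats_by_dates below).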
    @staticmethod
    def retrieve_account_trackers_tweets_tracker(tracker_id, start_date, end_date, time_zone, file_name_prefix, account_name):
        # get filtered stats
        advanced_filters = dict()
        advanced_filters['start_date'] = start_date.strftime("%Y-%m-%d %H:%M:%S")
        advanced_filters['end_date'] = end_date.strftime("%Y-%m-%d %H:%M:%S")

        data = dict()
        data['tracker_id'] = tracker_id
        data['account_name'] = account_name
        data['start_date'] = start_date.strftime("%Y-%m-%d %H:%M:%S")
        data['end_date'] = end_date.strftime("%Y-%m-%d %H:%M:%S")
        data['file_name'] = start_date.strftime("%Y-%m-%d")
        data['advanced_filters'] = advanced_filters
        event_name = 'compute_account_filter_stats_days'

        print("####----<<<<< send data to rabbitMq for account analysis")
        helper.sendToRabbitmq(event_name, data)

    def retrieve_trackers_facebook_posts(self, start_date, end_date, file_name_prefix):
        print(str(datetime.now()) + " ---> Start retrieve_trackers_facebook_posts for " + str(start_date) + " --> " + str(end_date))
        # Get all trackers, create stream for each account id and run it.
        response = requests.get(Config.BACKEND_API_URL + "trackers?source_id=1&token=" + Config.TOKEN)
        if response.status_code != 200:
            try:
                print(str(response))
                print(response.status_code)
                print(response.json())
            except:
                pass
            print(str(datetime.now()) + " ---> Can not get facebook trackers: " + str(response))
            ExceptionHandler.handle_exception("Can not get facebook trackers: " + str(response), "filtered_stats.py")
        else:
            init_list = response.json()
            temp_list = []
            for tracker in init_list:
                temp_list.append(copy.deepcopy(tracker))

            list_of_lists = []
            for i in range(0, self.NUM_OF_THREADS):
                list_of_lists.append(list())

            # split the list into sublists
            for index, item in enumerate(temp_list):
                list_of_lists[index % self.NUM_OF_THREADS].append(copy.deepcopy(item))

            thread_list = []
            # start a thread for each sublist
            for index, sub_list in enumerate(list_of_lists):
                my_thread = threading.Thread(target=self.process_facebook_list, args=(index, sub_list, start_date, end_date, file_name_prefix))
                thread_list.append(my_thread)
                my_thread.start()
                time.sleep(5)
            # wait for all threads
            print("Waiting for " + str(len(thread_list)) + " threads")
            for thread in thread_list:
                thread.join()

        print(str(datetime.now()) + " ---> End retrieve_trackers_facebook_posts")

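    # Trackers are sharded round-robin over NUM_OF_THREADS worker threads via
    # list_of_lists[index % NUM_OF_THREADS]. For example, with 5 threads and
    # trackers [t0..t6], thread 0 gets [t0, t5], thread 1 gets [t1, t6], and
    # threads 2-4 get one tracker each. The same pattern is used for the
    # Instagram and Twitter variants below.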
    def retrieve_trackers_instagram_media(self, start_date, end_date, file_name_prefix):
        print(str(datetime.now()) + " ---> Start retrieve_trackers_instagram_media for " + str(start_date) + " --> " + str(end_date))
        # Get all trackers, create stream for each account id and run it.
        response = requests.get(Config.BACKEND_API_URL + "trackers?source_id=3&token=" + Config.TOKEN)
        if response.status_code != 200:
            try:
                print(str(response))
                print(response.status_code)
                print(response.json())
            except:
                pass
            print(str(datetime.now()) + " ---> Can not get instagram trackers: " + str(response))
            ExceptionHandler.handle_exception("Can not get instagram trackers: " + str(response), "filtered_stats.py")
        else:
            init_list = response.json()
            temp_list = []
            for tracker in init_list:
                temp_list.append(copy.deepcopy(tracker))

            list_of_lists = []
            for i in range(0, self.NUM_OF_THREADS):
                list_of_lists.append(list())

            # split the list into sublists
            for index, item in enumerate(temp_list):
                list_of_lists[index % self.NUM_OF_THREADS].append(copy.deepcopy(item))

            thread_list = []
            # start a thread for each sublist
            for index, sub_list in enumerate(list_of_lists):
                my_thread = threading.Thread(target=self.process_instagram_list, args=(index, sub_list, start_date, end_date, file_name_prefix))
                thread_list.append(my_thread)
                my_thread.start()
                time.sleep(5)
            # wait for all threads
            print("Waiting for " + str(len(thread_list)) + " threads")
            for thread in thread_list:
                thread.join()

        print(str(datetime.now()) + " ---> End retrieve_trackers_instagram_media")

    def retrieve_trackers_tweets(self, start_date, end_date, file_name_prefix):
        print(str(datetime.now()) + " ---> Start retrieve_trackers_tweets for " + str(start_date) + " --> " + str(end_date))
        # Get all trackers, create stream for each account id and run it.
        response = requests.get(Config.BACKEND_API_URL + "trackers?source_id=-1&token=" + Config.TOKEN)
        if response.status_code != 200:
            try:
                print(str(response))
                print(response.status_code)
                print(response.json())
            except:
                pass
            #raise Exception("Can not get trackers: "+str(response))
            print(str(datetime.now()) + " ---> Can not get trackers: " + str(response))
            ExceptionHandler.handle_exception("Can not get trackers: " + str(response), "filtered_stats.py")
        else:
            # create stream for each tracker type then start all at once
            init_list = response.json()
            temp_list = []
            for tracker in init_list:
                if tracker["data_source_name"] == "TWITTER":
                    temp_list.append(copy.deepcopy(tracker))

            temp_list.sort(key=lambda monitor_object: monitor_object['total_tweets'])
            # init_list.sort(key=lambda monitor_object: monitor_object['total_tweets'])
            list_of_lists = []
            for i in range(0, self.NUM_OF_THREADS):
                list_of_lists.append(list())
            # split the list into sublists
            for index, item in enumerate(temp_list):
                list_of_lists[index % self.NUM_OF_THREADS].append(copy.deepcopy(item))

            thread_list = []
            # start a thread for each sublist
            for index, sub_list in enumerate(list_of_lists):
                my_thread = threading.Thread(target=self.process_list, args=(index, sub_list, start_date, end_date, file_name_prefix))
                thread_list.append(my_thread)
                my_thread.start()
                time.sleep(5)
            # wait for all threads
            print("Waiting for " + str(len(thread_list)) + " threads")
            for thread in thread_list:
                thread.join()

        print(str(datetime.now()) + " ---> End retrieve_trackers_tweets")

    def process_facebook_list(self, id, sub_list, start_date, end_date, file_name_prefix):
        for tracker_info in sub_list:
            try:
                tracker_id = tracker_info["tracker_id"]
                psql = PostgresCLient()
                posts = []
                print("Thread " + str(id) + " Processing: " + str(tracker_id))
                # fixed: the loop variable is tracker_info, not tracker
                if tracker_info['monitor_type'] == 'ACCOUNT_ANALYSIS':
                    posts = psql.get_all_fb_public_comments(tracker_id, {'start_date': start_date, 'end_date': end_date})
                    posts += psql.get_all_fb_public_posts(tracker_id, {'start_date': start_date, 'end_date': end_date})
                elif tracker_info['monitor_type'] == 'MANAGED_PAGE':
                    posts = psql.get_all_fb_comments(tracker_id, {'start_date': start_date, 'end_date': end_date})

                statsObj = FacebookOfflineStats(tracker_id, posts, tracker_info['monitor_type'])
                stats = statsObj.get_metrics()

                # serialize stats result
                file_path = Config.monitors_metrics_filtered_path + "/%s/%s/facebook/" % (
                    file_name_prefix, str(tracker_id))
                os.makedirs(file_path, exist_ok=True)
                with open(file_path + "%s.pickle" % start_date.strftime("%Y-%m-%d"), 'wb') as f:
                    pickle.dump(stats, f)
                psql.cleanup()
            except Exception as e:
                ExceptionHandler.handle_exception("process_list error: " + str(e), "filtered_stats")
                traceback.print_exc()

    def process_instagram_list(self, id, sub_list, start_date, end_date, file_name_prefix):
        for tracker_info in sub_list:
            try:
                tracker_id = tracker_info["tracker_id"]
                psql = PostgresCLient()
                media = []
                is_comment = True
                print("Thread " + str(id) + " Processing: " + str(tracker_id))
                if 'follow' in tracker_info and tracker_info['follow'] == 1:
                    media = psql.searchInstagramComments(tracker_id, {}, start_date, end_date)
                    is_comment = False
                else:
                    media = psql.searchInstagramMedia(tracker_id, {}, start_date, end_date)

                statsObj = InstagramOfflineStats(tracker_id, media, is_comment)
                stats = statsObj.get_metrics()

                # serialize stats result
                file_path = Config.monitors_metrics_filtered_path + "/%s/%s/instagram/" % (file_name_prefix, str(tracker_id))
                os.makedirs(file_path, exist_ok=True)
                with open(file_path + "%s.pickle" % start_date.strftime("%Y-%m-%d"), 'wb') as f:
                    pickle.dump(stats, f)
                psql.cleanup()
            except Exception as e:
                ExceptionHandler.handle_exception("process_list error: " + str(e), "filtered_stats")
                traceback.print_exc()

    def process_list(self, id, sub_list, start_date, end_date, file_name_prefix):
        for tracker_info in sub_list:
            try:
                tracker_id = tracker_info["tracker_id"]
                time_zone = tracker_info["time_zone"]
                # tracker_id_int = int(tracker_id)
                # if tracker_id_int != 1653 and tracker_id_int != 1651 and tracker_id_int != 1956:
                if self.specific_monitor_id is not None and int(tracker_id) != int(self.specific_monitor_id):
                    continue
                print("Thread " + str(id) + " Processing: " + str(tracker_id))
                if tracker_info['follow'] is not None and tracker_info['follow'] != '':
                    FilteredStats.retrieve_account_trackers_tweets_tracker(tracker_id, start_date, end_date, time_zone, file_name_prefix, tracker_info['follow'])

                FilteredStats.retrieve_trackers_tweets_tracker(tracker_id, start_date, end_date, time_zone, file_name_prefix, self.skip_qi)
            except Exception as e:
                ExceptionHandler.handle_exception("process_list error: " + str(e), "filtered_stats")
                traceback.print_exc()

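    # _merge_filtered_stats folds a list of per-day stats dicts into one
    # result: counted items (people, hashtags, languages, ...) are summed in
    # collections.Counter objects and cut to the top NUM_OF_STATS entries via
    # most_common(); scalar metrics are summed; benchmark averages are
    # re-averaged by dividing the summed value by the number of merged days.
    # Note that PopularPeople and verifiedAuthors are assigned (=) rather
    # than accumulated (+=), so the most recent day's value wins for those.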
    @staticmethod
    def _merge_filtered_stats(stats_list, NUM_OF_STATS):
        cassandra = CassandraClient()
        FilteredStats.NUM_OF_STATS = NUM_OF_STATS
        engagers = collections.Counter()
        influencers = collections.Counter()
        original_tweets_influencers = collections.Counter()
        verified_authors = collections.Counter()
        languages = collections.Counter()
        countriesDistribution = collections.Counter()
        citiesDistribution = collections.Counter()
        topics = collections.Counter()
        dialects = collections.Counter()
        accounts_types = collections.Counter()
        sources = collections.Counter()
        genders = collections.Counter()
        potential_impression = 0
        unique_potential_impression = 0
        unique_authors = 0
        original_tweets_potential_impression = 0
        original_tweets_unique_potential_impression = 0
        original_tweets_unique_authors = 0
        urls = collections.Counter()
        hashtags = collections.Counter()
        videos = collections.Counter()
        photos = collections.Counter()

        videos_retweets = collections.Counter()
        photos_retweets = collections.Counter()

        daily_counts = collections.OrderedDict()
        meta = dict()
        re_tweets = dict()
        re_tweets_counts = collections.Counter()
        top_retweeted_tweets = []
        sentiment = dict()
        overallsentiment = dict()
        overallsentiment_overtime = dict()
        sentiment_overtime = dict()
        benchmark = dict()
        #latest_posts = []
        top_words = collections.Counter()

        topic_discovery = TopicDiscovery()
        topic_discovery_result = collections.Counter()
        topic_discovery_list = []
        trending_topics = []
        daily_topic_discovery = collections.OrderedDict()

        favorites_count = 0
        retweets_count = 0

        daily_favorites_count = {}
        daily_retweets_count = {}

        content_type = {}
        content_type['spam'] = 0
        content_type['news'] = 0
        content_type['original'] = 0
        content_type['verses'] = 0

        weights = []
        statistics_items_map = {
            'ActivePeople': engagers, 'PopularPeople': influencers, 'OriginalTweetsPopularPeople': original_tweets_influencers,
            'verifiedAuthors': verified_authors, 'LanguagesDistribution': languages,
            'SourceDistribution': sources, 'Gender': genders, 'TopURLs': urls, 'ActiveHashtags': hashtags,
            'CountriesDistribution': countriesDistribution, 'CitiesDistribution': citiesDistribution, 'Topics': topics,
            'Dialects': dialects, 'AccountsTypes': accounts_types, 'TopKeywords': top_words
        }
        statistics_int_map = {
            'PotentialImpression': potential_impression, 'UniquePotentialImpression':
                unique_potential_impression, 'UniqueAuthors': unique_authors,
            'original_tweets_PotentialImpression': original_tweets_potential_impression,
            'original_tweets_UniquePotentialImpression': original_tweets_unique_potential_impression,
            'original_tweets_UniqueAuthors': original_tweets_unique_authors,
        }
        for stats in stats_list:
            for item in stats['statistics']:
                item_name = list(item.keys())[0]
                item_list = list(item.values())[0]
                for obj in item_list:
                    if item_name in statistics_items_map:
                        if item_name == 'PopularPeople' or item_name == 'verifiedAuthors':
                            statistics_items_map[item_name][obj['id']] = int(obj['stats_count'])
                        else:
                            statistics_items_map[item_name][obj['id']] += int(obj['stats_count'])
                    elif item_name in statistics_int_map:
                        if item_name == 'PotentialImpression':
                            potential_impression += int(obj['stats_count'])
                        elif item_name == 'UniquePotentialImpression':
                            unique_potential_impression += int(obj['stats_count'])
                        elif item_name == 'UniqueAuthors':
                            unique_authors += int(obj['stats_count'])
                        if item_name == 'original_tweets_PotentialImpression':
                            original_tweets_potential_impression += int(obj['stats_count'])
                        elif item_name == 'original_tweets_UniquePotentialImpression':
                            original_tweets_unique_potential_impression += int(obj['stats_count'])
                        elif item_name == 'original_tweets_UniqueAuthors':
                            original_tweets_unique_authors += int(obj['stats_count'])
                    elif item_name == 'TopImages':
                        photos[obj['id']] += int(obj['stats_count'])
                        meta[obj['id']] = {'id': obj['id'], 'data': obj['src'], 'src': obj['src']}  # {'thumb': obj['id']}
                    elif item_name == 'TopVideos':
                        videos[obj['id']] += int(obj['stats_count'])
                        meta[obj['id']] = {'thumb': obj['data'], 'url': obj['id'], 'src': obj['src']}
                    elif item_name == 'TopImages_retweets':
                        photos_retweets[obj['id']] = int(obj['stats_count'])
                        meta[obj['id']] = {'id': obj['id'], 'data': obj['src'], 'src': obj['src']}  # {'thumb': obj['id']}
                    elif item_name == 'TopVideos_retweets':
                        videos_retweets[obj['id']] = int(obj['stats_count'])
                        meta[obj['id']] = {'thumb': obj['data'], 'url': obj['id'], 'src': obj['src']}

            for itemName, itemValue in stats['sentiment_stats'].items():
                sentiment_label = str(itemName)
                if sentiment_label in sentiment:
                    sentiment[sentiment_label] += itemValue
                else:
                    sentiment[sentiment_label] = itemValue
            if 'overall_sentiment_stats' in stats:
                for itemName, itemValue in stats['overall_sentiment_stats'].items():
                    sentiment_label = str(itemName)
                    if sentiment_label in overallsentiment:
                        overallsentiment[sentiment_label] += itemValue
                    else:
                        overallsentiment[sentiment_label] = itemValue
            for item in stats['tweets_over_time']:
                daily_counts[item['id']] = item['stats_count']
            #for item in stats['latest_posts']:
            #    latest_posts.append(item)
            for tweet in stats['top_retweeted_tweets']:
                if tweet['id_str'] in re_tweets_counts:
                    re_tweets[tweet['id_str']]['retweet_count'] += tweet['retweet_count']
                    re_tweets_counts[tweet['id_str']] += tweet['retweet_count']
                else:
                    re_tweets_counts[tweet['id_str']] = tweet['retweet_count']
                    re_tweets[tweet['id_str']] = tweet
            # TODO: Disabling topics until further notice. This has to be restored once it is improved.
            #if 'topics' in stats and len(stats['topics']['top_words']) > 0:
            #    top_words = stats['topics']['top_words']
            #    weights = stats['topics']['weights']
            if 'overallsentiment_overtime' in stats:
                for itemName, itemValues in stats['overallsentiment_overtime'].items():
                    overallsentiment_label = str(itemName)
                    if overallsentiment_label in overallsentiment_overtime:
                        for value in itemValues:
                            overallsentiment_overtime[overallsentiment_label].append(value)
                    else:
                        overallsentiment_overtime[overallsentiment_label] = itemValues
            if 'sentiment_overtime' in stats:
                for itemName, itemValues in stats['sentiment_overtime'].items():
                    overallsentiment_label = str(itemName)
                    if overallsentiment_label in sentiment_overtime:
                        for value in itemValues:
                            sentiment_overtime[overallsentiment_label].append(value)
                    else:
                        sentiment_overtime[overallsentiment_label] = itemValues
                    # if overallsentiment_label in overallsentiment_overtime:
                    #     overallsentiment[overallsentiment_label] += itemValue
                    # else:
                    #     overallsentiment[overallsentiment_label] = itemValue

            if 'content_type' in stats:
                content_type['spam'] = content_type['spam'] + stats['content_type']['spam']
                content_type['news'] = content_type['news'] + stats['content_type']['news']
                content_type['original'] = content_type['original'] + stats['content_type']['original']
                content_type['verses'] = content_type['verses'] + stats['content_type']['verses']

            if 'topic_discovery' in stats and stats['topic_discovery'] is not None \
                    and len(stats['topic_discovery']) > 0:
                topic_discovery_list.append(stats['topic_discovery'])

            if 'trending_topics' in stats and stats['trending_topics'] is not None \
                    and len(stats['trending_topics']) > 0:
                for topic in stats['trending_topics']:
                    day_str = topic['date']
                    if day_str in daily_topic_discovery:
                        daily_topic_discovery[day_str].append(topic['topics'])
                    else:
                        daily_topic_discovery[day_str] = []
                        daily_topic_discovery[day_str].append(topic['topics'])

            if 'daily_interactions' in stats and stats['daily_interactions'] is not None:
                for favorite_count in stats['daily_interactions']['favorites_count']:
                    day_str = favorite_count['id']
                    if day_str not in daily_favorites_count:
                        daily_favorites_count[day_str] = int(favorite_count['count'])
                    else:
                        daily_favorites_count[day_str] += int(favorite_count['count'])

                for retweet_count in stats['daily_interactions']['retweets_count']:
                    day_str = retweet_count['id']
                    if day_str not in daily_retweets_count:
                        daily_retweets_count[day_str] = int(retweet_count['count'])
                    else:
                        daily_retweets_count[day_str] += int(retweet_count['count'])

            if 'interactions' in stats and stats['interactions'] is not None:
                favorites_count += int(stats['interactions']['favorites_count'])
                retweets_count += int(stats['interactions']['retweets_count'])

            if 'benchmark' in stats:
                for name, value in stats['benchmark'].items():
                    if name in benchmark:
                        benchmark[name] += value
                    else:
                        benchmark[name] = value

        # merge avg statistics for benchmark
        if 'posts_avg' in benchmark:
            benchmark['posts_avg'] = benchmark['posts_avg'] // len(stats_list)

        if 'positive_posts_avg' in benchmark:
            benchmark['positive_posts_avg'] = benchmark['positive_posts_avg'] // len(stats_list)

        if 'negative_posts_avg' in benchmark:
            benchmark['negative_posts_avg'] = benchmark['negative_posts_avg'] // len(stats_list)

        topic_discovery_result = topic_discovery.summarize_topics(topic_discovery_list)
        for tracker_topic in topic_discovery_result:
            keywords_weights = tracker_topic['keywords_weights']
            sort_keywords = sorted(keywords_weights, key=lambda k: keywords_weights[k], reverse=False)
            top5 = sort_keywords[:5]
            tracker_topic["top5"] = top5

        trending_topics = []
        for day, unmerged_topics in daily_topic_discovery.items():
            merged_topics = topic_discovery.summarize_topics(unmerged_topics)
            for tracker_topic in merged_topics:
                keywords_weights = tracker_topic['keywords_weights']
                sort_keywords = sorted(keywords_weights, key=lambda k: keywords_weights[k], reverse=False)
                top5 = sort_keywords[:5]
                tracker_topic["top5"] = top5

            trending_topics.append({'date': day, 'topics': merged_topics})

        trending_topics.reverse()
        daily_interactions = {}
        sorted_daily_favorites_count = collections.OrderedDict(sorted(
            daily_favorites_count.items()))

        daily_interactions["favorites_count"] = [{'id': day, 'count': count}
                                                 for day, count in sorted_daily_favorites_count.items()]

        sorted_daily_retweets_count = collections.OrderedDict(sorted(
            daily_retweets_count.items()))

        daily_interactions["retweets_count"] = [{'id': day, 'count': count}
                                                for day, count in sorted_daily_retweets_count.items()]

        top_re_tweets_idxs = re_tweets_counts.most_common(FilteredStats.NUM_OF_STATS)
        for idx, count in top_re_tweets_idxs:
            tweet = re_tweets[idx]
            tweet['retweet_count'] = count
            top_retweeted_tweets.append(tweet)

        statistics = []
        top_engagers = engagers.most_common(FilteredStats.NUM_OF_STATS)
        active_people = []
        for i in top_engagers:
            author_id = cassandra.get_author_id(i[0])
            active_people.append({'id': i[0], 'stats_count': i[1], 'user_id': author_id})
        statistics.append({"ActivePeople": active_people})

        top_influencers = influencers.most_common(FilteredStats.NUM_OF_STATS)
        popular_people = []
        for i in top_influencers:
            author_id = cassandra.get_author_id(i[0])
            popular_people.append({'id': i[0], 'stats_count': i[1], 'user_id': author_id})
        statistics.append({"PopularPeople": popular_people})

        original_tweets_top_influencers = original_tweets_influencers.most_common(FilteredStats.NUM_OF_STATS)
        original_tweets_popular_people = []
        for i in original_tweets_top_influencers:
            author_id = cassandra.get_author_id(i[0])
            original_tweets_popular_people.append({'id': i[0], 'stats_count': i[1], 'user_id': author_id})
        statistics.append({"OriginalTweetsPopularPeople": original_tweets_popular_people})

        top_verified_authors = verified_authors.most_common(FilteredStats.NUM_OF_STATS)
        verified_authors = []
        for i in top_verified_authors:
            author_id = cassandra.get_author_id(i[0])
            verified_authors.append({'id': i[0], 'stats_count': i[1], 'user_id': author_id})
        statistics.append({"verifiedAuthors": verified_authors})

        top_languages = languages.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"LanguagesDistribution": [{'id': i[0], 'stats_count': i[1]} for i in top_languages]})
        top_countries = countriesDistribution.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"CountriesDistribution": [{'id': i[0], 'stats_count': i[1]} for i in top_countries]})
        top_cities = citiesDistribution.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"CitiesDistribution": [{'id': i[0], 'stats_count': i[1]} for i in top_cities]})
        top_topics = topics.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"Topics": [{'id': i[0], 'stats_count': i[1]} for i in top_topics]})
        top_dialects = dialects.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"Dialects": [{'id': i[0], 'stats_count': i[1]} for i in top_dialects]})
        top_accounts_types = accounts_types.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"AccountsTypes": [{'id': i[0], 'stats_count': i[1]} for i in top_accounts_types]})
        top_sources = sources.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"SourceDistribution": [{'id': i[0], 'stats_count': i[1]} for i in top_sources]})
        top_genders = genders.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"Gender": [{'id': i[0], 'stats_count': i[1]} for i in top_genders]})
        top_urls = urls.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"TopURLs": [{'id': i[0], 'stats_count': i[1]} for i in top_urls]})
        top_hashtags = hashtags.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"ActiveHashtags": [{'id': i[0], 'stats_count': i[1]} for i in top_hashtags]})

        top_keywords = top_words.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"TopKeywords": [{'id': i[0], 'stats_count': i[1]} for i in top_keywords]})

        statistics.append({'PotentialImpression': [{'id': 'PotentialImpression',
                                                    'stats_count': potential_impression}]})
        statistics.append({'UniquePotentialImpression': [{'id': 'UniquePotentialImpression',
                                                          'stats_count': unique_potential_impression}]})
        statistics.append({'UniqueAuthors': [{'id': 'UniqueAuthors', 'stats_count': unique_authors}]})

        statistics.append({'original_tweets_PotentialImpression': [{'id': 'original_tweets_PotentialImpression',
                                                                    'stats_count': original_tweets_potential_impression}]})
        statistics.append({'original_tweets_UniquePotentialImpression': [{'id': 'original_tweets_UniquePotentialImpression',
                                                                          'stats_count': original_tweets_unique_potential_impression}]})
        statistics.append({'original_tweets_UniqueAuthors': [{'id': 'original_tweets_UniqueAuthors', 'stats_count': original_tweets_unique_authors}]})

        top_photos = photos.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"TopImages": [{'id': meta[i[0]]['id'], 'src': meta[i[0]]['src'], 'data': meta[i[0]]['data'] + ":thumb", 'stats_count': i[1]} for i in top_photos]})

        top_videos = videos.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"TopVideos": [{'id': meta[i[0]]['url'], 'stats_count': i[1], 'data': meta[i[0]]['thumb'], 'src': meta[i[0]]['src']}
                           for i in top_videos]})

        top_photos = photos_retweets.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"TopImages_retweets": [{'id': meta[i[0]]['id'], 'src': meta[i[0]]['src'], 'data': meta[i[0]]['data'] + ":thumb", 'stats_count': i[1]} for i in top_photos]})

        top_videos = videos_retweets.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"TopVideos_retweets": [{'id': meta[i[0]]['url'], 'stats_count': i[1], 'data': meta[i[0]]['thumb'], 'src': meta[i[0]]['src']}
                                    for i in top_videos]})

        authors_statistics = {}
        authors_statistics['percentage'] = 0
        authors_statistics['total_count'] = 0

        total_auth = unique_authors
        if total_auth > 0:
            total_verified_auth = len(verified_authors)
            authors_statistics['percentage'] = round((total_verified_auth / total_auth) * 100, 2)
            authors_statistics['total_count'] = total_verified_auth

        statistics.append({'verifiedAuthorsStatistics': authors_statistics})

        interactions = {"favorites_count": favorites_count,
                        "retweets_count": retweets_count}
        return {
            #'latest_posts': latest_posts[-FilteredStats.NUM_OF_STATS:],
            'statistics': statistics,
            'top_retweeted_tweets': top_retweeted_tweets,
            'tweets_over_time': [{'id': day, 'stats_count': count} for day, count in sorted(daily_counts.items())],
            'sentiment_stats': sentiment,
            'overall_sentiment_stats': overallsentiment,
            #'topics': {'top_words': top_words, 'weights': weights},
            'overallsentiment_overtime': overallsentiment_overtime,
            'sentiment_overtime': sentiment_overtime,
            'content_type': content_type,
            'topic_discovery': topic_discovery_result,
            'trending_topics': trending_topics,
            'interactions': interactions,
            'daily_interactions': daily_interactions,
            'benchmark': benchmark
        }

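    # Minimal usage sketch for the read path (tracker id and dates below are
    # hypothetical):
    #   week = FilteredStats.get_filtered_stats(1234, 'last-week', None)
    #   rng = FilteredStats.get_filtered_stats_by_dates(1234, '2019-12-01',
    #                                                   '2019-12-07')
    # Both load the daily pickles written above and return the merged dict
    # produced by _merge_filtered_stats.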
    @staticmethod
    def get_report_stats(report_id, report_name, NUM_OF_STATS=10):
        file_path = Config.REPORTS_PATH + "%s/%s" % (str(report_id), report_name)
        try:
            stats_list = json.load(open(file_path, 'rb'))
            stats = FilteredStats._merge_filtered_stats([stats_list['twitter_metrics']], NUM_OF_STATS)
        except FileNotFoundError:
            stats = {}

        return stats

    @staticmethod
    def get_filtered_stats(tracker_id, time_duration, filter_label, NUM_OF_STATS=10):
        if time_duration == 'yesterday':
            date = datetime.today() - timedelta(days=1)

            file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                % ('daily', str(tracker_id), date.strftime("%Y-%m-%d"))
            try:
                stats_list = pickle.load(open(file_path, 'rb'))
                # send the single stats dict through the merge step to cut the top X
                stats = FilteredStats._merge_filtered_stats([stats_list], NUM_OF_STATS)
            except FileNotFoundError:
                stats = {}
        elif time_duration == 'last-week':
            stats_list = []
            for i in range(7, 0, -1):
                date = datetime.today() - timedelta(days=i)
                file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                    % ('daily', str(tracker_id), date.strftime("%Y-%m-%d"))
                try:
                    stats_list.append(pickle.load(open(file_path, 'rb')))
                except FileNotFoundError:
                    # only some of the days were found; skip the missing ones
                    continue
            stats = FilteredStats._merge_filtered_stats(stats_list, NUM_OF_STATS)
        elif time_duration == 'last-month':
            stats_list = []
            for i in range(30, 0, -1):
                date = datetime.today() - timedelta(days=i)
                file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                    % ('daily', str(tracker_id), date.strftime("%Y-%m-%d"))
                try:
                    stats_list.append(pickle.load(open(file_path, 'rb')))
                except FileNotFoundError:
                    # only some of the days were found; skip the missing ones
                    continue
            stats = FilteredStats._merge_filtered_stats(stats_list, NUM_OF_STATS)
        else:
            stats_list = []
            file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                % (time_duration, str(tracker_id), filter_label)
            try:
                stats_list.append(pickle.load(open(file_path, 'rb')))
            except FileNotFoundError:
                # This error should not happen! The file may have been deleted.
                ExceptionHandler.handle_exception("Cannot load filtered stats as file " + str(file_path) + " does not exist!",
                                                  "filtered_stats.py")
            stats = FilteredStats._merge_filtered_stats(stats_list, NUM_OF_STATS)
        return stats

    @staticmethod
    def get_filtered_stats_by_dates(tracker_id, start_date, end_date, NUM_OF_STATS=10, ignore_missing=True):
        start_date = datetime.strptime(start_date, '%Y-%m-%d')
        end_date = datetime.strptime(end_date, '%Y-%m-%d')
        stats_list = []
        while start_date <= end_date:
            file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                % ('daily', str(tracker_id), start_date.strftime("%Y-%m-%d"))
            try:
                stats_list.append(pickle.load(open(file_path, 'rb')))
            except FileNotFoundError:
                print("file not found for tracker_id " + str(tracker_id) + " and date " + start_date.strftime("%Y-%m-%d"))
                if not ignore_missing:
                    return {'missing_files': True}
            start_date = start_date + timedelta(days=1)

        stats = FilteredStats._merge_filtered_stats(stats_list, NUM_OF_STATS)
        return stats

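    # get_account_filtered_stats_by_dates below dispatches on stats_name
    # ('benchmark', 'audience', 'content', 'engagement', 'customer care' or
    # 'customer_care_questions_list') to the matching _merge_account_* helper.
    # Its daily pickles live under account_monitor_metrics_filtered_path,
    # keyed by the stats name with spaces replaced by underscores.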
    @staticmethod
    def get_account_filtered_stats_by_dates(tracker_id, start_date, end_date, ignore_missing=True, stats_name='benchmark', page_number=0):
        # stats_name = 'audience'
        start_date = datetime.strptime(start_date, '%Y-%m-%d')
        end_date = datetime.strptime(end_date, '%Y-%m-%d')
        stats_list = []
        while start_date <= end_date:
            file_path = Config.account_monitor_metrics_filtered_path + "%s/%s/%s/%s.pickle" \
                % (stats_name.replace(" ", "_"), 'daily', str(tracker_id), start_date.strftime("%Y-%m-%d"))
            try:
                stats_list.append(pickle.load(open(file_path, 'rb')))
            except FileNotFoundError:
                print("file not found for tracker_id " + str(tracker_id) + " and date " + start_date.strftime("%Y-%m-%d"))
                if not ignore_missing:
                    return {'missing_files': True}
            start_date = start_date + timedelta(days=1)
        if stats_name == 'benchmark':
            stats = FilteredStats._merge_account_benchmark(stats_list)
        elif stats_name == 'audience':
            print("Audience")
            stats = FilteredStats._merge_account_audience_stats(stats_list)
        elif stats_name == 'content':
            stats = FilteredStats._merge_account_content_stats(stats_list)
        elif stats_name == "engagement":
            stats = FilteredStats._merge_account_engagement_stats(stats_list)
        elif stats_name == "customer care":
            stats = FilteredStats._merge_account_customercare_stats(stats_list)
        elif stats_name == "customer_care_questions_list":
            stats = FilteredStats._merge_account_questions_stats(tracker_id, stats_list, int(page_number))
        else:
            # unknown stats_name: return empty stats rather than raising UnboundLocalError
            stats = {}
        return stats

    @staticmethod
    def get_qi_predefined_filtered_stats(tracker_id, time_duration, inspect_type, inspect_name, NUM_OF_STATS=10):
        # daily_path = 'quick_inspect/daily'
        stats = {}  # default when time_duration is unrecognized
        if time_duration == 'yesterday':
            date = datetime.today() - timedelta(days=1)

            # file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
            #     % (daily_path, str(tracker_id), date.strftime("%Y-%m-%d"))
            try:
                stats_list = QuickInspector.get_inspector_stats(tracker_id, date.strftime("%Y-%m-%d"), inspect_type, 'predefinedqi', date.strftime("%Y-%m-%d"), inspect_name)
                # stats_list = pickle.load(open(file_path, 'rb'))
                # send the single stats dict through the merge step to cut the top X
                stats = FilteredStats._merge_qi_stats([stats_list], NUM_OF_STATS)
            except FileNotFoundError:
                stats = {}
        elif time_duration == 'last-week':
            stats_list = []
            for i in range(7, 0, -1):
                date = datetime.today() - timedelta(days=i)
                # file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                #     % (daily_path, str(tracker_id), date.strftime("%Y-%m-%d"))
                try:
                    inspect = QuickInspector.get_inspector_stats(tracker_id, date.strftime("%Y-%m-%d"), inspect_type, 'predefinedqi', date.strftime("%Y-%m-%d"), inspect_name)
                    stats_list.append(inspect)
                except FileNotFoundError:
                    # only some of the days were found; skip the missing ones
                    continue
            stats = FilteredStats._merge_qi_stats(stats_list, NUM_OF_STATS)
        elif time_duration == 'last-month':
            stats_list = []
            for i in range(30, 0, -1):
                date = datetime.today() - timedelta(days=i)
                # file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                #     % (daily_path, str(tracker_id), date.strftime("%Y-%m-%d"))
                try:
                    inspect = QuickInspector.get_inspector_stats(tracker_id, date.strftime("%Y-%m-%d"), inspect_type, 'predefinedqi', date.strftime("%Y-%m-%d"), inspect_name)

                    stats_list.append(inspect)
                except FileNotFoundError:
                    # only some of the days were found; skip the missing ones
                    continue
            stats = FilteredStats._merge_qi_stats(stats_list, NUM_OF_STATS)
        else:
            ExceptionHandler.handle_exception("Error in QI filtered stats: " + str(time_duration) + " does not exist!",
                                              "filtered_stats.py")
        return stats

    @staticmethod
    def get_metric_by_name(tracker_id, filter_type, filter_label, metric_name):
        NUM_OF_STATS = 100
        stats = FilteredStats.get_filtered_stats(tracker_id, filter_type, filter_label, NUM_OF_STATS)
        if metric_name in ("ActivePeople", "PopularPeople", "ActiveHashtags", "TopImages", "TopVideos", "TopURLs", "TopKeywords"):
            for index in range(len(stats['statistics'])):
                for key in stats['statistics'][index]:
                    if key == metric_name:
                        return stats['statistics'][index][key]

        if metric_name == "top_retweeted_tweets":
            return stats['top_retweeted_tweets']

    @staticmethod
    def _merge_qi_stats(stats_list, NUM_OF_STATS):
        cassandra = CassandraClient()

        tweet_cnt = 0
        unique_authors = 0
        engagers = collections.Counter()
        potential_impression = 0
        unique_potential_impression = 0
        hashtags = collections.Counter()
        overallsentiment = dict()
        sentiment = dict()
        hours = collections.Counter()
        re_tweets_counts = collections.Counter()
        top_retweeted_tweets = []
        re_tweets = dict()
        FilteredStats.NUM_OF_STATS = NUM_OF_STATS

        weights = []
        statistics_items_map = {
            'ActivePeople': engagers, 'ActiveHashtags': hashtags, 'tweets_over_hours': hours
        }
        statistics_int_map = {
            'PotentialImpression': potential_impression, 'UniquePotentialImpression':
                unique_potential_impression, 'UniqueAuthors': unique_authors, 'tweets_num': tweet_cnt
        }
        for stats in stats_list:
            if stats == {}:
                continue
            if 'statistics' in stats:
                for item in stats['statistics']:
                    item_name = list(item.keys())[0]
                    item_list = list(item.values())[0]
                    for obj in item_list:
                        if item_name in statistics_items_map:
                            statistics_items_map[item_name][obj['id']] += int(obj['stats_count'])
                        elif item_name in statistics_int_map:
                            if item_name == 'PotentialImpression':
                                potential_impression += int(obj['stats_count'])
                            elif item_name == 'UniquePotentialImpression':
                                unique_potential_impression += int(obj['stats_count'])
                            elif item_name == 'UniqueAuthors':
                                unique_authors += int(obj['stats_count'])

            if 'sentiment_stats' in stats:
                for itemName, itemValue in stats['sentiment_stats'].items():
                    sentiment_label = str(itemName)
                    if sentiment_label in sentiment:
                        sentiment[sentiment_label] += itemValue
                    else:
                        sentiment[sentiment_label] = itemValue

            if 'overall_sentiment_stats' in stats:
                for itemName, itemValue in stats['overall_sentiment_stats'].items():
                    sentiment_label = str(itemName)
                    if sentiment_label in overallsentiment:
                        overallsentiment[sentiment_label] += itemValue
                    else:
                        overallsentiment[sentiment_label] = itemValue
            #for item in stats['tweets_over_time']:
            #    daily_counts[item['id']] = item['stats_count']
            #for item in stats['latest_posts']:
            #    latest_posts.append(item)
            for tweet in stats['top_retweeted_tweets']:
                if tweet['id_str'] in re_tweets_counts:
                    re_tweets[tweet['id_str']]['retweet_count'] += tweet['retweet_count']
                    re_tweets_counts[tweet['id_str']] += tweet['retweet_count']
                else:
                    re_tweets_counts[tweet['id_str']] = tweet['retweet_count']
                    re_tweets[tweet['id_str']] = tweet

            if 'tweets_num' in stats and stats['tweets_num'] is not None:
                tweet_cnt += int(stats['tweets_num'])

        top_re_tweets_idxs = re_tweets_counts.most_common(FilteredStats.NUM_OF_STATS)
        for idx, count in top_re_tweets_idxs:
            tweet = re_tweets[idx]
            tweet['retweet_count'] = count
            top_retweeted_tweets.append(tweet)

        statistics = []

        top_hashtags = hashtags.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"ActiveHashtags": [{'id': i[0], 'stats_count': i[1]} for i in top_hashtags]})

        # top_keywords = top_words.most_common(FilteredStats.NUM_OF_STATS)
        # statistics.append(
        #     {"TopKeywords": [{'id': i[0], 'stats_count': i[1]} for i in top_keywords]})

        statistics.append({'PotentialImpression': [{'id': 'PotentialImpression',
                                                    'stats_count': potential_impression}]})
        statistics.append({'UniquePotentialImpression': [{'id': 'UniquePotentialImpression',
                                                          'stats_count': unique_potential_impression}]})
        statistics.append({'UniqueAuthors': [{'id': 'UniqueAuthors', 'stats_count': unique_authors}]})

        return {
            'tweets_num': tweet_cnt,
            'statistics': statistics,
            'top_retweeted_tweets': top_retweeted_tweets,
            'sentiment_stats': sentiment,
            'overall_sentiment_stats': overallsentiment,
        }

    @staticmethod
    def _merge_account_benchmark(stats_list):
        benchmark = dict()
        initial_followers = 0
        followers_growth = 0
        for stats in stats_list:
            if 'initial_followers' in stats['benchmark']:
                initial_followers = stats['benchmark']['initial_followers']

            if 'followers_growth' in stats['benchmark']:
                followers_growth = stats['benchmark']['followers_growth']

            for name, value in stats['benchmark'].items():
                if name in benchmark:
                    benchmark[name] += value
                else:
                    benchmark[name] = value

        benchmark['initial_followers'] = initial_followers

        if 'avg_posts' in benchmark:
            benchmark['avg_posts'] = benchmark['avg_posts'] // len(stats_list)

        if 'avg_positive_posts' in benchmark:
            benchmark['avg_positive_posts'] = benchmark['avg_positive_posts'] // len(stats_list)

        if 'avg_negative_posts' in benchmark:
            benchmark['avg_negative_posts'] = benchmark['avg_negative_posts'] // len(stats_list)

        if 'response_time' in benchmark:
            benchmark['response_time'] = str(timedelta(seconds=int(benchmark['response_time']) // len(stats_list)))

        if 'response_time_positive' in benchmark:
            benchmark['response_time_positive'] = str(timedelta(seconds=int(benchmark['response_time_positive']) // len(stats_list)))

        if 'response_time_negative' in benchmark:
            benchmark['response_time_negative'] = str(timedelta(seconds=int(benchmark['response_time_negative']) // len(stats_list)))

        if 'response_rate' in benchmark:
            benchmark['response_rate'] = round(benchmark['response_rate'] / len(stats_list), 2) * 100
        if 'response_rate_positive' in benchmark:
            benchmark['response_rate_positive'] = round(benchmark['response_rate_positive'] / len(stats_list), 2)
        if 'response_rate_negative' in benchmark:
            benchmark['response_rate_negative'] = round(benchmark['response_rate_negative'] / len(stats_list), 2)
        return {
            'benchmark': benchmark
        }

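    # Benchmark fields are stored per day, so merging N days sums them and
    # then re-averages the avg_* and response_* fields by dividing by N:
    # e.g. summed response_time seconds // N is re-rendered as an "H:MM:SS"
    # string via timedelta, and response_rate is averaged then scaled to a
    # percentage.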
    @staticmethod
    def _merge_account_content_stats(stats_list):

        tweets_over_time = collections.Counter()
        content_type = collections.Counter()

        total_tweets = 0
        total_tweets_today = 0
        total_tweets_last_week = 0
        total_tweets_last_month = 0

        for stats in stats_list:
            if stats == {}:
                continue
            if 'content_type' in stats:
                for item in stats['content_type']:
                    content_type[item['name']] += item['value']

            if 'tweets_over_time' in stats:
                for item in stats['tweets_over_time']:
                    tweets_over_time[item['name']] += item['value']

            if 'total_tweets' in stats:
                total_tweets += stats['total_tweets']

            if 'total_tweets_today' in stats:
                total_tweets_today += stats['total_tweets_today']

            if 'total_tweets_last_week' in stats:
                total_tweets_last_week += stats['total_tweets_last_week']

            if 'total_tweets_last_month' in stats:
                total_tweets_last_month += stats['total_tweets_last_month']

        response = {}

        tweets_over_time = tweets_over_time.most_common(FilteredStats.NUM_OF_STATS)
        content_type = content_type.most_common(FilteredStats.NUM_OF_STATS)

        response["tweets_over_time"] = [{'name': i[0], 'value': i[1]} for i in tweets_over_time]
        response["content_type"] = [{'name': i[0], 'value': i[1]} for i in content_type]
        response['total_tweets_today'] = total_tweets_today
        response['total_tweets_last_week'] = total_tweets_last_week
        response['total_tweets_last_month'] = total_tweets_last_month

        return response

    @staticmethod
    def _merge_account_audience_stats(stats_list):

        statistics = []

        avgFollowersChange = 0
        maxFollowersChange = 0
        maxFollowersChangeDate = ' '
        totalFollowers = 0
        totalFollowersChange = 0

        followers_over_time = []
        TopEngagers = []
        TopInfluencers = []
        TopFans = []
        TopRetweeters = []

        for stats in stats_list:
            if stats == {}:
                continue
            if 'avgFollowersChange' in stats:
                avgFollowersChange += stats['avgFollowersChange']
            if 'maxFollowersChange' in stats:
                maxFollowersChange = max(maxFollowersChange, stats['maxFollowersChange'])
                if maxFollowersChange == stats['maxFollowersChange']:
                    maxFollowersChangeDate = stats['maxFollowersChangeDate']

            if 'totalFollowers' in stats:
                totalFollowers += stats['totalFollowers']
            if 'totalFollowersChange' in stats:
                totalFollowersChange += stats['totalFollowersChange']

            if 'followers_over_time' in stats:
                for item in stats['followers_over_time']:
                    followers_over_time.append(item)

            if 'statistics' in stats:
                for item in stats['statistics']:
                    item_name = list(item.keys())[0]
                    item_list = list(item.values())[0]
                    for obj in item_list:
                        if item_name == 'TopEngagers':
                            item = FilteredStats.search_list_objects_by_obj(TopEngagers, obj)
                            if item:
                                TopEngagers.remove(item)
                                item['value'] += obj['value']
                                TopEngagers.append(item)
                            else:
                                TopEngagers.append(obj)

                        if item_name == 'TopInfluencers':
                            item = FilteredStats.search_list_objects_by_obj(TopInfluencers, obj)
                            if item:
                                TopInfluencers.remove(item)
                                item['value'] += obj['value']
                                TopInfluencers.append(item)
                            else:
                                TopInfluencers.append(obj)

                        if item_name == 'TopFans':
                            item = FilteredStats.search_list_objects_by_obj(TopFans, obj)
                            if item:
                                TopFans.remove(item)
                                item['value'] += obj['value']
                                TopFans.append(item)
                            else:
                                TopFans.append(obj)

                        if item_name == 'TopRetweeters':
                            item = FilteredStats.search_list_objects_by_obj(TopRetweeters, obj)
                            if item:
                                TopRetweeters.remove(item)
                                item['value'] += obj['value']
                                TopRetweeters.append(item)
                            else:
                                TopRetweeters.append(obj)

        avgFollowersChange = avgFollowersChange // len(stats_list)
        totalFollowers = totalFollowers // len(stats_list)
        totalFollowersChange = totalFollowersChange // len(stats_list)

        TopEngagers = sorted(TopEngagers, key=lambda k: k['value'], reverse=True)
        TopInfluencers = sorted(TopInfluencers, key=lambda k: k['value'], reverse=True)
        TopFans = sorted(TopFans, key=lambda k: k['value'], reverse=True)
        TopRetweeters = sorted(TopRetweeters, key=lambda k: k['value'], reverse=True)

        return {
            'followers_over_time': followers_over_time,
            'statistics': [{'TopEngagers': TopEngagers[:10]}, {'TopInfluencers': TopInfluencers[:10]}, {'TopFans': TopFans[:10]}, {'TopRetweeters': TopRetweeters[:10]}],
            'totalFollowers': totalFollowers,
            'totalFollowersChange': totalFollowersChange,
            'maxFollowersChange': maxFollowersChange,
            'maxFollowersChangeDate': maxFollowersChangeDate,
            'avgFollowersChange': avgFollowersChange,
        }

    @staticmethod
    def _merge_account_engagement_stats(stats_list):

        avg_interaction_per_day = 0
        max_interaction = 0
        max_interaction_date = ' '

        interactions_distribution = collections.Counter()
        posts_engagement = []
        top_engaged_posts = []
        user_activity = [
            {'name': 'Saturday', 'value': [0] * 24},
            {'name': 'Sunday', 'value': [0] * 24},
            {'name': 'Monday', 'value': [0] * 24},
            {'name': 'Tuesday', 'value': [0] * 24},
            {'name': 'Wednesday', 'value': [0] * 24},
            {'name': 'Thursday', 'value': [0] * 24},
            {'name': 'Friday', 'value': [0] * 24}]

        for stats in stats_list:
            if stats == {}:
                continue
            if 'max_interaction' in stats:
                max_interaction = max(max_interaction, stats['max_interaction'])
                if max_interaction == stats['max_interaction']:
                    max_interaction_date = stats['max_interaction_date']

            if 'avg_interaction_per_day' in stats:
                avg_interaction_per_day += stats['avg_interaction_per_day']

            if 'interactions_distribution' in stats:
                for item in stats['interactions_distribution']:
                    interactions_distribution[item['name']] += item['value']

            if 'posts_engagement' in stats:
                for item in stats['posts_engagement']:
                    posts_engagement.append(item)

            if 'top_engaged_posts' in stats:
                for item in stats['top_engaged_posts']:
                    top_engaged_posts.append(item)

            if 'user_activity' in stats and len(stats['user_activity']) > 0:
                for item in stats['user_activity']:
                    for activity in user_activity:
                        if activity['name'] == item['name']:
                            total = list(map(int.__add__, activity['value'], item['value']))
                            activity['value'] = total

        interactions_distribution = interactions_distribution.most_common()

        metrics_res = {
            'top_engaged_posts': top_engaged_posts,
            'interactions_distribution': [{'name': i[0], 'value': i[1]} for i in interactions_distribution],
            'user_activity': user_activity,
            'posts_engagement': posts_engagement[:10],
            'max_interaction': max_interaction,
            'max_interaction_date': max_interaction_date,
            'avg_interaction_per_day': avg_interaction_per_day
        }
        return metrics_res

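    # user_activity holds one 24-slot hour histogram per weekday; merging
    # adds the histograms element-wise via map(int.__add__, ...), e.g.
    # [1, 0, 2, ...] + [0, 3, 1, ...] -> [1, 3, 3, ...].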
    @staticmethod
    def _merge_account_questions_stats(tracker_id, stats_list, page_number):
        tweets_per_page = 10
        first_index = (page_number - 1) * tweets_per_page
        last_index = page_number * tweets_per_page
        list_size = 0

        tweets_ids_list = []

        for stats in stats_list:
            if stats == {}:
                continue

            if 'tweet_ids' in stats:
                for tweet_id in stats['tweet_ids']:
                    tweets_ids_list.append(tweet_id)

        list_size = len(tweets_ids_list)

        tweets_ids_list = tweets_ids_list[first_index:last_index]

        question_list = []

        if len(tweets_ids_list) > 0:
            psql = PostgresCLient()
            tweets = psql.search(tracker_id, None, None, None, None, {'tweet_id': tweets_ids_list})
            if len(tweets) > 0:
                for tweet in tweets:
                    tweet['reply_count'] = 0
                    tweet['interaction_count'] = tweet['reply_count'] + tweet['retweet_count'] + tweet['favorite_count']
                    question_list.append(tweet)
            else:
                warnings.warn("question not found for tracker " + str(tracker_id))

        response = dict()
        response['pages_count'] = math.ceil(list_size / tweets_per_page)
        question_list = sorted(question_list, key=lambda x: datetime.strptime(x["created_at"], '%a %b %d %H:%M:%S +0000 %Y'), reverse=True)
        response['tweets'] = question_list
        return response

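    # Pagination math: with tweets_per_page = 10 and page_number = 2, the
    # slice is tweets_ids_list[10:20], and pages_count = ceil(list_size / 10);
    # page numbering is 1-based (page 1 -> indices 0..9).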
    @staticmethod
    def _merge_account_customercare_stats(stats_list):

        response_avg_time = 0
        total_questions = 0

        response_rate = collections.Counter()
        response_time_segments = collections.Counter()

        for stats in stats_list:
            if stats == {}:
                continue
            if 'response_avg_time' in stats:
                response_avg_time += pytimeparse.parse(stats['response_avg_time'])

            if 'total_questions' in stats:
                total_questions += stats['total_questions']

            if 'response_rate' in stats:
                for item in stats['response_rate']:
                    response_rate[item['name']] += item['value']

            if 'response_time_segments' in stats:
                for item in stats['response_time_segments']:
                    response_time_segments[item['name']] += int(item['value'])

        response_time_segments = response_time_segments.most_common()
        response_rate = response_rate.most_common()

        metrics_res = {
            'response_time_segments': [{'name': i[0], 'value': i[1]} for i in response_time_segments],
            'response_rate': [{'name': i[0], 'value': i[1]} for i in response_rate],
            'response_avg_time': str(timedelta(seconds=int(response_avg_time // len(stats_list)))),
            'total_questions': total_questions,
        }
        return metrics_res

    @staticmethod
    def search_list_objects_by_obj(object_list, current_obj):
        for item in object_list:
            if 'user_id' in item and item['user_id'] != {}:
                if item['user_id'] == current_obj['user_id']:
                    return item
            else:
                if item['name'] == current_obj['name']:
                    return item
        return None


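# CLI entry point: argv[1] is the number of past days to backfill and the
# optional argv[2] ("True"/"False") controls skip_qi, e.g.
#   python filtered_stats.py 31 True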
if __name__ == "__main__":
    try:
        # argv[2] is optional; any value other than "True" leaves skip_qi False
        skip_qi = len(sys.argv) > 2 and sys.argv[2] == "True"
        ExceptionHandler.debug_channel("Started filtered stats for " + sys.argv[1] + " day(s) and skip_qi = " + str(skip_qi), "filtered stats")
        FilteredStats(number_of_days=int(sys.argv[1]), skip_qi=skip_qi)
        ExceptionHandler.debug_channel("Finished filtered stats for " + sys.argv[1] + " day(s) and skip_qi = " + str(skip_qi), "filtered stats")
    except Exception as e:
        ExceptionHandler.handle_exception(e, "FilteredStats")
        traceback.print_exc()