import traceback
import schedule
import time
import requests
import pickle
import warnings
from lucidyalib.lucidya_stats import LucidyaStats as lucidyaLibStats
from lucidyalib.InstagramOfflineStats import InstagramOfflineStats
from lucidyalib.FacebookOfflineStats import FacebookOfflineStats
from lucidyalib.ExceptionHandler import ExceptionHandler
from lucidyalib.PostgresClient import PostgresCLient
from lucidyalib.rabbitmqClient import rabbitmqPublisher
from lucidyalib.helper import helper
from offline_stats.quick_inspect_worker import QuickInspector
from accountanalysis.Stats import Stats as AAStats
#from lucidyalib.topic_discovery.TopicDiscovery import TopicDiscovery
from lucidyalib.topic_discovery.TopicDiscovery_V2 import TopicDiscovery
import sys
sys.path.append('/opt/lucidya/config')
import config as Config
import os
import collections
from datetime import datetime, timedelta
import threading
import json
import pytimeparse
import math
import copy

try:
    from lucidyalib.CassandraClient import CassandraClient
except ImportError:
    from CassandraClient import CassandraClient


class FilteredStats:
    NUM_OF_STATS = 10

    def __init__(self, number_of_days, monitor_id=None, skip_qi=False):
        self.skip_qi = skip_qi
        self.NUM_OF_THREADS = 5
        self.specific_monitor_id = monitor_id
        for i in range(number_of_days, 0, -1):
            date = datetime.today() - timedelta(days=i)
            start_date = date.replace(hour=0, minute=0, second=0, microsecond=0)
            end_date = date.replace(hour=23, minute=59, second=59, microsecond=999999)
            self.retrieve_trackers_tweets(start_date, end_date, 'daily')
            self.retrieve_trackers_instagram_media(start_date, end_date, 'daily')
            self.retrieve_trackers_facebook_posts(start_date, end_date, 'daily')
        #schedule.every().day.at("00:10").do(self.save_daily_stats)
        #while True:
        #    schedule.run_pending()
        #    time.sleep(1)
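
    # Constructor usage sketch (hypothetical monitor id): rebuild the last 31 days
    # of daily metrics for a single monitor and skip the quick-inspect events:
    #     FilteredStats(31, monitor_id=1234, skip_qi=True)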

    def save_daily_stats(self):
        start_date = datetime.today() - timedelta(days=1)
        self.retrieve_trackers_tweets(start_date, start_date, 'daily')
        self.retrieve_trackers_instagram_media(start_date, start_date, 'daily')
        print(str(datetime.now()) + " ---> Finished daily materialized metrics for " + datetime.today().strftime("%Y-%m-%d"))

    @staticmethod
    def retrieve_trackers_tweets_tracker(tracker_id, start_date, end_date, time_zone, file_name_prefix, skip_qi=False):
        psql = PostgresCLient()
        # metrics for the requested local-day window
        limit_filter = dict()
        #limit_filter['reverse'] = True
        #tweets = psql.search(tracker_id, start_date.strftime("%Y-%m-%d 00:00:00"), end_date.strftime("%Y-%m-%d 23:59:59"), None, None, limit_filter, 'tweets')
        local_start_date = (start_date - timedelta(hours=time_zone)).strftime("%Y-%m-%d %H:%M:%S")
        local_end_date = (end_date - timedelta(hours=time_zone)).strftime("%Y-%m-%d %H:%M:%S")
        helper.process_tweets(tracker_id, dict(), local_start_date, local_end_date, psql, useStream=True)
        # get filtered stats
        advanced_filters = dict()
        advanced_filters['start_date'] = local_start_date
        statsObj = lucidyaLibStats(tracker_id, time_zone=time_zone, postgresClient=psql, advanced_filters=advanced_filters, start_date=local_start_date)
        stats = statsObj.get_metrics()
        # serialize stats result
        file_path = Config.monitors_metrics_filtered_path + "/%s/%s/" % (file_name_prefix, str(tracker_id))
        os.makedirs(file_path, exist_ok=True)
        with open(file_path + "%s.pickle" % start_date.strftime("%Y-%m-%d"), 'wb') as f:
            pickle.dump(stats, f)
        psql.cleanup()
        if not skip_qi:
            #FilteredStats(31, new_tracker_id)
            publisher = rabbitmqPublisher()
            publisher_channel = publisher.getChannel()
            eventName = 'calculate_qi_stats'
            eventData = dict()
            eventData['tracker_id'] = tracker_id
            eventData['start_date'] = local_start_date
            eventData['end_date'] = local_end_date
            eventData['time_zone'] = time_zone
            eventData['inspect_type'] = 'predefined'
            eventData['advanced_filters'] = advanced_filters
            eventData['stats_metrics'] = stats
            eventData['complete_file_path'] = 'predefinedqi' + "/" + str(tracker_id) + '/' + local_end_date.split(" ")[0]
            message = json.dumps({'eventName': eventName, 'eventData': eventData}, ensure_ascii=False).encode('utf8')
            publisher.publish(publisher_channel, eventName, message)
            publisher.cleanup()
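
    # Shape of the 'calculate_qi_stats' message published above (values illustrative):
    #     {"eventName": "calculate_qi_stats",
    #      "eventData": {"tracker_id": 1234, "start_date": "2020-01-01 00:00:00",
    #                    "end_date": "2020-01-01 23:59:59", "time_zone": 3,
    #                    "inspect_type": "predefined", "advanced_filters": {...},
    #                    "stats_metrics": {...}, "complete_file_path": "predefinedqi/1234/2020-01-01"}}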

    @staticmethod
    def retrieve_account_trackers_tweets_tracker(tracker_id, start_date, end_date, time_zone, file_name_prefix, account_name):
        # get filtered stats
        advanced_filters = dict()
        advanced_filters['start_date'] = start_date.strftime("%Y-%m-%d %H:%M:%S")
        advanced_filters['end_date'] = end_date.strftime("%Y-%m-%d %H:%M:%S")
        data = dict()
        data['tracker_id'] = tracker_id
        data['account_name'] = account_name
        data['start_date'] = start_date.strftime("%Y-%m-%d %H:%M:%S")
        data['end_date'] = end_date.strftime("%Y-%m-%d %H:%M:%S")
        data['file_name'] = start_date.strftime("%Y-%m-%d")
        data['advanced_filters'] = advanced_filters
        event_name = 'compute_account_filter_stats_days'
        print("Sending account-analysis stats request to RabbitMQ for tracker " + str(tracker_id))
        helper.sendToRabbitmq(event_name, data)

    def retrieve_trackers_facebook_posts(self, start_date, end_date, file_name_prefix):
        print(str(datetime.now()) + " ---> Start retrieve_trackers_facebook_posts for " + str(start_date) + " --> " + str(end_date))
        # Get all trackers, create a stream for each account id and run it.
        response = requests.get(Config.BACKEND_API_URL + "trackers?source_id=1&token=" + Config.TOKEN)
        if response.status_code != 200:
            try:
                print(str(response))
                print(response.status_code)
                print(response.json())
            except Exception:
                pass
            print(str(datetime.now()) + " ---> Cannot get Facebook trackers: " + str(response))
            ExceptionHandler.handle_exception("Cannot get Facebook trackers: " + str(response), "filtered_stats.py")
        else:
            init_list = response.json()
            temp_list = []
            for tracker in init_list:
                temp_list.append(copy.deepcopy(tracker))
            list_of_lists = []
            for i in range(0, self.NUM_OF_THREADS):
                list_of_lists.append(list())
            # split the list into sublists
            for index, item in enumerate(temp_list):
                list_of_lists[index % self.NUM_OF_THREADS].append(copy.deepcopy(item))
            thread_list = []
            # start a thread for each sublist
            for index, sub_list in enumerate(list_of_lists):
                my_thread = threading.Thread(target=self.process_facebook_list, args=(index, sub_list, start_date, end_date, file_name_prefix))
                thread_list.append(my_thread)
                my_thread.start()
                time.sleep(5)
            # wait for all threads
            print("Waiting for " + str(len(thread_list)) + " threads")
            for thread in thread_list:
                thread.join()
        print(str(datetime.now()) + " ---> End retrieve_trackers_facebook_posts")
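
    # Work-split sketch for the retrieve_trackers_* methods: with NUM_OF_THREADS = 5,
    # trackers [t0..t6] are dealt round-robin into sublists [t0, t5], [t1, t6],
    # [t2], [t3], [t4] via index % NUM_OF_THREADS, each processed on its own thread.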

    def retrieve_trackers_instagram_media(self, start_date, end_date, file_name_prefix):
        print(str(datetime.now()) + " ---> Start retrieve_trackers_instagram_media for " + str(start_date) + " --> " + str(end_date))
        # Get all trackers, create a stream for each account id and run it.
        response = requests.get(Config.BACKEND_API_URL + "trackers?source_id=3&token=" + Config.TOKEN)
        if response.status_code != 200:
            try:
                print(str(response))
                print(response.status_code)
                print(response.json())
            except Exception:
                pass
            print(str(datetime.now()) + " ---> Cannot get Instagram trackers: " + str(response))
            ExceptionHandler.handle_exception("Cannot get Instagram trackers: " + str(response), "filtered_stats.py")
        else:
            init_list = response.json()
            temp_list = []
            for tracker in init_list:
                temp_list.append(copy.deepcopy(tracker))
            list_of_lists = []
            for i in range(0, self.NUM_OF_THREADS):
                list_of_lists.append(list())
            # split the list into sublists
            for index, item in enumerate(temp_list):
                list_of_lists[index % self.NUM_OF_THREADS].append(copy.deepcopy(item))
            thread_list = []
            # start a thread for each sublist
            for index, sub_list in enumerate(list_of_lists):
                my_thread = threading.Thread(target=self.process_instagram_list, args=(index, sub_list, start_date, end_date, file_name_prefix))
                thread_list.append(my_thread)
                my_thread.start()
                time.sleep(5)
            # wait for all threads
            print("Waiting for " + str(len(thread_list)) + " threads")
            for thread in thread_list:
                thread.join()
        print(str(datetime.now()) + " ---> End retrieve_trackers_instagram_media")

    def retrieve_trackers_tweets(self, start_date, end_date, file_name_prefix):
        print(str(datetime.now()) + " ---> Start retrieve_trackers_tweets for " + str(start_date) + " --> " + str(end_date))
        # Get all trackers, create a stream for each account id and run it.
        response = requests.get(Config.BACKEND_API_URL + "trackers?source_id=-1&token=" + Config.TOKEN)
        if response.status_code != 200:
            try:
                print(str(response))
                print(response.status_code)
                print(response.json())
            except Exception:
                pass
            #raise Exception("Can not get trackers: "+str(response))
            print(str(datetime.now()) + " ---> Cannot get trackers: " + str(response))
            ExceptionHandler.handle_exception("Cannot get trackers: " + str(response), "filtered_stats.py")
        else:
            # create stream for each tracker type then start all at once
            init_list = response.json()
            temp_list = []
            for tracker in init_list:
                if tracker["data_source_name"] == "TWITTER":
                    temp_list.append(copy.deepcopy(tracker))
            temp_list.sort(key=lambda monitor_object: monitor_object['total_tweets'])
            # init_list.sort(key=lambda monitor_object: monitor_object['total_tweets'])
            list_of_lists = []
            for i in range(0, self.NUM_OF_THREADS):
                list_of_lists.append(list())
            # split the list into sublists
            for index, item in enumerate(temp_list):
                list_of_lists[index % self.NUM_OF_THREADS].append(copy.deepcopy(item))
            thread_list = []
            # start a thread for each sublist
            for index, sub_list in enumerate(list_of_lists):
                my_thread = threading.Thread(target=self.process_list, args=(index, sub_list, start_date, end_date, file_name_prefix))
                thread_list.append(my_thread)
                my_thread.start()
                time.sleep(5)
            # wait for all threads
            print("Waiting for " + str(len(thread_list)) + " threads")
            for thread in thread_list:
                thread.join()
        print(str(datetime.now()) + " ---> End retrieve_trackers_tweets")

    def process_facebook_list(self, id, sub_list, start_date, end_date, file_name_prefix):
        for tracker_info in sub_list:
            try:
                tracker_id = tracker_info["tracker_id"]
                psql = PostgresCLient()
                posts = []
                print("Thread " + str(id) + " Processing: " + str(tracker_id))
                if tracker_info['monitor_type'] == 'ACCOUNT_ANALYSIS':
                    posts = psql.get_all_fb_public_comments(tracker_id, {'start_date': start_date, 'end_date': end_date})
                    posts += psql.get_all_fb_public_posts(tracker_id, {'start_date': start_date, 'end_date': end_date})
                elif tracker_info['monitor_type'] == 'MANAGED_PAGE':
                    posts = psql.get_all_fb_comments(tracker_id, {'start_date': start_date, 'end_date': end_date})
                statsObj = FacebookOfflineStats(tracker_id, posts, tracker_info['monitor_type'])
                stats = statsObj.get_metrics()
                # serialize stats result
                file_path = Config.monitors_metrics_filtered_path + "/%s/%s/facebook/" % (
                    file_name_prefix, str(tracker_id))
                os.makedirs(file_path, exist_ok=True)
                with open(file_path + "%s.pickle" % start_date.strftime("%Y-%m-%d"), 'wb') as f:
                    pickle.dump(stats, f)
                psql.cleanup()
            except Exception as e:
                ExceptionHandler.handle_exception("process_facebook_list error: " + str(e), "filtered_stats")
                traceback.print_exc()

    def process_instagram_list(self, id, sub_list, start_date, end_date, file_name_prefix):
        for tracker_info in sub_list:
            try:
                tracker_id = tracker_info["tracker_id"]
                psql = PostgresCLient()
                media = []
                is_comment = True
                print("Thread " + str(id) + " Processing: " + str(tracker_id))
                if 'follow' in tracker_info and tracker_info['follow'] == 1:
                    media = psql.searchInstagramComments(tracker_id, {}, start_date, end_date)
                    is_comment = False
                else:
                    media = psql.searchInstagramMedia(tracker_id, {}, start_date, end_date)
                statsObj = InstagramOfflineStats(tracker_id, media, is_comment)
                stats = statsObj.get_metrics()
                # serialize stats result
                file_path = Config.monitors_metrics_filtered_path + "/%s/%s/instagram/" % (file_name_prefix, str(tracker_id))
                os.makedirs(file_path, exist_ok=True)
                with open(file_path + "%s.pickle" % start_date.strftime("%Y-%m-%d"), 'wb') as f:
                    pickle.dump(stats, f)
                psql.cleanup()
            except Exception as e:
                ExceptionHandler.handle_exception("process_instagram_list error: " + str(e), "filtered_stats")
                traceback.print_exc()

    def process_list(self, id, sub_list, start_date, end_date, file_name_prefix):
        for tracker_info in sub_list:
            try:
                tracker_id = tracker_info["tracker_id"]
                time_zone = tracker_info["time_zone"]
                # tracker_id_int = int(tracker_id)
                # if tracker_id_int != 1653 and tracker_id_int != 1651 and tracker_id_int != 1956:
                if self.specific_monitor_id is not None and int(tracker_id) != int(self.specific_monitor_id):
                    continue
                print("Thread " + str(id) + " Processing: " + str(tracker_id))
                if tracker_info['follow'] is not None and tracker_info['follow'] != '':
                    FilteredStats.retrieve_account_trackers_tweets_tracker(tracker_id, start_date, end_date, time_zone, file_name_prefix, tracker_info['follow'])
                FilteredStats.retrieve_trackers_tweets_tracker(tracker_id, start_date, end_date, time_zone, file_name_prefix, self.skip_qi)
            except Exception as e:
                ExceptionHandler.handle_exception("process_list error: " + str(e), "filtered_stats")
                traceback.print_exc()
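
    # Note on process_list: trackers with a non-empty 'follow' value appear to be
    # account-analysis monitors; they get the account pipeline in addition to the
    # regular filtered stats.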

    @staticmethod
    def _merge_filtered_stats(stats_list, NUM_OF_STATS):
        cassandra = CassandraClient()
        FilteredStats.NUM_OF_STATS = NUM_OF_STATS
        engagers = collections.Counter()
        influencers = collections.Counter()
        original_tweets_influencers = collections.Counter()
        verified_authors = collections.Counter()
        languages = collections.Counter()
        countriesDistribution = collections.Counter()
        citiesDistribution = collections.Counter()
        topics = collections.Counter()
        dialects = collections.Counter()
        accounts_types = collections.Counter()
        sources = collections.Counter()
        genders = collections.Counter()
        potential_impression = 0
        unique_potential_impression = 0
        unique_authors = 0
        original_tweets_potential_impression = 0
        original_tweets_unique_potential_impression = 0
        original_tweets_unique_authors = 0
        urls = collections.Counter()
        hashtags = collections.Counter()
        videos = collections.Counter()
        photos = collections.Counter()
        videos_retweets = collections.Counter()
        photos_retweets = collections.Counter()
        daily_counts = collections.OrderedDict()
        meta = dict()
        re_tweets = dict()
        re_tweets_counts = collections.Counter()
        top_retweeted_tweets = []
        sentiment = dict()
        overallsentiment = dict()
        overallsentiment_overtime = dict()
        sentiment_overtime = dict()
        benchmark = dict()
        #latest_posts = []
        top_words = collections.Counter()
        topic_discovery = TopicDiscovery()
        topic_discovery_result = collections.Counter()
        topic_discovery_list = []
        trending_topics = []
        daily_topic_discovery = collections.OrderedDict()
        favorites_count = 0
        retweets_count = 0
        daily_favorites_count = {}
        daily_retweets_count = {}
        content_type = {}
        content_type['spam'] = 0
        content_type['news'] = 0
        content_type['original'] = 0
        content_type['verses'] = 0
        weights = []
        statistics_items_map = {
            'ActivePeople': engagers, 'PopularPeople': influencers, 'OriginalTweetsPopularPeople': original_tweets_influencers,
            'verifiedAuthors': verified_authors, 'LanguagesDistribution': languages,
            'SourceDistribution': sources, 'Gender': genders, 'TopURLs': urls, 'ActiveHashtags': hashtags,
            'CountriesDistribution': countriesDistribution, 'CitiesDistribution': citiesDistribution, 'Topics': topics,
            'Dialects': dialects, 'AccountsTypes': accounts_types, 'TopKeywords': top_words
        }
        statistics_int_map = {
            'PotentialImpression': potential_impression, 'UniquePotentialImpression':
            unique_potential_impression, 'UniqueAuthors': unique_authors,
            'original_tweets_PotentialImpression': original_tweets_potential_impression,
            'original_tweets_UniquePotentialImpression': original_tweets_unique_potential_impression,
            'original_tweets_UniqueAuthors': original_tweets_unique_authors,
        }
        for stats in stats_list:
            for item in stats['statistics']:
                item_name = list(item.keys())[0]
                item_list = list(item.values())[0]
                for obj in item_list:
                    if item_name in statistics_items_map:
                        if item_name == 'PopularPeople' or item_name == 'verifiedAuthors':
                            statistics_items_map[item_name][obj['id']] = int(obj['stats_count'])
                        else:
                            statistics_items_map[item_name][obj['id']] += int(obj['stats_count'])
                    elif item_name in statistics_int_map:
                        if item_name == 'PotentialImpression':
                            potential_impression += int(obj['stats_count'])
                        elif item_name == 'UniquePotentialImpression':
                            unique_potential_impression += int(obj['stats_count'])
                        elif item_name == 'UniqueAuthors':
                            unique_authors += int(obj['stats_count'])
                        if item_name == 'original_tweets_PotentialImpression':
                            original_tweets_potential_impression += int(obj['stats_count'])
                        elif item_name == 'original_tweets_UniquePotentialImpression':
                            original_tweets_unique_potential_impression += int(obj['stats_count'])
                        elif item_name == 'original_tweets_UniqueAuthors':
                            original_tweets_unique_authors += int(obj['stats_count'])
                    elif item_name == 'TopImages':
                        photos[obj['id']] += int(obj['stats_count'])
                        meta[obj['id']] = {'id': obj['id'], 'data': obj['src'], 'src': obj['src']}
                    elif item_name == 'TopVideos':
                        videos[obj['id']] += int(obj['stats_count'])
                        meta[obj['id']] = {'thumb': obj['data'], 'url': obj['id'], 'src': obj['src']}
                    elif item_name == 'TopImages_retweets':
                        photos_retweets[obj['id']] = int(obj['stats_count'])
                        meta[obj['id']] = {'id': obj['id'], 'data': obj['src'], 'src': obj['src']}
                    elif item_name == 'TopVideos_retweets':
                        videos_retweets[obj['id']] = int(obj['stats_count'])
                        meta[obj['id']] = {'thumb': obj['data'], 'url': obj['id'], 'src': obj['src']}
            for itemName, itemValue in stats['sentiment_stats'].items():
                sentiment_label = str(itemName)
                if sentiment_label in sentiment:
                    sentiment[sentiment_label] += itemValue
                else:
                    sentiment[sentiment_label] = itemValue
            if 'overall_sentiment_stats' in stats:
                for itemName, itemValue in stats['overall_sentiment_stats'].items():
                    sentiment_label = str(itemName)
                    if sentiment_label in overallsentiment:
                        overallsentiment[sentiment_label] += itemValue
                    else:
                        overallsentiment[sentiment_label] = itemValue
            for item in stats['tweets_over_time']:
                daily_counts[item['id']] = item['stats_count']
            #for item in stats['latest_posts']:
            #    latest_posts.append(item)
            for tweet in stats['top_retweeted_tweets']:
                if tweet['id_str'] in re_tweets_counts:
                    re_tweets[tweet['id_str']]['retweet_count'] += tweet['retweet_count']
                    re_tweets_counts[tweet['id_str']] += tweet['retweet_count']
                else:
                    re_tweets_counts[tweet['id_str']] = tweet['retweet_count']
                    re_tweets[tweet['id_str']] = tweet
            #TODO: Topics are disabled until further notice; re-enable once the feature is improved.
            #if 'topics' in stats and len(stats['topics']['top_words']) > 0:
            #    top_words = stats['topics']['top_words']
            #    weights = stats['topics']['weights']
            if 'overallsentiment_overtime' in stats:
                for itemName, itemValues in stats['overallsentiment_overtime'].items():
                    overallsentiment_label = str(itemName)
                    if overallsentiment_label in overallsentiment_overtime:
                        for value in itemValues:
                            overallsentiment_overtime[overallsentiment_label].append(value)
                    else:
                        overallsentiment_overtime[overallsentiment_label] = itemValues
            if 'sentiment_overtime' in stats:
                for itemName, itemValues in stats['sentiment_overtime'].items():
                    overallsentiment_label = str(itemName)
                    if overallsentiment_label in sentiment_overtime:
                        for value in itemValues:
                            sentiment_overtime[overallsentiment_label].append(value)
                    else:
                        sentiment_overtime[overallsentiment_label] = itemValues
                    # if overallsentiment_label in overallsentiment_overtime:
                    #     overallsentiment[overallsentiment_label] += itemValue
                    # else:
                    #     overallsentiment[overallsentiment_label] = itemValue
            if 'content_type' in stats:
                content_type['spam'] = content_type['spam'] + stats['content_type']['spam']
                content_type['news'] = content_type['news'] + stats['content_type']['news']
                content_type['original'] = content_type['original'] + stats['content_type']['original']
                content_type['verses'] = content_type['verses'] + stats['content_type']['verses']
            if 'topic_discovery' in stats and stats['topic_discovery'] is not None \
                    and len(stats['topic_discovery']) > 0:
                topic_discovery_list.append(stats['topic_discovery'])
            if 'trending_topics' in stats and stats['trending_topics'] is not None \
                    and len(stats['trending_topics']) > 0:
                for topic in stats['trending_topics']:
                    day_str = topic['date']
                    if day_str in daily_topic_discovery:
                        daily_topic_discovery[day_str].append(topic['topics'])
                    else:
                        daily_topic_discovery[day_str] = []
                        daily_topic_discovery[day_str].append(topic['topics'])
            if 'daily_interactions' in stats and stats['daily_interactions'] is not None:
                for favorite_count in stats['daily_interactions']['favorites_count']:
                    day_str = favorite_count['id']
                    if day_str not in daily_favorites_count:
                        daily_favorites_count[day_str] = int(favorite_count['count'])
                    else:
                        daily_favorites_count[day_str] += int(favorite_count['count'])
                for retweet_count in stats['daily_interactions']['retweets_count']:
                    day_str = retweet_count['id']
                    if day_str not in daily_retweets_count:
                        daily_retweets_count[day_str] = int(retweet_count['count'])
                    else:
                        daily_retweets_count[day_str] += int(retweet_count['count'])
            if 'interactions' in stats and stats['interactions'] is not None:
                favorites_count += int(stats['interactions']['favorites_count'])
                retweets_count += int(stats['interactions']['retweets_count'])
            if 'benchmark' in stats:
                for name, value in stats['benchmark'].items():
                    if name in benchmark:
                        benchmark[name] += value
                    else:
                        benchmark[name] = value
        # merge avg statistics for benchmark
        if 'posts_avg' in benchmark:
            benchmark['posts_avg'] = benchmark['posts_avg'] // len(stats_list)
        if 'positive_posts_avg' in benchmark:
            benchmark['positive_posts_avg'] = benchmark['positive_posts_avg'] // len(stats_list)
        if 'negative_posts_avg' in benchmark:
            benchmark['negative_posts_avg'] = benchmark['negative_posts_avg'] // len(stats_list)
        topic_discovery_result = topic_discovery.summarize_topics(topic_discovery_list)
        for tracker_topic in topic_discovery_result:
            keywords_weights = tracker_topic['keywords_weights']
            # sort keywords by weight, highest first, and keep the top five
            sort_keywords = sorted(keywords_weights, key=lambda k: keywords_weights[k], reverse=True)
            top5 = sort_keywords[:5]
            tracker_topic["top5"] = top5
        trending_topics = []
        for day, unmerged_topics in daily_topic_discovery.items():
            merged_topics = topic_discovery.summarize_topics(unmerged_topics)
            for tracker_topic in merged_topics:
                keywords_weights = tracker_topic['keywords_weights']
                sort_keywords = sorted(keywords_weights, key=lambda k: keywords_weights[k], reverse=True)
                top5 = sort_keywords[:5]
                tracker_topic["top5"] = top5
            trending_topics.append({'date': day, 'topics': merged_topics})
        trending_topics.reverse()
        daily_interactions = {}
        sorted_daily_favorites_count = collections.OrderedDict(sorted(
            daily_favorites_count.items()))
        daily_interactions["favorites_count"] = [{'id': day, 'count': count}
                                                 for day, count in sorted_daily_favorites_count.items()]
        sorted_daily_retweets_count = collections.OrderedDict(sorted(
            daily_retweets_count.items()))
        daily_interactions["retweets_count"] = [{'id': day, 'count': count}
                                                for day, count in sorted_daily_retweets_count.items()]
        top_re_tweets_idxs = re_tweets_counts.most_common(FilteredStats.NUM_OF_STATS)
        for idx, count in top_re_tweets_idxs:
            tweet = re_tweets[idx]
            tweet['retweet_count'] = count
            top_retweeted_tweets.append(tweet)
        statistics = []
        top_engagers = engagers.most_common(FilteredStats.NUM_OF_STATS)
        active_people = []
        for i in top_engagers:
            author_id = cassandra.get_author_id(i[0])
            active_people.append({'id': i[0], 'stats_count': i[1], 'user_id': author_id})
        statistics.append({"ActivePeople": active_people})
        top_influencers = influencers.most_common(FilteredStats.NUM_OF_STATS)
        popular_people = []
        for i in top_influencers:
            author_id = cassandra.get_author_id(i[0])
            popular_people.append({'id': i[0], 'stats_count': i[1], 'user_id': author_id})
        statistics.append({"PopularPeople": popular_people})
        original_tweets_top_influencers = original_tweets_influencers.most_common(FilteredStats.NUM_OF_STATS)
        original_tweets_popular_people = []
        for i in original_tweets_top_influencers:
            author_id = cassandra.get_author_id(i[0])
            original_tweets_popular_people.append({'id': i[0], 'stats_count': i[1], 'user_id': author_id})
        statistics.append({"OriginalTweetsPopularPeople": original_tweets_popular_people})
        top_verified_authors = verified_authors.most_common(FilteredStats.NUM_OF_STATS)
        verified_authors_list = []
        for i in top_verified_authors:
            author_id = cassandra.get_author_id(i[0])
            verified_authors_list.append({'id': i[0], 'stats_count': i[1], 'user_id': author_id})
        statistics.append({"verifiedAuthors": verified_authors_list})
        top_languages = languages.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"LanguagesDistribution": [{'id': i[0], 'stats_count': i[1]} for i in top_languages]})
        top_countries = countriesDistribution.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"CountriesDistribution": [{'id': i[0], 'stats_count': i[1]} for i in top_countries]})
        top_cities = citiesDistribution.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"CitiesDistribution": [{'id': i[0], 'stats_count': i[1]} for i in top_cities]})
        top_topics = topics.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"Topics": [{'id': i[0], 'stats_count': i[1]} for i in top_topics]})
        top_dialects = dialects.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"Dialects": [{'id': i[0], 'stats_count': i[1]} for i in top_dialects]})
        top_accounts_types = accounts_types.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"AccountsTypes": [{'id': i[0], 'stats_count': i[1]} for i in top_accounts_types]})
        top_sources = sources.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"SourceDistribution": [{'id': i[0], 'stats_count': i[1]} for i in top_sources]})
        top_genders = genders.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"Gender": [{'id': i[0], 'stats_count': i[1]} for i in top_genders]})
        top_urls = urls.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"TopURLs": [{'id': i[0], 'stats_count': i[1]} for i in top_urls]})
        top_hashtags = hashtags.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"ActiveHashtags": [{'id': i[0], 'stats_count': i[1]} for i in top_hashtags]})
        top_keywords = top_words.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"TopKeywords": [{'id': i[0], 'stats_count': i[1]} for i in top_keywords]})
        statistics.append({'PotentialImpression': [{'id': 'PotentialImpression',
                                                    'stats_count': potential_impression}]})
        statistics.append({'UniquePotentialImpression': [{'id': 'UniquePotentialImpression',
                                                          'stats_count': unique_potential_impression}]})
        statistics.append({'UniqueAuthors': [{'id': 'UniqueAuthors', 'stats_count': unique_authors}]})
        statistics.append({'original_tweets_PotentialImpression': [{'id': 'original_tweets_PotentialImpression',
                                                                    'stats_count': original_tweets_potential_impression}]})
        statistics.append({'original_tweets_UniquePotentialImpression': [{'id': 'original_tweets_UniquePotentialImpression',
                                                                          'stats_count': original_tweets_unique_potential_impression}]})
        statistics.append({'original_tweets_UniqueAuthors': [{'id': 'original_tweets_UniqueAuthors', 'stats_count': original_tweets_unique_authors}]})
        top_photos = photos.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"TopImages": [{'id': meta[i[0]]['id'], 'src': meta[i[0]]['src'], 'data': meta[i[0]]['data'] + ":thumb", 'stats_count': i[1]} for i in top_photos]})
        top_videos = videos.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"TopVideos": [{'id': meta[i[0]]['url'], 'stats_count': i[1], 'data': meta[i[0]]['thumb'], 'src': meta[i[0]]['src']}
                           for i in top_videos]})
        top_photos = photos_retweets.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"TopImages_retweets": [{'id': meta[i[0]]['id'], 'src': meta[i[0]]['src'], 'data': meta[i[0]]['data'] + ":thumb", 'stats_count': i[1]} for i in top_photos]})
        top_videos = videos_retweets.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"TopVideos_retweets": [{'id': meta[i[0]]['url'], 'stats_count': i[1], 'data': meta[i[0]]['thumb'], 'src': meta[i[0]]['src']}
                                    for i in top_videos]})
        authors_statistics = {}
        authors_statistics['percentage'] = 0
        authors_statistics['total_count'] = 0
        total_auth = unique_authors
        if total_auth > 0:
            total_verified_auth = len(verified_authors_list)
            authors_statistics['percentage'] = round((total_verified_auth / total_auth) * 100, 2)
            authors_statistics['total_count'] = total_verified_auth
        statistics.append({'verifiedAuthorsStatistics': authors_statistics})
        interactions = {"favorites_count": favorites_count,
                        "retweets_count": retweets_count}
        return {
            #'latest_posts': latest_posts[-FilteredStats.NUM_OF_STATS:],
            'statistics': statistics,
            'top_retweeted_tweets': top_retweeted_tweets,
            'tweets_over_time': [{'id': day, 'stats_count': count} for day, count in sorted(daily_counts.items())],
            'sentiment_stats': sentiment,
            'overall_sentiment_stats': overallsentiment,
            #'topics': {'top_words': top_words, 'weights': weights},
            'overallsentiment_overtime': overallsentiment_overtime,
            'sentiment_overtime': sentiment_overtime,
            'content_type': content_type,
            'topic_discovery': topic_discovery_result,
            'trending_topics': trending_topics,
            'interactions': interactions,
            'daily_interactions': daily_interactions,
            'benchmark': benchmark
        }
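
    # Merge sketch for _merge_filtered_stats (assuming s1 and s2 are two pickled
    # daily stats dicts loaded from disk):
    #     merged = FilteredStats._merge_filtered_stats([s1, s2], 10)
    #     merged['statistics']  ->  [{'ActivePeople': [{'id': ..., 'stats_count': ...,
    #                                 'user_id': ...}, ...]}, ...]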

    @staticmethod
    def get_report_stats(report_id, report_name, NUM_OF_STATS=10):
        file_path = Config.REPORTS_PATH + "%s/%s" % (str(report_id), report_name)
        try:
            stats_list = json.load(open(file_path, 'rb'))
            stats = FilteredStats._merge_filtered_stats([stats_list['twitter_metrics']], NUM_OF_STATS)
        except FileNotFoundError:
            stats = {}
        return stats

    @staticmethod
    def get_filtered_stats(tracker_id, time_duration, filter_label, NUM_OF_STATS=10):
        if time_duration == 'yesterday':
            date = datetime.today() - timedelta(days=1)
            file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                        % ('daily', str(tracker_id), date.strftime("%Y-%m-%d"))
            try:
                stats_list = pickle.load(open(file_path, 'rb'))
                # run the single stats dict through the merger just to cut the top X
                stats = FilteredStats._merge_filtered_stats([stats_list], NUM_OF_STATS)
            except FileNotFoundError:
                stats = {}
        elif time_duration == 'last-week':
            stats_list = []
            for i in range(7, 0, -1):
                date = datetime.today() - timedelta(days=i)
                file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                            % ('daily', str(tracker_id), date.strftime("%Y-%m-%d"))
                try:
                    stats_list.append(pickle.load(open(file_path, 'rb')))
                except FileNotFoundError:
                    # some days of the week may be missing; skip them
                    continue
            stats = FilteredStats._merge_filtered_stats(stats_list, NUM_OF_STATS)
        elif time_duration == 'last-month':
            stats_list = []
            for i in range(30, 0, -1):
                date = datetime.today() - timedelta(days=i)
                file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                            % ('daily', str(tracker_id), date.strftime("%Y-%m-%d"))
                try:
                    stats_list.append(pickle.load(open(file_path, 'rb')))
                except FileNotFoundError:
                    # some days of the month may be missing; skip them
                    continue
            stats = FilteredStats._merge_filtered_stats(stats_list, NUM_OF_STATS)
        else:
            stats_list = []
            file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                        % (time_duration, str(tracker_id), filter_label)
            try:
                stats_list.append(pickle.load(open(file_path, 'rb')))
            except FileNotFoundError:
                # this should not happen; the file may have been deleted
                ExceptionHandler.handle_exception("Cannot load filtered stats: file " + str(file_path) + " does not exist!",
                                                  "filtered_stats.py")
            stats = FilteredStats._merge_filtered_stats(stats_list, NUM_OF_STATS)
        return stats
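
    # Usage sketch for get_filtered_stats (hypothetical tracker id): 'yesterday',
    # 'last-week' and 'last-month' read the pickled daily files; any other value
    # is treated as a custom prefix with filter_label as the file name:
    #     stats = FilteredStats.get_filtered_stats(1234, 'last-week', None)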

    @staticmethod
    def get_filtered_stats_by_dates(tracker_id, start_date, end_date, NUM_OF_STATS=10, ignore_missing=True):
        start_date = datetime.strptime(start_date, '%Y-%m-%d')
        end_date = datetime.strptime(end_date, '%Y-%m-%d')
        stats_list = []
        while start_date <= end_date:
            file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                        % ('daily', str(tracker_id), start_date.strftime("%Y-%m-%d"))
            try:
                stats_list.append(pickle.load(open(file_path, 'rb')))
            except FileNotFoundError:
                print("file not found for tracker_id " + str(tracker_id) + " and date " + start_date.strftime("%Y-%m-%d"))
                if not ignore_missing:
                    return {'missing_files': True}
            start_date = start_date + timedelta(days=1)
        stats = FilteredStats._merge_filtered_stats(stats_list, NUM_OF_STATS)
        return stats
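
    # Usage sketch for get_filtered_stats_by_dates: both dates are inclusive
    # 'YYYY-MM-DD' strings; with ignore_missing=False a single missing daily
    # pickle short-circuits to {'missing_files': True}:
    #     FilteredStats.get_filtered_stats_by_dates(1234, '2020-01-01', '2020-01-07')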

    @staticmethod
    def get_account_filtered_stats_by_dates(tracker_id, start_date, end_date, ignore_missing=True, stats_name='benchmark', page_number=0):
        start_date = datetime.strptime(start_date, '%Y-%m-%d')
        end_date = datetime.strptime(end_date, '%Y-%m-%d')
        stats_list = []
        while start_date <= end_date:
            file_path = Config.account_monitor_metrics_filtered_path + "%s/%s/%s/%s.pickle" \
                        % (stats_name.replace(" ", "_"), 'daily', str(tracker_id), start_date.strftime("%Y-%m-%d"))
            try:
                stats_list.append(pickle.load(open(file_path, 'rb')))
            except FileNotFoundError:
                print("file not found for tracker_id " + str(tracker_id) + " and date " + start_date.strftime("%Y-%m-%d"))
                if not ignore_missing:
                    return {'missing_files': True}
            start_date = start_date + timedelta(days=1)
        if stats_name == 'benchmark':
            stats = FilteredStats._merge_account_benchmark(stats_list)
        elif stats_name == 'audience':
            stats = FilteredStats._merge_account_audience_stats(stats_list)
        elif stats_name == 'content':
            stats = FilteredStats._merge_account_content_stats(stats_list)
        elif stats_name == "engagement":
            stats = FilteredStats._merge_account_engagement_stats(stats_list)
        elif stats_name == "customer care":
            stats = FilteredStats._merge_account_customercare_stats(stats_list)
        elif stats_name == "customer_care_questions_list":
            stats = FilteredStats._merge_account_questions_stats(tracker_id, stats_list, int(page_number))
        else:
            stats = {}
        return stats
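
    # In get_account_filtered_stats_by_dates, stats_name selects the merger:
    # 'benchmark', 'audience', 'content', 'engagement', 'customer care', or
    # 'customer_care_questions_list' (the last one is paged via page_number).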

    @staticmethod
    def get_qi_predefined_filtered_stats(tracker_id, time_duration, inspect_type, inspect_name, NUM_OF_STATS=10):
        # daily_path = 'quick_inspect/daily'
        if time_duration == 'yesterday':
            date = datetime.today() - timedelta(days=1)
            # file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
            #             % (daily_path, str(tracker_id), date.strftime("%Y-%m-%d"))
            try:
                stats_list = QuickInspector.get_inspector_stats(tracker_id, date.strftime("%Y-%m-%d"), inspect_type, 'predefinedqi', date.strftime("%Y-%m-%d"), inspect_name)
                # stats_list = pickle.load(open(file_path, 'rb'))
                # run the single stats dict through the merger just to cut the top X
                stats = FilteredStats._merge_qi_stats([stats_list], NUM_OF_STATS)
            except FileNotFoundError:
                stats = {}
        elif time_duration == 'last-week':
            stats_list = []
            for i in range(7, 0, -1):
                date = datetime.today() - timedelta(days=i)
                # file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                #             % (daily_path, str(tracker_id), date.strftime("%Y-%m-%d"))
                try:
                    inspect = QuickInspector.get_inspector_stats(tracker_id, date.strftime("%Y-%m-%d"), inspect_type, 'predefinedqi', date.strftime("%Y-%m-%d"), inspect_name)
                    stats_list.append(inspect)
                except FileNotFoundError:
                    # some days of the week may be missing; skip them
                    continue
            stats = FilteredStats._merge_qi_stats(stats_list, NUM_OF_STATS)
        elif time_duration == 'last-month':
            stats_list = []
            for i in range(30, 0, -1):
                date = datetime.today() - timedelta(days=i)
                # file_path = Config.monitors_metrics_filtered_path + "%s/%s/%s.pickle" \
                #             % (daily_path, str(tracker_id), date.strftime("%Y-%m-%d"))
                try:
                    inspect = QuickInspector.get_inspector_stats(tracker_id, date.strftime("%Y-%m-%d"), inspect_type, 'predefinedqi', date.strftime("%Y-%m-%d"), inspect_name)
                    stats_list.append(inspect)
                except FileNotFoundError:
                    # some days of the month may be missing; skip them
                    continue
            stats = FilteredStats._merge_qi_stats(stats_list, NUM_OF_STATS)
        else:
            ExceptionHandler.handle_exception("Unsupported time_duration in QI filtered stats: " + str(time_duration),
                                              "filtered_stats.py")
            stats = {}
        return stats

    @staticmethod
    def get_metric_by_name(tracker_id, filter_type, filter_label, metric_name):
        NUM_OF_STATS = 100
        stats = FilteredStats.get_filtered_stats(tracker_id, filter_type, filter_label, NUM_OF_STATS)
        # list-style metrics live inside stats['statistics']
        if metric_name in ("ActivePeople", "PopularPeople", "ActiveHashtags", "TopImages", "TopVideos", "TopURLs", "TopKeywords"):
            for index in range(len(stats['statistics'])):
                for key in stats['statistics'][index]:
                    if key == metric_name:
                        return stats['statistics'][index][key]
        if metric_name == "top_retweeted_tweets":
            return stats['top_retweeted_tweets']

    @staticmethod
    def _merge_qi_stats(stats_list, NUM_OF_STATS):
        cassandra = CassandraClient()
        tweet_cnt = 0
        unique_authors = 0
        engagers = collections.Counter()
        potential_impression = 0
        unique_potential_impression = 0
        hashtags = collections.Counter()
        overallsentiment = dict()
        sentiment = dict()
        hours = collections.Counter()
        re_tweets_counts = collections.Counter()
        top_retweeted_tweets = []
        re_tweets = dict()
        FilteredStats.NUM_OF_STATS = NUM_OF_STATS
        weights = []
        statistics_items_map = {
            'ActivePeople': engagers, 'ActiveHashtags': hashtags, 'tweets_over_hours': hours
        }
        statistics_int_map = {
            'PotentialImpression': potential_impression, 'UniquePotentialImpression':
            unique_potential_impression, 'UniqueAuthors': unique_authors, 'tweets_num': tweet_cnt
        }
        for stats in stats_list:
            if stats == {}:
                continue
            if 'statistics' in stats:
                for item in stats['statistics']:
                    item_name = list(item.keys())[0]
                    item_list = list(item.values())[0]
                    for obj in item_list:
                        if item_name in statistics_items_map:
                            statistics_items_map[item_name][obj['id']] += int(obj['stats_count'])
                        elif item_name in statistics_int_map:
                            if item_name == 'PotentialImpression':
                                potential_impression += int(obj['stats_count'])
                            elif item_name == 'UniquePotentialImpression':
                                unique_potential_impression += int(obj['stats_count'])
                            elif item_name == 'UniqueAuthors':
                                unique_authors += int(obj['stats_count'])
            if 'sentiment_stats' in stats:
                for itemName, itemValue in stats['sentiment_stats'].items():
                    sentiment_label = str(itemName)
                    if sentiment_label in sentiment:
                        sentiment[sentiment_label] += itemValue
                    else:
                        sentiment[sentiment_label] = itemValue
            if 'overall_sentiment_stats' in stats:
                for itemName, itemValue in stats['overall_sentiment_stats'].items():
                    sentiment_label = str(itemName)
                    if sentiment_label in overallsentiment:
                        overallsentiment[sentiment_label] += itemValue
                    else:
                        overallsentiment[sentiment_label] = itemValue
            #for item in stats['tweets_over_time']:
            #    daily_counts[item['id']] = item['stats_count']
            #for item in stats['latest_posts']:
            #    latest_posts.append(item)
            for tweet in stats['top_retweeted_tweets']:
                if tweet['id_str'] in re_tweets_counts:
                    re_tweets[tweet['id_str']]['retweet_count'] += tweet['retweet_count']
                    re_tweets_counts[tweet['id_str']] += tweet['retweet_count']
                else:
                    re_tweets_counts[tweet['id_str']] = tweet['retweet_count']
                    re_tweets[tweet['id_str']] = tweet
            if 'tweets_num' in stats and stats['tweets_num'] is not None:
                tweet_cnt += int(stats['tweets_num'])
        top_re_tweets_idxs = re_tweets_counts.most_common(FilteredStats.NUM_OF_STATS)
        for idx, count in top_re_tweets_idxs:
            tweet = re_tweets[idx]
            tweet['retweet_count'] = count
            top_retweeted_tweets.append(tweet)
        statistics = []
        top_hashtags = hashtags.most_common(FilteredStats.NUM_OF_STATS)
        statistics.append(
            {"ActiveHashtags": [{'id': i[0], 'stats_count': i[1]} for i in top_hashtags]})
        # top_keywords = top_words.most_common(FilteredStats.NUM_OF_STATS)
        # statistics.append(
        #     {"TopKeywords": [{'id': i[0], 'stats_count': i[1]} for i in top_keywords]})
        statistics.append({'PotentialImpression': [{'id': 'PotentialImpression',
                                                    'stats_count': potential_impression}]})
        statistics.append({'UniquePotentialImpression': [{'id': 'UniquePotentialImpression',
                                                          'stats_count': unique_potential_impression}]})
        statistics.append({'UniqueAuthors': [{'id': 'UniqueAuthors', 'stats_count': unique_authors}]})
        return {
            'tweets_num': tweet_cnt,
            'statistics': statistics,
            'top_retweeted_tweets': top_retweeted_tweets,
            'sentiment_stats': sentiment,
            'overall_sentiment_stats': overallsentiment,
        }

    @staticmethod
    def _merge_account_benchmark(stats_list):
        benchmark = dict()
        initial_followers = 0
        followers_growth = 0
        for stats in stats_list:
            if 'initial_followers' in stats['benchmark']:
                initial_followers = stats['benchmark']['initial_followers']
            if 'followers_growth' in stats['benchmark']:
                followers_growth = stats['benchmark']['followers_growth']
            for name, value in stats['benchmark'].items():
                if name in benchmark:
                    benchmark[name] += value
                else:
                    benchmark[name] = value
        benchmark['initial_followers'] = initial_followers
        if 'avg_posts' in benchmark:
            benchmark['avg_posts'] = benchmark['avg_posts'] // len(stats_list)
        if 'avg_positive_posts' in benchmark:
            benchmark['avg_positive_posts'] = benchmark['avg_positive_posts'] // len(stats_list)
        if 'avg_negative_posts' in benchmark:
            benchmark['avg_negative_posts'] = benchmark['avg_negative_posts'] // len(stats_list)
        if 'response_time' in benchmark:
            benchmark['response_time'] = str(timedelta(seconds=int(benchmark['response_time']) // len(stats_list)))
        if 'response_time_positive' in benchmark:
            benchmark['response_time_positive'] = str(timedelta(seconds=int(benchmark['response_time_positive']) // len(stats_list)))
        if 'response_time_negative' in benchmark:
            benchmark['response_time_negative'] = str(timedelta(seconds=int(benchmark['response_time_negative']) // len(stats_list)))
        if 'response_rate' in benchmark:
            benchmark['response_rate'] = round(benchmark['response_rate'] / len(stats_list), 2) * 100
        if 'response_rate_positive' in benchmark:
            benchmark['response_rate_positive'] = round(benchmark['response_rate_positive'] / len(stats_list), 2)
        if 'response_rate_negative' in benchmark:
            benchmark['response_rate_negative'] = round(benchmark['response_rate_negative'] / len(stats_list), 2)
        return {
            'benchmark': benchmark
        }
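
    # _merge_account_benchmark semantics: counters are summed across days, the
    # averaged fields (avg_posts, response_time*, response_rate*) are re-normalized
    # by the number of merged days, and 'initial_followers' keeps the last value seen.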

    @staticmethod
    def _merge_account_content_stats(stats_list):
        tweets_over_time = collections.Counter()
        content_type = collections.Counter()
        total_tweets = 0
        total_tweets_today = 0
        total_tweets_last_week = 0
        total_tweets_last_month = 0
        for stats in stats_list:
            if stats == {}:
                continue
            if 'content_type' in stats:
                for item in stats['content_type']:
                    content_type[item['name']] += item['value']
            if 'tweets_over_time' in stats:
                for item in stats['tweets_over_time']:
                    tweets_over_time[item['name']] += item['value']
            if 'total_tweets' in stats:
                total_tweets += stats['total_tweets']
            if 'total_tweets_today' in stats:
                total_tweets_today += stats['total_tweets_today']
            if 'total_tweets_last_week' in stats:
                total_tweets_last_week += stats['total_tweets_last_week']
            if 'total_tweets_last_month' in stats:
                total_tweets_last_month += stats['total_tweets_last_month']
        response = {}
        tweets_over_time = tweets_over_time.most_common(FilteredStats.NUM_OF_STATS)
        content_type = content_type.most_common(FilteredStats.NUM_OF_STATS)
        response["tweets_over_time"] = [{'name': i[0], 'value': i[1]} for i in tweets_over_time]
        response["content_type"] = [{'name': i[0], 'value': i[1]} for i in content_type]
        response['total_tweets_today'] = total_tweets_today
        response['total_tweets_last_week'] = total_tweets_last_week
        response['total_tweets_last_month'] = total_tweets_last_month
        return response

    @staticmethod
    def _merge_account_audience_stats(stats_list):
        statistics = []
        avgFollowersChange = 0
        maxFollowersChange = 0
        maxFollowersChangeDate = ' '
        totalFollowers = 0
        totalFollowersChange = 0
        followers_over_time = []
        TopEngagers = []
        TopInfluencers = []
        TopFans = []
        TopRetweeters = []
        for stats in stats_list:
            if stats == {}:
                continue
            if 'avgFollowersChange' in stats:
                avgFollowersChange += stats['avgFollowersChange']
            if 'maxFollowersChange' in stats:
                maxFollowersChange = max(maxFollowersChange, stats['maxFollowersChange'])
                if maxFollowersChange == stats['maxFollowersChange']:
                    maxFollowersChangeDate = stats['maxFollowersChangeDate']
            if 'totalFollowers' in stats:
                totalFollowers += stats['totalFollowers']
            if 'totalFollowersChange' in stats:
                totalFollowersChange += stats['totalFollowersChange']
            if 'followers_over_time' in stats:
                for item in stats['followers_over_time']:
                    followers_over_time.append(item)
            if 'statistics' in stats:
                for item in stats['statistics']:
                    item_name = list(item.keys())[0]
                    item_list = list(item.values())[0]
                    for obj in item_list:
                        if item_name == 'TopEngagers':
                            existing = FilteredStats.search_list_objects_by_obj(TopEngagers, obj)
                            if existing:
                                TopEngagers.remove(existing)
                                existing['value'] += obj['value']
                                TopEngagers.append(existing)
                            else:
                                TopEngagers.append(obj)
                        if item_name == 'TopInfluencers':
                            existing = FilteredStats.search_list_objects_by_obj(TopInfluencers, obj)
                            if existing:
                                TopInfluencers.remove(existing)
                                existing['value'] += obj['value']
                                TopInfluencers.append(existing)
                            else:
                                TopInfluencers.append(obj)
                        if item_name == 'TopFans':
                            existing = FilteredStats.search_list_objects_by_obj(TopFans, obj)
                            if existing:
                                TopFans.remove(existing)
                                existing['value'] += obj['value']
                                TopFans.append(existing)
                            else:
                                TopFans.append(obj)
                        if item_name == 'TopRetweeters':
                            existing = FilteredStats.search_list_objects_by_obj(TopRetweeters, obj)
                            if existing:
                                TopRetweeters.remove(existing)
                                existing['value'] += obj['value']
                                TopRetweeters.append(existing)
                            else:
                                TopRetweeters.append(obj)
        avgFollowersChange = avgFollowersChange // len(stats_list)
        totalFollowers = totalFollowers // len(stats_list)
        totalFollowersChange = totalFollowersChange // len(stats_list)
        TopEngagers = sorted(TopEngagers, key=lambda k: k['value'], reverse=True)
        TopInfluencers = sorted(TopInfluencers, key=lambda k: k['value'], reverse=True)
        TopFans = sorted(TopFans, key=lambda k: k['value'], reverse=True)
        TopRetweeters = sorted(TopRetweeters, key=lambda k: k['value'], reverse=True)
        return {
            'followers_over_time': followers_over_time,
            'statistics': [{'TopEngagers': TopEngagers[:10]}, {'TopInfluencers': TopInfluencers[:10]}, {'TopFans': TopFans[:10]}, {'TopRetweeters': TopRetweeters[:10]}],
            'totalFollowers': totalFollowers,
            'totalFollowersChange': totalFollowersChange,
            'maxFollowersChange': maxFollowersChange,
            'maxFollowersChangeDate': maxFollowersChangeDate,
            'avgFollowersChange': avgFollowersChange,
        }

    @staticmethod
    def _merge_account_engagement_stats(stats_list):
        avg_interaction_per_day = 0
        max_interaction = 0
        max_interaction_date = ' '
        interactions_distribution = collections.Counter()
        posts_engagement = []
        top_engaged_posts = []
        # one 24-hour activity bucket per weekday
        user_activity = [
            {'name': 'Saturday', 'value': [0] * 24},
            {'name': 'Sunday', 'value': [0] * 24},
            {'name': 'Monday', 'value': [0] * 24},
            {'name': 'Tuesday', 'value': [0] * 24},
            {'name': 'Wednesday', 'value': [0] * 24},
            {'name': 'Thursday', 'value': [0] * 24},
            {'name': 'Friday', 'value': [0] * 24}]
        for stats in stats_list:
            if stats == {}:
                continue
            if 'max_interaction' in stats:
                max_interaction = max(max_interaction, stats['max_interaction'])
                if max_interaction == stats['max_interaction']:
                    max_interaction_date = stats['max_interaction_date']
            if 'avg_interaction_per_day' in stats:
                avg_interaction_per_day += stats['avg_interaction_per_day']
            if 'interactions_distribution' in stats:
                for item in stats['interactions_distribution']:
                    interactions_distribution[item['name']] += item['value']
            if 'posts_engagement' in stats:
                for item in stats['posts_engagement']:
                    posts_engagement.append(item)
            if 'top_engaged_posts' in stats:
                for item in stats['top_engaged_posts']:
                    top_engaged_posts.append(item)
            if 'user_activity' in stats and len(stats['user_activity']) > 0:
                for item in stats['user_activity']:
                    for activity in user_activity:
                        if activity['name'] == item['name']:
                            total = list(map(int.__add__, activity['value'], item['value']))
                            activity['value'] = total
        interactions_distribution = interactions_distribution.most_common()
        metrics_res = {
            'top_engaged_posts': top_engaged_posts,
            'interactions_distribution': [{'name': i[0], 'value': i[1]} for i in interactions_distribution],
            'user_activity': user_activity,
            'posts_engagement': posts_engagement[:10],
            'max_interaction': max_interaction,
            'max_interaction_date': max_interaction_date,
            'avg_interaction_per_day': avg_interaction_per_day
        }
        return metrics_res

    @staticmethod
    def _merge_account_questions_stats(tracker_id, stats_list, page_number):
        tweets_per_page = 10
        first_index = (page_number - 1) * tweets_per_page
        last_index = page_number * tweets_per_page
        list_size = 0
        tweets_ids_list = []
        for stats in stats_list:
            if stats == {}:
                continue
            if 'tweet_ids' in stats:
                for tweet_id in stats['tweet_ids']:
                    tweets_ids_list.append(tweet_id)
        list_size = len(tweets_ids_list)
        tweets_ids_list = tweets_ids_list[first_index:last_index]
        question_list = []
        if len(tweets_ids_list) > 0:
            psql = PostgresCLient()
            tweets = psql.search(tracker_id, None, None, None, None, {'tweet_id': tweets_ids_list})
            if len(tweets) > 0:
                for tweet in tweets:
                    tweet['reply_count'] = 0
                    tweet['interaction_count'] = tweet['reply_count'] + tweet['retweet_count'] + tweet['favorite_count']
                    question_list.append(tweet)
            else:
                warnings.warn("question not found for tracker " + str(tracker_id))
        response = dict()
        response['pages_count'] = math.ceil(list_size / tweets_per_page)
        question_list = sorted(question_list, key=lambda x: datetime.strptime(x["created_at"], '%a %b %d %H:%M:%S +0000 %Y'), reverse=True)
        response['tweets'] = question_list
        return response
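
    # Paging in _merge_account_questions_stats: with tweets_per_page = 10 and
    # 1-based page numbers, page_number=1 selects ids 0..9, page_number=2 selects
    # 10..19, and pages_count = ceil(total_ids / 10).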

    @staticmethod
    def _merge_account_customercare_stats(stats_list):
        response_avg_time = 0
        total_questions = 0
        response_rate = collections.Counter()
        response_time_segments = collections.Counter()
        for stats in stats_list:
            if stats == {}:
                continue
            if 'response_avg_time' in stats:
                response_avg_time += pytimeparse.parse(stats['response_avg_time'])
            if 'total_questions' in stats:
                total_questions += stats['total_questions']
            if 'response_rate' in stats:
                for item in stats['response_rate']:
                    response_rate[item['name']] += item['value']
            if 'response_time_segments' in stats:
                for item in stats['response_time_segments']:
                    response_time_segments[item['name']] += int(item['value'])
        response_time_segments = response_time_segments.most_common()
        response_rate = response_rate.most_common()
        metrics_res = {
            'response_time_segments': [{'name': i[0], 'value': i[1]} for i in response_time_segments],
            'response_rate': [{'name': i[0], 'value': i[1]} for i in response_rate],
            'response_avg_time': str(timedelta(seconds=int(response_avg_time // len(stats_list)))),
            'total_questions': total_questions,
        }
        return metrics_res

    @staticmethod
    def search_list_objects_by_obj(object_list, current_obj):
        for item in object_list:
            if 'user_id' in item and item['user_id'] != {}:
                if item['user_id'] == current_obj['user_id']:
                    return item
            else:
                if item['name'] == current_obj['name']:
                    return item
        return None
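
    # search_list_objects_by_obj returns the matching entry or None; callers rely
    # on a falsy result to append a fresh entry instead of merging values.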


if __name__ == "__main__":
    try:
        skip_qi = len(sys.argv) > 2 and sys.argv[2] == "True"
        ExceptionHandler.debug_channel("Started filtered stats for " + sys.argv[1] + " day(s) and skip_qi = " + str(skip_qi), "filtered stats")
        FilteredStats(number_of_days=int(sys.argv[1]), skip_qi=skip_qi)
        ExceptionHandler.debug_channel("Finished filtered stats for " + sys.argv[1] + " day(s) and skip_qi = " + str(skip_qi), "filtered stats")
    except Exception as e:
        ExceptionHandler.handle_exception(e, "FilteredStats")
        traceback.print_exc()
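
# Invocation sketch (argv[1] = number of days to backfill, argv[2] = "True" to skip QI events):
#     python filtered_stats.py 31 True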