Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Time-series impressions by hour of day (HOD) and day of week (DOW).
# pandas numbers weekdays 0 = Monday ... 6 = Sunday (see weekday_dict below).
gmt_offset = -4

# Create a proper datetime column, then shift it by the local GMT offset so
# the hour/day buckets reflect local time rather than the raw timestamps.
tweet_df['ts'] = pd.to_datetime(tweet_df['time'])
tweet_df['ts'] = tweet_df['ts'] + pd.to_timedelta(gmt_offset, unit='h')

# Add hour-of-day and day-of-week columns (vectorised datetime accessors).
tweet_df['hod'] = tweet_df['ts'].dt.hour
tweet_df['dow'] = tweet_df['ts'].dt.dayofweek

weekday_dict = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri', 5: 'Sat', 6: 'Sun'}


def _tally_impressions(keys, impressions):
    """Return ``(totals, counts)`` dicts keyed by each distinct value in *keys*.

    ``totals[k]`` is the sum of ``int(impression)`` over every row whose key
    is ``k``; ``counts[k]`` is the number of such rows. *keys* and
    *impressions* are iterated in lockstep.
    """
    totals = {}
    counts = {}
    for key, imp in zip(keys, impressions):
        totals[key] = totals.get(key, 0) + int(imp)
        counts[key] = counts.get(key, 0) + 1
    return totals, counts


# Process tweets, collect stats. Iterating the columns directly replaces the
# removed DataFrame.ix row lookups and does not depend on the index labels.
hod_dict, hod_count = _tally_impressions(tweet_df['hod'], tweet_df['impressions'])
dow_dict, dow_count = _tally_impressions(tweet_df['dow'], tweet_df['impressions'])

# Each report line is a single pre-formatted string so print(...) emits the
# same text on Python 2 and Python 3. Averages use floor division, i.e. they
# are truncated to whole impressions.
print('Average impressions per tweet by hour tweeted:')
print('----------------------------------------------')
for hod in sorted(hod_dict):  # sorted so rows appear in chronological order
    print('{0} - {1} : {2} => {3} tweets'.format(
        hod, hod + 1, hod_dict[hod] // hod_count[hod], hod_count[hod]))

print('\nAverage impressions per tweet by day of week tweeted:')
print('-----------------------------------------------------')
for dow in sorted(dow_dict):  # Monday first, Sunday last
    # The double space before "tweets" is kept to match the existing output.
    print('{0} : {1} => {2}  tweets'.format(
        weekday_dict[dow], dow_dict[dow] // dow_count[dow], dow_count[dow]))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement