Advertisement
add1ctus

extract_features.py

Apr 14th, 2016
128
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.65 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import bz2
  4. import json
  5. import pandas
  6. import collections
  7. import argparse
  8.  
  9.  
  10. def last_value(series, times, time_point=60*5):
  11.     values = [v for t, v in zip(times, series) if t <= time_point]
  12.     return values[-1] if len(values) > 0 else 0
  13.  
  14. def filter_events(events, time_point=60*5):
  15.     return [event for event in events if event['time'] <= time_point]
  16.  
  17. def extract_match_features(match, time_point=None):
  18.     extract_items_time = [
  19.         (41, 'bottle'),
  20.         (45, 'courier'),
  21.         (84, 'flying_courier'),
  22.     ]
  23.     extract_items_count = [
  24.         (46, 'tpscroll'),
  25.         (29, 'boots'),
  26.         (42, 'ward_observer'),
  27.         (43, 'ward_sentry'),
  28.     ]
  29.    
  30.     feats = [
  31.         ('match_id', match['match_id']),
  32.         ('start_time', match['start_time']),
  33.         ('lobby_type', match['lobby_type']),
  34.     ]
  35.    
  36.     # player features
  37.    
  38.     times = match['times']
  39.     for player_index, player in enumerate(match['players']):
  40.         player_id = ('r%d' % (player_index+1)) if player_index < 5 else ('d%d' % (player_index-4))
  41.        
  42.         feats += [
  43.             (player_id + '_hero', player['hero_id']),
  44.             (player_id + '_level', max([0] + [entry['level'] for entry in filter_events(player['ability_upgrades'], time_point)])),
  45.             (player_id + '_xp', last_value(player['xp_t'], times, time_point)),
  46.             (player_id + '_gold', last_value(player['gold_t'], times, time_point)),
  47.             (player_id + '_lh', last_value(player['lh_t'], times, time_point)),
  48.             (player_id + '_kills', len(filter_events(player['kills_log'], time_point))),
  49.             (player_id + '_deaths', len([
  50.                     1
  51.                     for other_player in match['players']
  52.                     for event in filter_events(other_player['kills_log'], time_point)
  53.                     if event['player'] == player_index  
  54.                 ])),
  55.             (player_id + '_items', len(filter_events(player['purchase_log'], time_point))),
  56.         ]
  57.        
  58.     # first blood
  59.     first_blood_objectives = filter_events([obj for obj in match['objectives'] if obj['type'] == 'firstblood'], time_point)
  60.     fb = first_blood_objectives[0] if len(first_blood_objectives) > 0 else {}
  61.     feats += [
  62.         ('first_blood_time', fb.get('time')),
  63.         ('first_blood_team', int(fb['player1'] >= 5) if fb.get('player1') is not None else None),
  64.         ('first_blood_player1', fb.get('player1')),
  65.         ('first_blood_player2', fb.get('player2')),
  66.     ]
  67.    
  68.     # team features
  69.     radiant_players = match['players'][:5]
  70.     dire_players = match['players'][5:]
  71.    
  72.     for team, team_players in (('radiant', radiant_players), ('dire', dire_players)):
  73.         for item_id, item_name in extract_items_time:
  74.             item_times = [
  75.                 entry['time']
  76.                 for player in team_players
  77.                 for entry in filter_events(player['purchase_log'], time_point)
  78.                 if entry['item_id'] == item_id
  79.             ]
  80.             first_item_time = min(item_times) if len(item_times) > 0 else None
  81.             feats += [
  82.                 ('%s_%s_time' % (team, item_name), first_item_time)
  83.             ]
  84.            
  85.         for item_id, item_name in extract_items_count:
  86.             item_count = sum([
  87.                 1
  88.                 for player in team_players
  89.                 for entry in filter_events(player['purchase_log'], time_point)
  90.                 if entry['item_id'] == item_id
  91.             ])
  92.             feats += [
  93.                 ('%s_%s_count' % (team, item_name), item_count)
  94.             ]
  95.            
  96.         team_wards = filter_events([
  97.             entry
  98.             for player in team_players
  99.             for entry in (player['obs_log'] + player['sen_log'])
  100.         ], time_point)
  101.        
  102.         feats += [
  103.             ('%s_first_ward_time' % team, min([entry['time'] for entry in team_wards]) if len(team_wards) > 0 else None),
  104.         ]
  105.  
  106.     if 'finish' in match:
  107.         finish = match['finish']
  108.         feats += [
  109.             ('duration', finish['duration']),
  110.             ('radiant_win', int(finish['radiant_win'])),
  111.             ('tower_status_radiant', finish['tower_status_radiant']),
  112.             ('tower_status_dire', finish['tower_status_dire']),
  113.             ('barracks_status_radiant', finish['barracks_status_radiant']),
  114.             ('barracks_status_dire', finish['barracks_status_dire']),
  115.         ]
  116.  
  117.     return collections.OrderedDict(feats)
  118.  
  119.  
  120. def iterate_matches(matches_filename):
  121.     with bz2.BZ2File(matches_filename) as f:
  122.         for n, line in enumerate(f):
  123.             match = json.loads(line.decode('utf-8'))
  124.             yield match
  125.             if (n+1) % 1000 == 0:
  126.                 print ('Processed ',(n+1),' matches')
  127.  
  128.                
  129. def create_table(matches_filename, time_point):
  130.     df = {}
  131.     fields = None
  132.     for match in iterate_matches(matches_filename):
  133.         features = extract_match_features(match, time_point)
  134.         if fields is None:
  135.             fields = features.keys()
  136.             df = {key: [] for key in fields}    
  137.         for key, value in features.items():
  138.             df[key].append(value)
  139.     df = pandas.DataFrame.from_records(df).ix[:, fields].set_index('match_id').sort_index()
  140.     return df
  141.  
  142.  
  143. if __name__ == '__main__':
  144.     parser = argparse.ArgumentParser(description='Extract features from matches data')
  145.     parser.add_argument('input_matches')
  146.     parser.add_argument('output_csv')
  147.     parser.add_argument('--time', type=int, default=5*60)
  148.     args = parser.parse_args()
  149.    
  150.     features_table = create_table(args.input_matches, args.time)
  151.     features_table.to_csv(args.output_csv)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement