SHARE
TWEET

Untitled

a guest Feb 21st, 2020 102 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import sys
  4.  
  5. import getopt
  6. from datetime import datetime
  7.  
  8. import pandas as pd
  9.  
  10. from sqlalchemy import create_engine
  11.  
  12. if __name__ == "__main__":
  13.  
  14.     #input params setup
  15.     unixOptions = "sdt:edt"  
  16.     gnuOptions = ["start_dt=", "end_dt="]
  17.  
  18.     fullCmdArguments = sys.argv
  19.     argumentList = fullCmdArguments[1:]    #excluding script name
  20.  
  21.     try:  
  22.         arguments, values = getopt.getopt(argumentList, unixOptions, gnuOptions)
  23.     except getopt.error as err:  
  24.         # output error, and return with an error code
  25.         print (str(err))
  26.         sys.exit(2)
  27.  
  28.     start_dt = '1981-01-01'
  29.     end_dt = '1998-01-01'    
  30.     for currentArgument, currentValue in arguments:  
  31.         if currentArgument in ("-sdt", "--start_dt"):
  32.             start_dt = currentValue                                  
  33.         elif currentArgument in ("-edt", "--end_dt"):
  34.             end_dt = currentValue  
  35.  
  36.     db_config = {'user': 'my_user',
  37.                  'pwd': 'my_user_password',
  38.                  'host': 'localhost',
  39.                  'port': 5432,
  40.                  'db': 'games'}  
  41.     connection_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_config['user'],
  42.                                                              db_config['pwd'],
  43.                                                              db_config['host'],
  44.                                                              db_config['port'],
  45.                                                              db_config['db'])
  46.  
  47.     engine = create_engine(connection_string)
  48.            
  49.     query = ''' SELECT *
  50.                 FROM data_raw
  51.                 WHERE year_of_release::TIMESTAMP BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  52.             '''.format(start_dt, end_dt)
  53.  
  54.     data_raw = pd.io.sql.read_sql(query, con = engine, index_col = 'game_id')
  55.  
  56.     columns_str = ['name', 'platform', 'genre', 'rating']
  57.     columns_numeric = ['na_players', 'eu_players', 'jp_players', 'other_players', 'critic_score', 'user_score']
  58.     columns_datetime = ['year_of_release']
  59.  
  60.     for column in columns_str: data_raw[column] = data_raw[column].astype(str)  
  61.     for column in columns_numeric: data_raw[column] = pd.to_numeric(data_raw[column], errors='coerce')
  62.     for column in columns_datetime: data_raw[column] = pd.to_datetime(data_raw[column])  
  63.     data_raw['total_copies_sold'] = data_raw[['na_players', 'eu_players', 'jp_players', 'other_players']].sum(axis = 1)
  64.  
  65.     agg_games_year_genre_platform = data_raw.groupby(['year_of_release', 'genre', 'platform']).agg({'name': 'count', 'total_copies_sold': 'sum'})
  66.     agg_games_year_score  = data_raw.groupby(['year_of_release', 'genre', 'platform']).agg({'critic_score': 'mean', 'user_score': 'mean'})
  67.  
  68.     agg_games_year_genre_platform = agg_games_year_genre_platform.rename(columns = {'name': 'count', 'total_copies_sold':'sum'})
  69.     agg_games_year_score = agg_games_year_score.rename(columns = {'critic_score': 'avg_critic_score', 'user_score': 'avg_user_score'})
  70.  
  71.     agg_games_year_genre_platform = agg_games_year_genre_platform.fillna(0).reset_index()
  72.     agg_games_year_score = agg_games_year_score.fillna(0).reset_index()  
  73.  
  74.     tables = {'agg_games_year_genre_platform': 'agg_games_year_genre_platform',
  75.               'agg_games_year_score': 'agg_games_year_score'}
  76.  
  77.     for table_name, table_data in tables.items():  
  78.  
  79.         query = '''
  80.                   DELETE FROM {} WHERE year_of_release BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  81.                 '''.format(table_name, start_dt, end_dt)
  82.         engine.execute(query)
  83.  
  84.         table_data.to_sql(name = table_name, con = engine, if_exists = 'append', index = False)
  85.  
  86.     print('All done.')
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Top