Advertisement
semen_kamagurov

Untitled

Feb 17th, 2020
141
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.68 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import sys
  4.  
  5. import getopt
  6. from datetime import datetime
  7.  
  8. import pandas as pd
  9.  
  10. from sqlalchemy import create_engine
  11.  
  12. if __name__ == "__main__":
  13.  
  14.     #input params setup
  15.     unixOptions = "sdt:edt"  
  16.     gnuOptions = ["start_dt=", "end_dt="]
  17.  
  18.     fullCmdArguments = sys.argv
  19.     argumentList = fullCmdArguments[1:]    #excluding script name
  20.  
  21.     try:  
  22.         arguments, values = getopt.getopt(argumentList, unixOptions, gnuOptions)
  23.     except getopt.error as err:  
  24.         # output error, and return with an error code
  25.         print (str(err))
  26.         sys.exit(2)
  27.  
  28.     start_dt = '1981-01-01'
  29.     end_dt = '1998-01-01'    
  30.     for currentArgument, currentValue in arguments:  
  31.         if currentArgument in ("-sdt", "--start_dt"):
  32.             start_dt = currentValue                                  
  33.         elif currentArgument in ("-edt", "--end_dt"):
  34.             end_dt = currentValue  
  35.  
  36.     db_config = {'user': 'my_user',
  37.                  'pwd': 'my_user_password',
  38.                  'host': 'localhost',
  39.                  'port': 5432,
  40.                  'db': 'games'}  
  41.     connection_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_config['user'],
  42.                                                              db_config['pwd'],
  43.                                                              db_config['host'],
  44.                                                              db_config['port'],
  45.                                                              db_config['db'])
  46.  
  47.     engine = create_engine(connection_string)
  48.            
  49.     query = ''' SELECT *
  50.                FROM data_raw
  51.                WHERE year_of_release::TIMESTAMP BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  52.            '''.format(start_dt, end_dt)
  53.  
  54.     data_raw = pd.io.sql.read_sql(query, con = engine, index_col = 'game_id')
  55.  
  56.     columns_str = ['name', 'platform', 'genre', 'rating']
  57.     columns_numeric = ['na_players', 'eu_players', 'jp_players', 'other_players', 'critic_score', 'user_score']
  58.     columns_datetime = ['year_of_release']
  59.  
  60.     for column in columns_str: data_raw[column] = data_raw[column].astype(str)  
  61.     for column in columns_numeric: data_raw[column] = pd.to_numeric(data_raw[column], errors='coerce')
  62.     for column in columns_datetime: data_raw[column] = pd.to_datetime(data_raw[column])  
  63.     data_raw['total_copies_sold'] = data_raw[['na_players', 'eu_players', 'jp_players', 'other_players']].sum(axis = 1)
  64.  
  65.     agg_games_year_genre_platform = data_raw.groupby(['year_of_release', 'genre', 'platform']).agg({'name': 'count', 'total_copies_sold': 'sum'})
  66.     agg_games_year_score  = data_raw.groupby(['year_of_release', 'genre', 'platform']).agg({'critic_score': 'mean', 'user_score': 'mean'})
  67.  
  68.     agg_games_year_genre_platform = agg_games_year_genre_platform.rename(columns = {'name': 'games'})
  69.     agg_games_year_score = agg_games_year_score.rename(columns = {'critic_score': 'avg_critic_score', 'user_score': 'avg_user_score'})
  70.  
  71.     agg_games_year_genre_platform = agg_games_year_genre_platform.fillna(0).reset_index()
  72.     agg_games_year_score = agg_games_year_score.fillna(0).reset_index()  
  73.  
  74.     tables = {'agg_games_year_genre_platform': 'agg_games_year_genre_platform',
  75.               'agg_games_year_score': 'agg_games_year_score'}
  76.  
  77.     for table_name, table_data in tables.items():  
  78.  
  79.         query = '''
  80.                  DELETE FROM {} WHERE year_of_release BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  81.                '''.format(table_name, start_dt, end_dt)
  82.         engine.execute(query)
  83.  
  84.         data_raw.to_sql(name = 'table_name', con = engine, if_exists = 'append', index = False)
  85.  
  86.     print('All done.')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement