Advertisement
Guest User

Untitled

a guest
Feb 21st, 2020
1,073
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.70 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import sys
  4.  
  5. import getopt
  6. from datetime import datetime
  7.  
  8. import pandas as pd
  9.  
  10. from sqlalchemy import create_engine
  11.  
  12. if __name__ == "__main__":
  13.  
  14. #input params setup
  15. unixOptions = "sdt:edt"
  16. gnuOptions = ["start_dt=", "end_dt="]
  17.  
  18. fullCmdArguments = sys.argv
  19. argumentList = fullCmdArguments[1:] #excluding script name
  20.  
  21. try:
  22. arguments, values = getopt.getopt(argumentList, unixOptions, gnuOptions)
  23. except getopt.error as err:
  24. # output error, and return with an error code
  25. print (str(err))
  26. sys.exit(2)
  27.  
  28. start_dt = '1981-01-01'
  29. end_dt = '1998-01-01'
  30. for currentArgument, currentValue in arguments:
  31. if currentArgument in ("-sdt", "--start_dt"):
  32. start_dt = currentValue
  33. elif currentArgument in ("-edt", "--end_dt"):
  34. end_dt = currentValue
  35.  
  36. db_config = {'user': 'my_user',
  37. 'pwd': 'my_user_password',
  38. 'host': 'localhost',
  39. 'port': 5432,
  40. 'db': 'games'}
  41. connection_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_config['user'],
  42. db_config['pwd'],
  43. db_config['host'],
  44. db_config['port'],
  45. db_config['db'])
  46.  
  47. engine = create_engine(connection_string)
  48.  
  49. query = ''' SELECT *
  50. FROM data_raw
  51. WHERE year_of_release::TIMESTAMP BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  52. '''.format(start_dt, end_dt)
  53.  
  54. data_raw = pd.io.sql.read_sql(query, con = engine, index_col = 'game_id')
  55.  
  56. columns_str = ['name', 'platform', 'genre', 'rating']
  57. columns_numeric = ['na_players', 'eu_players', 'jp_players', 'other_players', 'critic_score', 'user_score']
  58. columns_datetime = ['year_of_release']
  59.  
  60. for column in columns_str: data_raw[column] = data_raw[column].astype(str)
  61. for column in columns_numeric: data_raw[column] = pd.to_numeric(data_raw[column], errors='coerce')
  62. for column in columns_datetime: data_raw[column] = pd.to_datetime(data_raw[column])
  63. data_raw['total_copies_sold'] = data_raw[['na_players', 'eu_players', 'jp_players', 'other_players']].sum(axis = 1)
  64.  
  65. agg_games_year_genre_platform = data_raw.groupby(['year_of_release', 'genre', 'platform']).agg({'name': 'count', 'total_copies_sold': 'sum'})
  66. agg_games_year_score = data_raw.groupby(['year_of_release', 'genre', 'platform']).agg({'critic_score': 'mean', 'user_score': 'mean'})
  67.  
  68. agg_games_year_genre_platform = agg_games_year_genre_platform.rename(columns = {'name': 'count', 'total_copies_sold':'sum'})
  69. agg_games_year_score = agg_games_year_score.rename(columns = {'critic_score': 'avg_critic_score', 'user_score': 'avg_user_score'})
  70.  
  71. agg_games_year_genre_platform = agg_games_year_genre_platform.fillna(0).reset_index()
  72. agg_games_year_score = agg_games_year_score.fillna(0).reset_index()
  73.  
  74. tables = {'agg_games_year_genre_platform': 'agg_games_year_genre_platform',
  75. 'agg_games_year_score': 'agg_games_year_score'}
  76.  
  77. for table_name, table_data in tables.items():
  78.  
  79. query = '''
  80. DELETE FROM {} WHERE year_of_release BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  81. '''.format(table_name, start_dt, end_dt)
  82. engine.execute(query)
  83.  
  84. table_data.to_sql(name = table_name, con = engine, if_exists = 'append', index = False)
  85.  
  86. print('All done.')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement