Advertisement
12311k

Untitled

Sep 1st, 2021
41
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.76 KB | None | 0 0
  1. #!/usr/bin/python
  2. import sys
  3.  
  4. import getopt
  5. from datetime import datetime
  6.  
  7. import pandas as pd
  8.  
  9. from sqlalchemy import create_engine
  10.  
  11. if __name__ == "__main__":
  12.  
  13. unixOptions = "sdt:edt:"
  14. gnuOptions = ["start_dt=", "end_dt="]
  15.  
  16. fullCmdArguments = sys.argv
  17. argumentList = fullCmdArguments[1:]
  18.  
  19. try:
  20. arguments, values = getopt.getopt(argumentList, unixOptions, gnuOptions)
  21. except getopt.error as err:
  22. print (str(err))
  23. sys.exit(2)
  24.  
  25. start_dt = '1981-01-01'
  26. end_dt = '1998-01-01'
  27. for currentArgument, currentValue in arguments:
  28. if currentArgument in ("-sdt", "--start_dt"):
  29. start_dt = currentValue
  30. elif currentArgument in ("-edt", "--end_dt"):
  31. end_dt = currentValue
  32.  
  33. db_config = {'user': 'my_user',
  34. 'pwd': 'my_user_password',
  35. 'host': 'localhost',
  36. 'port': 5432,
  37. 'db': 'games'}
  38. connection_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_config['user'],
  39. db_config['pwd'],
  40. db_config['host'],
  41. db_config['port'],
  42. db_config['db'])
  43.  
  44. engine = create_engine(connection_string)
  45.  
  46. query = ''' SELECT *
  47. FROM data_raw
  48. WHERE year_of_release::TIMESTAMP BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  49. '''.format(start_dt, end_dt)
  50.  
  51. data_raw = pd.io.sql.read_sql(query, con = engine, index_col = 'game_id')
  52.  
  53. columns_str = ['name', 'platform', 'genre', 'rating']
  54. columns_numeric = ['na_players', 'eu_players', 'jp_players', 'other_players', 'critic_score', 'user_score']
  55. columns_datetime = ['year_of_release']
  56.  
  57. for column in columns_str: data_raw[column] = data_raw[column].astype(str)
  58. for column in columns_numeric: data_raw[column] = pd.to_numeric(data_raw[column], errors='coerce')
  59. for column in columns_datetime: data_raw[column] = pd.to_datetime(data_raw[column])
  60. data_raw['total_copies_sold'] = data_raw[['na_players', 'eu_players', 'jp_players', 'other_players']].sum(axis = 1)
  61.  
  62. agg_games_year_genre_platform = data_raw.groupby(['year_of_release', 'genre', 'platform']).agg({'name': 'count', 'total_copies_sold': 'sum'})
  63. agg_games_year_score = data_raw.groupby(['year_of_release', 'genre', 'platform']).agg({'critic_score': 'mean', 'user_score': 'mean'})
  64.  
  65. print(data_raw.info())
  66. print(agg_games_year_genre_platform.head(5))
  67. print(agg_games_year_score.head(5))
  68.  
  69. print(data_raw.info())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement