Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- import sys
- import getopt
- from datetime import datetime
- import pandas as pd
- from sqlalchemy import create_engine
- if __name__ == "__main__":
- unixOptions = "sdt:edt"
- gnuOptions = ["start_dt=", "end_dt="]
- fullCmdArguments = sys.argv
- argumentList = fullCmdArguments[1:]
- try:
- arguments, values = getopt.getopt(argumentList, unixOptions, gnuOptions)
- except getopt.error as err:
- print(str(err))
- sys.exit(2)
- start_dt = '1981-01-01'
- end_dt = '1998-01-01'
- for currentArgument, currentValue in arguments:
- if currentArgument in ("-sdt", "--start_dt"):
- start_dt = currentValue
- elif currentArgument in ("-edt", "--end_dt"):
- end_dt = currentValue
- db_config = {'user': 'my_user',
- 'pwd': 'my_user_password',
- 'host': 'localhost',
- 'port': 5432,
- 'db': 'games'}
- connection_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_config['user'],
- db_config['pwd'],
- db_config['host'],
- db_config['port'],
- db_config['db'])
- engine = create_engine(connection_string)
- query = ''' SELECT *
- FROM data_raw
- WHERE year_of_release::TIMESTAMP BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
- '''.format(start_dt, end_dt)
- data_raw = pd.io.sql.read_sql(query, con=engine, index_col='game_id')
- columns_str = [...]
- columns_numeric = [...]
- columns_datetime = [...]
- for column in columns_str: data_raw[column] = data_raw[column].astype(str)
- for column in columns_numeric: data_raw[column] = pd.to_numeric(data_raw[column], errors='coerce')
- for column in columns_datetime: data_raw[column] = pd.to_datetime(data_raw[column])
- data_raw['total_copies_sold'] = data_raw[[...]].sum(axis=1)
- agg_games_year_genre_platform = data_raw.groupby([...]).agg({'name': 'count', 'total_copies_sold': 'sum'})
- agg_games_year_score = data_raw.groupby([...]).agg(
- {'critic_score': 'mean', 'user_score': 'mean', 'total_copies_sold': 'sum'})
- print(data_raw.info())
- print(agg_games_year_genre_platform.head(5))
- print(agg_games_year_score.head(5))
- print(data_raw.info())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement