Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
- import getopt
- from datetime import datetime, timedelta
- import pandas as pd
- from sqlalchemy import create_engine
- # python test.py --start_dt='2019-09-24 18:00:00' --end_dt='2019-09-24 19:00:00'
- if __name__ == "__main__":
- unixOptions = "sdt:edt:"
- gnuOptions = ["start_dt=", "end_dt="]
- fullCmdArguments = sys.argv
- argumentList = fullCmdArguments[1:]
- try:
- arguments, values = getopt.getopt(argumentList, unixOptions, gnuOptions)
- except getopt.error as err:
- print (str(err))
- sys.exit(2)
- start_dt = ''
- end_dt = ''
- for currentArgument, currentValue in arguments:
- if currentArgument in ("-sdt", "--start_dt"):
- start_dt = currentValue
- elif currentArgument in ("-edt", "--end_dt"):
- end_dt = currentValue
- db_config = {'user': 'my_user',
- 'pwd': 'my_pass',
- 'host': 'localhost',
- 'port': 5432,
- 'db': 'zen'}
- engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(db_config['user'],
- db_config['pwd'],
- db_config['host'],
- db_config['port'],
- db_config['db']))
- query = '''
- SELECT event_id,
- age_segment,
- event,
- item_id,
- item_type,
- item_topic,
- source_id,
- source_type,
- source_topic,
- ts,
- TO_TIMESTAMP(ts / 1000) AT TIME ZONE 'Etc/UTC' AS dt,
- user_id
- FROM log_raw
- WHERE TO_TIMESTAMP(ts / 1000) AT TIME ZONE 'Etc/UTC' BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
- '''.format(start_dt, end_dt)
- log_raw = pd.io.sql.read_sql(query, con = engine, index_col = 'event_id')
- log_raw['dt'] = pd.to_datetime(log_raw['ts']).dt.round('min')
- print('start_dt=', start_dt, 'end_dt=', end_dt)
- print(log_raw.head(5))
- dash_visits = (log_raw.groupby(['item_topic',
- 'source_topic',
- 'age_segment',
- 'dt'])
- .agg({'user_id':'count'})
- .rename(columns = {'user_id':'visits'})
- .reset_index())
- dash_engagement = (log_raw.groupby(['item_topic',
- 'dt',
- 'event',
- 'age_segment'])
- .agg({'user_id':'sum'})
- .rename(columns = {'user_id':'unique_users'}))
- tables = {'dash_engagement' : dash_engagement, 'dash_visits' : dash_visits}
- print('Cool!done')
- '''
- for table_name, table_data in tables.items():
- query =
- DELETE FROM {} WHERE dt BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
- .format(table_name, start_dt, end_dt)
- engine.execute(query)
- table_data.to_sql(name = table_name,
- con = engine,
- if_exists = 'append',
- index = False)
- '''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement