Advertisement
Guest User

Untitled

a guest
Nov 18th, 2019
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.77 KB | None | 0 0
  1. import sys
  2. import getopt
  3. from datetime import datetime, timedelta
  4. import pandas as pd
  5. from sqlalchemy import create_engine
  6.  
  7. # python test.py --start_dt='2019-09-24 18:00:00' --end_dt='2019-09-24 19:00:00'
  8.  
  9. if __name__ == "__main__":
  10.  
  11. unixOptions = "sdt:edt:"
  12. gnuOptions = ["start_dt=", "end_dt="]
  13.  
  14. fullCmdArguments = sys.argv
  15. argumentList = fullCmdArguments[1:]
  16.  
  17. try:
  18. arguments, values = getopt.getopt(argumentList, unixOptions, gnuOptions)
  19.  
  20. except getopt.error as err:
  21. print (str(err))
  22. sys.exit(2)
  23.  
  24. start_dt = ''
  25. end_dt = ''
  26.  
  27. for currentArgument, currentValue in arguments:
  28. if currentArgument in ("-sdt", "--start_dt"):
  29. start_dt = currentValue
  30. elif currentArgument in ("-edt", "--end_dt"):
  31. end_dt = currentValue
  32.  
  33. db_config = {'user': 'my_user',
  34. 'pwd': 'my_pass',
  35. 'host': 'localhost',
  36. 'port': 5432,
  37. 'db': 'zen'}
  38. engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(db_config['user'],
  39. db_config['pwd'],
  40. db_config['host'],
  41. db_config['port'],
  42. db_config['db']))
  43. query = '''
  44. SELECT event_id,
  45. age_segment,
  46. event,
  47. item_id,
  48. item_type,
  49. item_topic,
  50. source_id,
  51. source_type,
  52. source_topic,
  53. ts,
  54. TO_TIMESTAMP(ts / 1000) AT TIME ZONE 'Etc/UTC' AS dt,
  55. user_id
  56. FROM log_raw
  57. WHERE TO_TIMESTAMP(ts / 1000) AT TIME ZONE 'Etc/UTC' BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  58. '''.format(start_dt, end_dt)
  59.  
  60. log_raw = pd.io.sql.read_sql(query, con = engine, index_col = 'event_id')
  61. log_raw['dt'] = pd.to_datetime(log_raw['ts']).dt.round('min')
  62.  
  63. print('start_dt=', start_dt, 'end_dt=', end_dt)
  64. print(log_raw.head(5))
  65.  
  66.  
  67. dash_visits = (log_raw.groupby(['item_topic',
  68. 'source_topic',
  69. 'age_segment',
  70. 'dt'])
  71. .agg({'user_id':'count'})
  72. .rename(columns = {'user_id':'visits'})
  73. .reset_index())
  74.  
  75.  
  76. dash_engagement = (log_raw.groupby(['item_topic',
  77. 'dt',
  78. 'event',
  79. 'age_segment'])
  80. .agg({'user_id':'sum'})
  81. .rename(columns = {'user_id':'unique_users'}))
  82. tables = {'dash_engagement' : dash_engagement, 'dash_visits' : dash_visits}
  83. print('Cool!done')
  84.  
  85.  
  86. '''
  87. for table_name, table_data in tables.items():
  88. query =
  89. DELETE FROM {} WHERE dt BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  90. .format(table_name, start_dt, end_dt)
  91. engine.execute(query)
  92.  
  93. table_data.to_sql(name = table_name,
  94. con = engine,
  95. if_exists = 'append',
  96. index = False)
  97. '''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement