Advertisement
Guest User

Untitled

a guest
Feb 20th, 2020
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.14 KB | None | 0 0
  1. # Теперь выберем из таблицы только те строки,
  2.     # которые были выпущены между start_dt и end_dt
  3.     query = ''' SELECT *, TO_TIMESTAMP(ts / 1000) AT TIME ZONE 'Etc/UTC' AS dt
  4.            FROM log_raw
  5.            WHERE TO_TIMESTAMP(ts / 1000) AT TIME ZONE 'Etc/UTC' BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  6.        '''.format(start_dt, end_dt)
  7.    
  8.     data_raw = pd.io.sql.read_sql(query, con = engine, index_col = 'event_id')
  9.  
  10.  
  11.   #pd.to_datetime(df['date_time_column']).dt.round('min')
  12.  
  13.     columns_datetime = ['dt']
  14.  
  15.     for column in columns_datetime: data_raw[column] = pd.to_datetime(data_raw[column]).dt.round('min')
  16.  
  17.     #### агрегирующие таблицы
  18.    
  19.     dash_visits = data_raw.groupby(['item_topic', 'source_topic', 'age_segment', 'dt']).agg({'event': 'count'})
  20.    
  21.     dash_visits = dash_visits.rename(columns = {'event': 'visits'})
  22.  
  23.     dash_engagement = data_raw.groupby(['dt', 'item_topic', 'event', 'age_segment']).agg({'user_id': lambda x: x.nunique()})
  24.  
  25.     dash_engagement = dash_engagement.rename(columns = {'user_id': 'unique_users'})
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement