Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- SET hivevar:START_DATETIME='2017-11-01';
- SET hivevar:END_DATETIME='2017-11-08';
- CREATE TABLE sandbox.yurbasov_proto12
- AS
- SELECT A.*, B.start_mode_time, B.stop_mode_time FROM
- (SELECT hash_msisdn
- ,hash_mac
- ,route_id
- ,start_station_id
- ,start_station_nm
- ,start_line_id
- ,start_line_nm
- ,stop_station_id
- ,stop_station_nm
- ,stop_line_id
- ,stop_line_nm
- ,day_of_week_num
- ,day_of_week_nm
- ,round(avg(session_dur), 2) AS avg_session_dur
- ,COUNT(start_session_dttm) AS total_trip_cnt
- ,COUNT(DISTINCT start_session_dt) AS total_day_cnt
- ,percentile(CAST(prev_session_delta*100 AS BIGINT), 0.5) * 0.01 AS med_prev_session_delta
- ,percentile(CAST(next_session_delta*100 AS BIGINT), 0.5) * 0.01 AS med_next_session_delta
- ,SUM(first_one) AS first_session_cnt
- ,SUM(last_one) AS last_session_cnt
- ,MIN(start_session_time) AS start_min_time
- ,MAX(start_session_time) AS start_max_time
- ,from_unixtime(CAST(percentile(unix_timestamp(start_session_time), 0.5) AS BIGINT)) AS start_med_time
- ,MIN(stop_session_time) AS stop_min_time
- ,MAX(stop_session_time) AS stop_max_time
- ,from_unixtime(CAST(percentile(unix_timestamp(stop_session_time), 0.5) AS BIGINT)) AS stop_med_time
- ,CAST(${hivevar:START_DATETIME} AS DATE) AS process_from_dttm
- ,CAST(${hivevar:END_DATETIME} AS DATE) AS process_to_dttm
- ,CURRENT_TIMESTAMP() AS process_dttm
- FROM (
- SELECT *
- , CASE WHEN session_num = 1 THEN 1 ELSE 0 END AS first_one
- , CASE WHEN last_session_flag = 1 THEN 1 ELSE 0 END AS last_one
- FROM ic_calc.maxima_f_subway_trip
- WHERE start_session_dttm >= ${hivevar:START_DATETIME}
- AND start_session_dttm <= ${hivevar:END_DATETIME}
- ) AS T
- GROUP BY
- hash_msisdn
- ,hash_mac
- ,route_id
- ,start_station_id
- ,start_station_nm
- ,start_line_id
- ,start_line_nm
- ,stop_station_id
- ,stop_station_nm
- ,stop_line_id
- ,stop_line_nm
- ,day_of_week_num
- ,day_of_week_nm
- ) AS A
- LEFT JOIN
- (SELECT hash_msisdn
- ,hash_mac
- ,route_id
- ,start_station_id
- ,start_station_nm
- ,start_line_id
- ,start_line_nm
- ,stop_station_id
- ,stop_station_nm
- ,stop_line_id
- ,stop_line_nm
- ,day_of_week_num
- ,day_of_week_nm
- ,start_mode
- ,stop_mode
- ,first_value(start_mode) OVER (partition BY hash_msisdn
- ,hash_mac
- ,route_id
- ,start_station_id
- ,start_station_nm
- ,start_line_id
- ,start_line_nm
- ,stop_station_id
- ,stop_station_nm
- ,stop_line_id
- ,stop_line_nm
- ,day_of_week_num
- ,day_of_week_nm ORDER BY start_cnt DESC) AS start_mode_time
- ,first_value(stop_mode) OVER (partition BY hash_msisdn
- ,hash_mac
- ,route_id
- ,start_station_id
- ,start_station_nm
- ,start_line_id
- ,start_line_nm
- ,stop_station_id
- ,stop_station_nm
- ,stop_line_id
- ,stop_line_nm
- ,day_of_week_num
- ,day_of_week_nm ORDER BY stop_cnt DESC) AS stop_mode_time
- FROM
- (SELECT hash_msisdn
- ,hash_mac
- ,route_id
- ,start_station_id
- ,start_station_nm
- ,start_line_id
- ,start_line_nm
- ,stop_station_id
- ,stop_station_nm
- ,stop_line_id
- ,stop_line_nm
- ,day_of_week_num
- ,day_of_week_nm
- ,start_mode
- ,stop_mode
- ,COUNT(start_mode) AS start_cnt
- ,COUNT(stop_mode) AS stop_cnt
- FROM (
- SELECT from_unixtime(CAST(unix_timestamp(start_session_time) / 900 AS BIGINT) * 900) AS start_mode
- ,from_unixtime(CAST(unix_timestamp(stop_session_time) / 900 AS BIGINT) * 900) AS stop_mode
- ,hash_msisdn
- ,hash_mac
- ,route_id
- ,start_station_id
- ,start_station_nm
- ,start_line_id
- ,start_line_nm
- ,stop_station_id
- ,stop_station_nm
- ,stop_line_id
- ,stop_line_nm
- ,day_of_week_num
- ,day_of_week_nm
- FROM ic_calc.maxima_f_subway_trip
- ) AS BT
- GROUP BY hash_msisdn
- ,hash_mac
- ,route_id
- ,start_station_id
- ,start_station_nm
- ,start_line_id
- ,start_line_nm
- ,stop_station_id
- ,stop_station_nm
- ,stop_line_id
- ,stop_line_nm
- ,day_of_week_num
- ,day_of_week_nm
- ,start_mode
- ,stop_mode
- ) AS BX
- ) AS B
- ON A.hash_msisdn = B.hash_msisdn
- AND A.hash_mac = B.hash_mac
- AND A.route_id = B.route_id
- AND A.start_station_id = B.start_station_id
- AND A.start_station_nm = B.start_station_nm
- AND A.start_line_id = B.start_line_id
- AND A.start_line_nm = B.start_line_nm
- AND A.stop_station_id = B.stop_station_id
- AND A.stop_station_nm = B.stop_station_nm
- AND A.stop_line_id = B.stop_line_id
- AND A.stop_line_nm = B.stop_line_nm
- AND A.day_of_week_num = B.day_of_week_num
- AND A.day_of_week_nm = B.day_of_week_nm
Advertisement
Add Comment
Please, Sign In to add comment