GuestRT

Algo-12-proto

May 25th, 2018
191
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
SQL 4.48 KB | None | 0 0
  1. SET hivevar:START_DATETIME='2017-11-01';
  2. SET hivevar:END_DATETIME='2017-11-08';
  3. CREATE TABLE sandbox.yurbasov_proto12
  4. AS
  5. SELECT A.*, B.start_mode_time, B.stop_mode_time FROM
  6. (SELECT hash_msisdn
  7.     ,hash_mac
  8.     ,route_id
  9.     ,start_station_id
  10.     ,start_station_nm
  11.     ,start_line_id
  12.     ,start_line_nm
  13.     ,stop_station_id
  14.     ,stop_station_nm
  15.     ,stop_line_id
  16.     ,stop_line_nm
  17.     ,day_of_week_num
  18.     ,day_of_week_nm
  19.     ,round(avg(session_dur), 2) AS avg_session_dur
  20.     ,COUNT(start_session_dttm) AS total_trip_cnt
  21.     ,COUNT(DISTINCT start_session_dt) AS total_day_cnt
  22.     ,percentile(CAST(prev_session_delta*100 AS BIGINT), 0.5) * 0.01 AS med_prev_session_delta
  23.     ,percentile(CAST(next_session_delta*100 AS BIGINT), 0.5) * 0.01 AS med_next_session_delta
  24.     ,SUM(first_one) AS first_session_cnt
  25.     ,SUM(last_one) AS last_session_cnt
  26.     ,MIN(start_session_time) AS start_min_time
  27.     ,MAX(start_session_time) AS start_max_time
  28.     ,from_unixtime(CAST(percentile(unix_timestamp(start_session_time), 0.5) AS BIGINT)) AS start_med_time
  29.     ,MIN(stop_session_time) AS stop_min_time
  30.     ,MAX(stop_session_time) AS stop_max_time
  31.     ,from_unixtime(CAST(percentile(unix_timestamp(stop_session_time), 0.5) AS BIGINT)) AS stop_med_time
  32.     ,CAST(${hivevar:START_DATETIME} AS DATE) AS process_from_dttm
  33.     ,CAST(${hivevar:END_DATETIME} AS DATE) AS process_to_dttm
  34.     ,CURRENT_TIMESTAMP() AS process_dttm
  35. FROM (
  36.     SELECT *
  37.     , CASE WHEN session_num = 1 THEN 1 ELSE 0 END AS first_one
  38.     , CASE WHEN last_session_flag = 1 THEN 1 ELSE 0 END AS last_one
  39.         FROM ic_calc.maxima_f_subway_trip
  40.         WHERE start_session_dttm >= ${hivevar:START_DATETIME}
  41.         AND start_session_dttm <= ${hivevar:END_DATETIME}
  42.     ) AS T
  43. GROUP BY
  44.     hash_msisdn
  45.     ,hash_mac
  46.     ,route_id
  47.     ,start_station_id
  48.     ,start_station_nm
  49.     ,start_line_id
  50.     ,start_line_nm
  51.     ,stop_station_id
  52.     ,stop_station_nm
  53.     ,stop_line_id
  54.     ,stop_line_nm
  55.     ,day_of_week_num
  56.     ,day_of_week_nm
  57. ) AS A
  58. LEFT JOIN
  59. (SELECT hash_msisdn
  60.             ,hash_mac
  61.             ,route_id
  62.             ,start_station_id
  63.             ,start_station_nm
  64.             ,start_line_id
  65.             ,start_line_nm
  66.             ,stop_station_id
  67.             ,stop_station_nm
  68.             ,stop_line_id
  69.             ,stop_line_nm
  70.             ,day_of_week_num
  71.             ,day_of_week_nm
  72.             ,start_mode
  73.             ,stop_mode
  74.                 ,first_value(start_mode) OVER (partition BY hash_msisdn
  75.                 ,hash_mac
  76.                 ,route_id
  77.                 ,start_station_id
  78.                 ,start_station_nm
  79.                 ,start_line_id
  80.                 ,start_line_nm
  81.                 ,stop_station_id
  82.                 ,stop_station_nm
  83.                 ,stop_line_id
  84.                 ,stop_line_nm
  85.                 ,day_of_week_num
  86.                 ,day_of_week_nm ORDER BY start_cnt DESC) AS start_mode_time
  87.                 ,first_value(stop_mode) OVER (partition BY hash_msisdn
  88.                 ,hash_mac
  89.                 ,route_id
  90.                 ,start_station_id
  91.                 ,start_station_nm
  92.                 ,start_line_id
  93.                 ,start_line_nm
  94.                 ,stop_station_id
  95.                 ,stop_station_nm
  96.                 ,stop_line_id
  97.                 ,stop_line_nm
  98.                 ,day_of_week_num
  99.                 ,day_of_week_nm ORDER BY stop_cnt DESC) AS stop_mode_time
  100.     FROM
  101.     (SELECT hash_msisdn
  102.             ,hash_mac
  103.             ,route_id
  104.             ,start_station_id
  105.             ,start_station_nm
  106.             ,start_line_id
  107.             ,start_line_nm
  108.             ,stop_station_id
  109.             ,stop_station_nm
  110.             ,stop_line_id
  111.             ,stop_line_nm
  112.             ,day_of_week_num
  113.             ,day_of_week_nm
  114.             ,start_mode
  115.             ,stop_mode
  116.             ,COUNT(start_mode) AS start_cnt
  117.             ,COUNT(stop_mode) AS stop_cnt
  118.         FROM (
  119.             SELECT from_unixtime(CAST(unix_timestamp(start_session_time) / 900 AS BIGINT) * 900) AS start_mode
  120.                 ,from_unixtime(CAST(unix_timestamp(stop_session_time) / 900 AS BIGINT) * 900) AS stop_mode
  121.                 ,hash_msisdn
  122.                 ,hash_mac
  123.                 ,route_id
  124.                 ,start_station_id
  125.                 ,start_station_nm
  126.                 ,start_line_id
  127.                 ,start_line_nm
  128.                 ,stop_station_id
  129.                 ,stop_station_nm
  130.                 ,stop_line_id
  131.                 ,stop_line_nm
  132.                 ,day_of_week_num
  133.                 ,day_of_week_nm
  134.             FROM ic_calc.maxima_f_subway_trip
  135.         ) AS BT
  136.         GROUP BY hash_msisdn
  137.                 ,hash_mac
  138.                 ,route_id
  139.                 ,start_station_id
  140.                 ,start_station_nm
  141.                 ,start_line_id
  142.                 ,start_line_nm
  143.                 ,stop_station_id
  144.                 ,stop_station_nm
  145.                 ,stop_line_id
  146.                 ,stop_line_nm
  147.                 ,day_of_week_num
  148.                 ,day_of_week_nm
  149.                 ,start_mode
  150.                 ,stop_mode
  151.     ) AS BX
  152. ) AS B
  153. ON A.hash_msisdn = B.hash_msisdn
  154.     AND A.hash_mac = B.hash_mac
  155.     AND A.route_id = B.route_id
  156.     AND A.start_station_id = B.start_station_id
  157.     AND A.start_station_nm = B.start_station_nm
  158.     AND A.start_line_id = B.start_line_id
  159.     AND A.start_line_nm = B.start_line_nm
  160.     AND A.stop_station_id = B.stop_station_id
  161.     AND A.stop_station_nm = B.stop_station_nm
  162.     AND A.stop_line_id = B.stop_line_id
  163.     AND A.stop_line_nm = B.stop_line_nm
  164.     AND A.day_of_week_num = B.day_of_week_num
  165.     AND A.day_of_week_nm = B.day_of_week_nm
Advertisement
Add Comment
Please, Sign In to add comment