jroakes

bigquery_forecasting_load

Jun 28th, 2021 (edited)
325
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
MySQL 2.72 KB | None | 0 0
  1. -- Coding is For Losers is Awesome: https://github.com/coding-is-for-losers/ga4-bigquery-starter
  2.  
  3. -- Build ARIMA model
  4. CREATE OR REPLACE MODEL `forecasting.forecast_client_name`
  5.   OPTIONS(
  6.     MODEL_TYPE='ARIMA_PLUS',
  7.     TIME_SERIES_TIMESTAMP_COL='date',
  8.     TIME_SERIES_DATA_COL='sessions',
  9.     HOLIDAY_REGION='US'
  10.   ) AS
  11.   SELECT
  12.     date,
  13.     sessions,
  14.  
  15.   FROM
  16.     (
  17.         WITH raw_ga_4 AS (
  18.  
  19.             SELECT
  20.             * except(row)
  21.             FROM (
  22.             SELECT
  23.                 -- extracts date from source table
  24.                 parse_date('%Y%m%d',regexp_extract(_table_suffix,'[0-9]+')) as table_date,
  25.                 -- flag to indicate if source table is `events_intraday_`
  26.                 case when _table_suffix like '%intraday%' then true else false end as is_intraday,
  27.                 *,
  28.                 row_number() over (partition by user_pseudo_id, event_name, event_timestamp order by event_timestamp) as row
  29.             FROM
  30.                 `adapt-ml.analytics_XXXXXXXXXXXXXXX.events_*`
  31.                
  32.                 )
  33.             WHERE
  34.             row = 1
  35.  
  36.             ),
  37.  
  38.         pageviews AS (
  39.             SELECT
  40.                 parse_date("%Y%m%d", event_date) event_date,
  41.                 event_timestamp,
  42.                 user_pseudo_id,
  43.                 user_first_touch_timestamp,
  44.                 device.category as device_category,
  45.                 device.language as device_language,
  46.                 device.web_info.browser as device_browser,
  47.                 geo.continent as geo_continent,
  48.                 geo.country as geo_country,
  49.                 max(if(params.key = 'ga_session_id', params.value.int_value, null)) ga_session_id,
  50.                 max(if(params.key = 'ga_session_number', params.value.int_value, null)) ga_session_number,
  51.                 cast(max(if(params.key = 'session_engaged', params.value.string_value, null)) as int64) session_engaged,
  52.                 max(if(params.key = 'page_title', params.value.string_value, null)) page_title,
  53.                 max(if(params.key = 'page_location', params.value.string_value, null)) page_location,
  54.                 max(if(params.key = 'source', params.value.string_value, null)) utm_source,
  55.                 max(if(params.key = 'medium', params.value.string_value, null)) utm_medium,
  56.                 max(if(params.key = 'campaign', params.value.string_value, null)) utm_campaign,
  57.                 max(if(params.key = 'page_referrer', params.value.string_value, null)) utm_referrer
  58.                 FROM raw_ga_4,
  59.                 UNNEST(event_params) AS params
  60.                 WHERE event_name = 'page_view'
  61.  
  62.                 GROUP BY event_date, event_timestamp, user_pseudo_id, user_first_touch_timestamp, device_category, device_language, device_browser, geo_continent, geo_country
  63.                 )
  64.  
  65.  
  66.         SELECT
  67.         DISTINCT(event_date) as date,
  68.         COUNT(DISTINCT(ga_session_id)) AS sessions,
  69.         COUNT(DISTINCT(user_pseudo_id)) AS users
  70.         FROM pageviews
  71.         WHERE utm_medium = "organic"
  72.         GROUP BY event_date
  73.     )
Add Comment
Please, Sign In to add comment