Advertisement
GiacomoGalanti

Attribution_Model GA4

Jan 5th, 2024
42
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.33 KB | None | 0 0
  1. with variables as (
  2. select
  3. 30 as conversion_period,
  4. 90 as attribution_window),
  5.  
  6. conversions as (
  7. select
  8. user_pseudo_id,
  9. (select value.int_value from unnest(event_params) where key = 'ga_session_id') as session_id,
  10. concat(user_pseudo_id,event_timestamp) as conversion_id,
  11. max(timestamp_micros(event_timestamp)) as conversion_timestamp,
  12. row_number() over (partition by user_pseudo_id order by max(timestamp_micros(event_timestamp)) desc) as conversion_rank
  13. from
  14. `ga4bigquery.analytics_250794857.events_*`,
  15. variables
  16. where
  17. _table_suffix between format_date('%Y%m%d',date_sub(current_date(), interval conversion_period day))
  18. and format_date('%Y%m%d',date_sub(current_date(), interval 1 day))
  19. and event_name = 'purchase'
  20. group by
  21. user_pseudo_id,
  22. session_id,
  23. conversion_id
  24. qualify
  25. conversion_rank = 1),
  26.  
  27. sessions as (
  28. select
  29. user_pseudo_id,
  30. (select value.int_value from unnest(event_params) where key = 'ga_session_id') as session_id,
  31. min(timestamp_micros(event_timestamp)) as session_start_timestamp,
  32. max(concat(coalesce((select value.string_value from unnest(event_params) where key = 'source'),'(direct)'),' / ',coalesce((select value.string_value from unnest(event_params) where key = 'medium'),'(none)'))) as source_medium
  33. from
  34. `ga4bigquery.analytics_250794857.events_*`,
  35. variables
  36. where
  37. _table_suffix between format_date('%Y%m%d',date_sub(current_date(), interval attribution_window + conversion_period day))
  38. and format_date('%Y%m%d',date_sub(current_date(), interval 1 day))
  39. and user_pseudo_id in (select distinct user_pseudo_id from conversions)
  40. group by
  41. user_pseudo_id,
  42. session_id),
  43.  
  44. sessions_joined as (
  45. select
  46. sessions.*,
  47. conversions.conversion_id,
  48. conversions.conversion_timestamp
  49. from
  50. sessions
  51. left join conversions on sessions.user_pseudo_id = conversions.user_pseudo_id
  52. and sessions.session_id = conversions.session_id),
  53.  
  54. attribution_raw as (
  55. select
  56. *,
  57. count(distinct session_id) over (partition by user_pseudo_id) as total_sessions_per_user,
  58. rank() over (partition by user_pseudo_id order by session_start_timestamp) as session_number
  59. from
  60. sessions_joined as sessions,
  61. variables
  62. where
  63. session_start_timestamp <= (select max(conversion_timestamp) from sessions_joined where sessions.user_pseudo_id = sessions_joined.user_pseudo_id)
  64. and session_start_timestamp >= timestamp_add((select max(conversion_timestamp) from sessions_joined where sessions.user_pseudo_id = sessions_joined.user_pseudo_id),interval - attribution_window day)
  65. order by
  66. user_pseudo_id,
  67. session_number),
  68.  
  69. first_click as (
  70. select
  71. user_pseudo_id,
  72. conversion_id,
  73. first_value(source_medium) over (partition by user_pseudo_id order by session_start_timestamp) as source_medium,
  74. 1 as attribution_weight
  75. from
  76. attribution_raw
  77. where
  78. conversion_id is not null),
  79.  
  80. last_click as (
  81. select
  82. user_pseudo_id,
  83. conversion_id,
  84. source_medium,
  85. 1 as attribution_weight
  86. from
  87. attribution_raw
  88. where
  89. conversion_id is not null),
  90.  
  91. prep_last_non_direct_click as (
  92. select
  93. user_pseudo_id,
  94. conversion_id,
  95. case
  96. when source_medium != '(direct) / (none)' then source_medium
  97. when source_medium = '(direct) / (none)' then last_value(nullif(source_medium,'(direct) / (none)') ignore nulls) over (partition by user_pseudo_id order by session_number)
  98. end as source_medium,
  99. 1 as attribution_weight
  100. from
  101. attribution_raw),
  102.  
  103. last_non_direct_click as (
  104. select
  105. user_pseudo_id,
  106. conversion_id,
  107. coalesce(source_medium,'(direct) / (none)') as source_medium,
  108. attribution_weight
  109. from prep_last_non_direct_click
  110. where
  111. conversion_id is not null),
  112.  
  113. linear as (
  114. select
  115. user_pseudo_id,
  116. source_medium,
  117. last_value(conversion_id) over (partition by user_pseudo_id order by conversion_timestamp asc rows between current row and unbounded following) as conversion,
  118. 1 / (select max(session_number) from attribution_raw where conversion_id is not null and raw.user_pseudo_id = user_pseudo_id) as attribution_weight
  119. from
  120. attribution_raw as raw),
  121.  
  122. time_decay as (
  123. select
  124. user_pseudo_id,
  125. source_medium,
  126. last_value(conversion_id) over (partition by user_pseudo_id order by conversion_timestamp asc rows between current row and unbounded following) as conversion,
  127. case when total_sessions_per_user = 1 then 1
  128. else safe_divide(power(2,session_number / total_sessions_per_user),sum(power(2,session_number/total_sessions_per_user)) over (partition by user_pseudo_id)) end as attribution_weight
  129. from
  130. attribution_raw),
  131.  
  132. position_based as (
  133. select
  134. user_pseudo_id,
  135. source_medium,
  136. last_value(conversion_id) over (partition by user_pseudo_id order by conversion_timestamp asc rows between current row and unbounded following) as conversion,
  137. case when total_sessions_per_user = 1 then 1
  138. when total_sessions_per_user = 2 then 0.5
  139. when total_sessions_per_user > 2 then (
  140. case when session_number = 1 then 0.4
  141. when session_number = total_sessions_per_user then 0.4
  142. else 0.2 / (total_sessions_per_user - 2) end)
  143. end as attribution_weight
  144. from
  145. attribution_raw)
  146.  
  147. select
  148. 'first_click' as attribution_model,
  149. source_medium,
  150. sum(attribution_weight) as attribution_weight,
  151. from
  152. first_click
  153. group by
  154. attribution_model,
  155. source_medium
  156. union all
  157. select
  158. 'last_click' as attribution_model,
  159. source_medium,
  160. sum(attribution_weight) as attribution_weight,
  161. from
  162. last_click
  163. group by
  164. attribution_model,
  165. source_medium
  166. union all
  167. select
  168. 'last_non_direct_click' as attribution_model,
  169. source_medium,
  170. sum(attribution_weight) as attribution_weight,
  171. from
  172. last_non_direct_click
  173. group by
  174. attribution_model,
  175. source_medium
  176. union all
  177. select
  178. 'linear' as attribution_model,
  179. source_medium,
  180. cast(round(sum(attribution_weight),0) as integer) as attribution_weight,
  181. from
  182. linear
  183. group by
  184. attribution_model,
  185. source_medium
  186. union all
  187. select
  188. 'time_decay' as attribution_model,
  189. source_medium,
  190. cast(round(sum(attribution_weight),0) as integer) as attribution_weight,
  191. from
  192. time_decay
  193. group by
  194. attribution_model,
  195. source_medium
  196. union all
  197. select
  198. 'position_based' as attribution_model,
  199. source_medium,
  200. cast(round(sum(attribution_weight),0) as integer) as attribution_weight,
  201. from
  202. position_based
  203. group by
  204. attribution_model,
  205. source_medium
  206. order by
  207. attribution_weight desc
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement