Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- CREATE OR REPLACE MODEL `bigquery_ml.title_test`
- OPTIONS(model_type='kmeans',
- num_clusters = 5,
- DISTANCE_TYPE = 'cosine',
- kmeans_init_method = 'KMEANS++') AS
- (
- WITH
- raw_ga_4 AS (
- SELECT
- * except(row)
- FROM (
- SELECT
- -- extracts date from source table
- parse_date('%Y%m%d',regexp_extract(_table_suffix,'[0-9]+')) as table_date,
- -- flag to indicate if source table is `events_intraday_`
- case when _table_suffix like '%intraday%' then true else false end as is_intraday,
- *,
- row_number() over (partition by user_pseudo_id, event_name, event_timestamp order by event_timestamp) as row
- FROM
- `project.analytics_XXXXXXXXXXXXXX.events_*`
- )
- WHERE
- row = 1
- ),
- pageviews AS (
- SELECT
- parse_date("%Y%m%d", event_date) event_date,
- event_timestamp,
- user_pseudo_id,
- user_first_touch_timestamp,
- device.category as device_category,
- device.language as device_language,
- device.web_info.browser as device_browser,
- geo.continent as geo_continent,
- geo.country as geo_country,
- max(if(params.key = 'ga_session_id', params.value.int_value, null)) ga_session_id,
- max(if(params.key = 'ga_session_number', params.value.int_value, null)) ga_session_number,
- cast(max(if(params.key = 'session_engaged', params.value.string_value, null)) as int64) session_engaged,
- max(if(params.key = 'page_title', params.value.string_value, null)) page_title,
- max(if(params.key = 'page_location', params.value.string_value, null)) page_location,
- max(if(params.key = 'source', params.value.string_value, null)) utm_source,
- max(if(params.key = 'medium', params.value.string_value, null)) utm_medium,
- max(if(params.key = 'campaign', params.value.string_value, null)) utm_campaign,
- max(if(params.key = 'page_referrer', params.value.string_value, null)) utm_referrer
- FROM raw_ga_4,
- UNNEST(event_params) AS params
- WHERE event_name = 'page_view'
- GROUP BY event_date, event_timestamp, user_pseudo_id, user_first_touch_timestamp, device_category, device_language, device_browser, geo_continent, geo_country
- ),
- unique_titles AS (
- SELECT
- DISTINCT(page_title)
- FROM
- pageviews
- LIMIT 200
- ),
- embeddings AS (
- SELECT
- text,
- encoder AS embedding
- FROM ML.PREDICT(MODEL `project.bigquery_ml.embedding_model`,(SELECT REGEXP_REPLACE(page_title, '[^\\w\\s]+', '') AS text FROM unique_titles))
- JOIN (SELECT page_title, REGEXP_REPLACE(page_title, '[^\\w\\s]+', '') AS text FROM unique_titles) USING (text)
- ),
- arrays AS (
- SELECT
- -- 256 dimension embeddings
- embedding[OFFSET(0)] as embed0,
- embedding[OFFSET(1)] as embed1,
- embedding[OFFSET(2)] as embed2,
- embedding[OFFSET(3)] as embed3,
- embedding[OFFSET(4)] as embed4,
- embedding[OFFSET(5)] as embed5,
- embedding[OFFSET(6)] as embed6,
- embedding[OFFSET(7)] as embed7,
- embedding[OFFSET(8)] as embed8,
- embedding[OFFSET(9)] as embed9,
- embedding[OFFSET(10)] as embed10,
- embedding[OFFSET(11)] as embed11,
- embedding[OFFSET(12)] as embed12,
- embedding[OFFSET(13)] as embed13,
- embedding[OFFSET(14)] as embed14,
- embedding[OFFSET(15)] as embed15,
- embedding[OFFSET(16)] as embed16,
- embedding[OFFSET(17)] as embed17,
- embedding[OFFSET(18)] as embed18,
- embedding[OFFSET(19)] as embed19,
- embedding[OFFSET(20)] as embed20,
- embedding[OFFSET(21)] as embed21,
- embedding[OFFSET(22)] as embed22,
- embedding[OFFSET(23)] as embed23,
- embedding[OFFSET(24)] as embed24,
- embedding[OFFSET(25)] as embed25,
- embedding[OFFSET(26)] as embed26,
- embedding[OFFSET(27)] as embed27,
- embedding[OFFSET(28)] as embed28,
- embedding[OFFSET(29)] as embed29,
- embedding[OFFSET(30)] as embed30,
- embedding[OFFSET(31)] as embed31,
- embedding[OFFSET(32)] as embed32,
- embedding[OFFSET(33)] as embed33,
- embedding[OFFSET(34)] as embed34,
- embedding[OFFSET(35)] as embed35,
- embedding[OFFSET(36)] as embed36,
- embedding[OFFSET(37)] as embed37,
- embedding[OFFSET(38)] as embed38,
- embedding[OFFSET(39)] as embed39,
- embedding[OFFSET(40)] as embed40,
- embedding[OFFSET(41)] as embed41,
- embedding[OFFSET(42)] as embed42,
- embedding[OFFSET(43)] as embed43,
- embedding[OFFSET(44)] as embed44,
- embedding[OFFSET(45)] as embed45,
- embedding[OFFSET(46)] as embed46,
- embedding[OFFSET(47)] as embed47,
- embedding[OFFSET(48)] as embed48,
- embedding[OFFSET(49)] as embed49,
- embedding[OFFSET(50)] as embed50,
- embedding[OFFSET(51)] as embed51,
- embedding[OFFSET(52)] as embed52,
- embedding[OFFSET(53)] as embed53,
- embedding[OFFSET(54)] as embed54,
- embedding[OFFSET(55)] as embed55,
- embedding[OFFSET(56)] as embed56,
- embedding[OFFSET(57)] as embed57,
- embedding[OFFSET(58)] as embed58,
- embedding[OFFSET(59)] as embed59,
- embedding[OFFSET(60)] as embed60,
- embedding[OFFSET(61)] as embed61,
- embedding[OFFSET(62)] as embed62,
- embedding[OFFSET(63)] as embed63,
- embedding[OFFSET(64)] as embed64,
- embedding[OFFSET(65)] as embed65,
- embedding[OFFSET(66)] as embed66,
- embedding[OFFSET(67)] as embed67,
- embedding[OFFSET(68)] as embed68,
- embedding[OFFSET(69)] as embed69,
- embedding[OFFSET(70)] as embed70,
- embedding[OFFSET(71)] as embed71,
- embedding[OFFSET(72)] as embed72,
- embedding[OFFSET(73)] as embed73,
- embedding[OFFSET(74)] as embed74,
- embedding[OFFSET(75)] as embed75,
- embedding[OFFSET(76)] as embed76,
- embedding[OFFSET(77)] as embed77,
- embedding[OFFSET(78)] as embed78,
- embedding[OFFSET(79)] as embed79,
- embedding[OFFSET(80)] as embed80,
- embedding[OFFSET(81)] as embed81,
- embedding[OFFSET(82)] as embed82,
- embedding[OFFSET(83)] as embed83,
- embedding[OFFSET(84)] as embed84,
- embedding[OFFSET(85)] as embed85,
- embedding[OFFSET(86)] as embed86,
- embedding[OFFSET(87)] as embed87,
- embedding[OFFSET(88)] as embed88,
- embedding[OFFSET(89)] as embed89,
- embedding[OFFSET(90)] as embed90,
- embedding[OFFSET(91)] as embed91,
- embedding[OFFSET(92)] as embed92,
- embedding[OFFSET(93)] as embed93,
- embedding[OFFSET(94)] as embed94,
- embedding[OFFSET(95)] as embed95,
- embedding[OFFSET(96)] as embed96,
- embedding[OFFSET(97)] as embed97,
- embedding[OFFSET(98)] as embed98,
- embedding[OFFSET(99)] as embed99,
- embedding[OFFSET(100)] as embed100,
- embedding[OFFSET(101)] as embed101,
- embedding[OFFSET(102)] as embed102,
- embedding[OFFSET(103)] as embed103,
- embedding[OFFSET(104)] as embed104,
- embedding[OFFSET(105)] as embed105,
- embedding[OFFSET(106)] as embed106,
- embedding[OFFSET(107)] as embed107,
- embedding[OFFSET(108)] as embed108,
- embedding[OFFSET(109)] as embed109,
- embedding[OFFSET(110)] as embed110,
- embedding[OFFSET(111)] as embed111,
- embedding[OFFSET(112)] as embed112,
- embedding[OFFSET(113)] as embed113,
- embedding[OFFSET(114)] as embed114,
- embedding[OFFSET(115)] as embed115,
- embedding[OFFSET(116)] as embed116,
- embedding[OFFSET(117)] as embed117,
- embedding[OFFSET(118)] as embed118,
- embedding[OFFSET(119)] as embed119,
- embedding[OFFSET(120)] as embed120,
- embedding[OFFSET(121)] as embed121,
- embedding[OFFSET(122)] as embed122,
- embedding[OFFSET(123)] as embed123,
- embedding[OFFSET(124)] as embed124,
- embedding[OFFSET(125)] as embed125,
- embedding[OFFSET(126)] as embed126,
- embedding[OFFSET(127)] as embed127,
- embedding[OFFSET(128)] as embed128,
- embedding[OFFSET(129)] as embed129,
- embedding[OFFSET(130)] as embed130,
- embedding[OFFSET(131)] as embed131,
- embedding[OFFSET(132)] as embed132,
- embedding[OFFSET(133)] as embed133,
- embedding[OFFSET(134)] as embed134,
- embedding[OFFSET(135)] as embed135,
- embedding[OFFSET(136)] as embed136,
- embedding[OFFSET(137)] as embed137,
- embedding[OFFSET(138)] as embed138,
- embedding[OFFSET(139)] as embed139,
- embedding[OFFSET(140)] as embed140,
- embedding[OFFSET(141)] as embed141,
- embedding[OFFSET(142)] as embed142,
- embedding[OFFSET(143)] as embed143,
- embedding[OFFSET(144)] as embed144,
- embedding[OFFSET(145)] as embed145,
- embedding[OFFSET(146)] as embed146,
- embedding[OFFSET(147)] as embed147,
- embedding[OFFSET(148)] as embed148,
- embedding[OFFSET(149)] as embed149,
- embedding[OFFSET(150)] as embed150,
- embedding[OFFSET(151)] as embed151,
- embedding[OFFSET(152)] as embed152,
- embedding[OFFSET(153)] as embed153,
- embedding[OFFSET(154)] as embed154,
- embedding[OFFSET(155)] as embed155,
- embedding[OFFSET(156)] as embed156,
- embedding[OFFSET(157)] as embed157,
- embedding[OFFSET(158)] as embed158,
- embedding[OFFSET(159)] as embed159,
- embedding[OFFSET(160)] as embed160,
- embedding[OFFSET(161)] as embed161,
- embedding[OFFSET(162)] as embed162,
- embedding[OFFSET(163)] as embed163,
- embedding[OFFSET(164)] as embed164,
- embedding[OFFSET(165)] as embed165,
- embedding[OFFSET(166)] as embed166,
- embedding[OFFSET(167)] as embed167,
- embedding[OFFSET(168)] as embed168,
- embedding[OFFSET(169)] as embed169,
- embedding[OFFSET(170)] as embed170,
- embedding[OFFSET(171)] as embed171,
- embedding[OFFSET(172)] as embed172,
- embedding[OFFSET(173)] as embed173,
- embedding[OFFSET(174)] as embed174,
- embedding[OFFSET(175)] as embed175,
- embedding[OFFSET(176)] as embed176,
- embedding[OFFSET(177)] as embed177,
- embedding[OFFSET(178)] as embed178,
- embedding[OFFSET(179)] as embed179,
- embedding[OFFSET(180)] as embed180,
- embedding[OFFSET(181)] as embed181,
- embedding[OFFSET(182)] as embed182,
- embedding[OFFSET(183)] as embed183,
- embedding[OFFSET(184)] as embed184,
- embedding[OFFSET(185)] as embed185,
- embedding[OFFSET(186)] as embed186,
- embedding[OFFSET(187)] as embed187,
- embedding[OFFSET(188)] as embed188,
- embedding[OFFSET(189)] as embed189,
- embedding[OFFSET(190)] as embed190,
- embedding[OFFSET(191)] as embed191,
- embedding[OFFSET(192)] as embed192,
- embedding[OFFSET(193)] as embed193,
- embedding[OFFSET(194)] as embed194,
- embedding[OFFSET(195)] as embed195,
- embedding[OFFSET(196)] as embed196,
- embedding[OFFSET(197)] as embed197,
- embedding[OFFSET(198)] as embed198,
- embedding[OFFSET(199)] as embed199,
- embedding[OFFSET(200)] as embed200,
- embedding[OFFSET(201)] as embed201,
- embedding[OFFSET(202)] as embed202,
- embedding[OFFSET(203)] as embed203,
- embedding[OFFSET(204)] as embed204,
- embedding[OFFSET(205)] as embed205,
- embedding[OFFSET(206)] as embed206,
- embedding[OFFSET(207)] as embed207,
- embedding[OFFSET(208)] as embed208,
- embedding[OFFSET(209)] as embed209,
- embedding[OFFSET(210)] as embed210,
- embedding[OFFSET(211)] as embed211,
- embedding[OFFSET(212)] as embed212,
- embedding[OFFSET(213)] as embed213,
- embedding[OFFSET(214)] as embed214,
- embedding[OFFSET(215)] as embed215,
- embedding[OFFSET(216)] as embed216,
- embedding[OFFSET(217)] as embed217,
- embedding[OFFSET(218)] as embed218,
- embedding[OFFSET(219)] as embed219,
- embedding[OFFSET(220)] as embed220,
- embedding[OFFSET(221)] as embed221,
- embedding[OFFSET(222)] as embed222,
- embedding[OFFSET(223)] as embed223,
- embedding[OFFSET(224)] as embed224,
- embedding[OFFSET(225)] as embed225,
- embedding[OFFSET(226)] as embed226,
- embedding[OFFSET(227)] as embed227,
- embedding[OFFSET(228)] as embed228,
- embedding[OFFSET(229)] as embed229,
- embedding[OFFSET(230)] as embed230,
- embedding[OFFSET(231)] as embed231,
- embedding[OFFSET(232)] as embed232,
- embedding[OFFSET(233)] as embed233,
- embedding[OFFSET(234)] as embed234,
- embedding[OFFSET(235)] as embed235,
- embedding[OFFSET(236)] as embed236,
- embedding[OFFSET(237)] as embed237,
- embedding[OFFSET(238)] as embed238,
- embedding[OFFSET(239)] as embed239,
- embedding[OFFSET(240)] as embed240,
- embedding[OFFSET(241)] as embed241,
- embedding[OFFSET(242)] as embed242,
- embedding[OFFSET(243)] as embed243,
- embedding[OFFSET(244)] as embed244,
- embedding[OFFSET(245)] as embed245,
- embedding[OFFSET(246)] as embed246,
- embedding[OFFSET(247)] as embed247,
- embedding[OFFSET(248)] as embed248,
- embedding[OFFSET(249)] as embed249,
- embedding[OFFSET(250)] as embed250,
- embedding[OFFSET(251)] as embed251,
- embedding[OFFSET(252)] as embed252,
- embedding[OFFSET(253)] as embed253,
- embedding[OFFSET(254)] as embed254,
- embedding[OFFSET(255)] as embed255
- FROM embeddings
- )
- SELECT * FROM arrays
- )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement