Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- CREATE TEMPORARY FUNCTION date_int_to_date(date_int INT64) AS (CAST(CONCAT(SUBSTR(CAST(date_int AS STRING), 1, 4), "-", SUBSTR(CAST(date_int AS STRING), 5, 2), "-", SUBSTR(CAST(date_int AS STRING), 7, 2)) AS DATE));
- CREATE TEMPORARY FUNCTION is_new_period(date DATE, date_last DATE) AS (CAST(date_last IS NULL OR DATE_DIFF(date, date_last, DAY) >= 14 AS INT64));
- CREATE TEMPORARY FUNCTION zamer_id_to_nomer_zayavki(zamer_id STRING) AS (REPLACE(zamer_id, "#Missed zamer ", ""));
- CREATE TEMPORARY FUNCTION date_to_date_int(dat DATE) AS (CAST(CONCAT(SUBSTR(CAST(dat AS STRING), 1, 4), SUBSTR(CAST(dat AS STRING), 6, 2), SUBSTR(CAST(dat AS STRING), 9, 2)) AS INT64));
- CREATE TEMPORARY FUNCTION get_subchains(sources STRING) RETURNS ARRAY<STRING> LANGUAGE js AS """
- var result = [];
- var f = function(prefix, chars) {
- for (var i = 0; i < chars.length; i++) {
- result.push(prefix + chars[i]);
- f(prefix + chars[i] +';', chars.slice(i + 1));
- }
- }
- f('', sources.split(";"));
- return result;
- """;
- CREATE TEMPORARY FUNCTION get_shapley(chain_joined STRING, data ARRAY<STRUCT<subchain STRING,
- hit_conversions INT64,
- contact_conversions INT64,
- zamer_conversions INT64,
- dogovor_conversions INT64>>)
- RETURNS ARRAY<STRUCT<splited STRING,
- contact_shapley FLOAT64,
- zamer_shapley FLOAT64,
- dogovor_shapley FLOAT64>>
- LANGUAGE js AS """
- var chain_joined = chain_joined;
- var true_subchains = [];
- var hit_conversions = [];
- var contact_conversions = [];
- var zamer_conversions = [];
- var dogovor_conversions = [];
- var split = chain_joined.split(';');
- split = split.sort();
- for (var i = 0; i < data.length; i++) {
- var true_subchain = data[i].subchain.split(';');
- sorted_subchain = true_subchain.sort();
- sorted_subchain = sorted_subchain.join(';');
- true_subchains.push(sorted_subchain);
- hit_conversions.push(data[i].hit_conversions);
- contact_conversions.push(data[i].contact_conversions);
- zamer_conversions.push(data[i].zamer_conversions);
- dogovor_conversions.push(data[i].dogovor_conversions);
- }
- _debug = false;
- var tests = [];
- Array.prototype.flatten = function() {
- var ret = [];
- for(var i = 0; i < this.length; i++) {
- if(Array.isArray(this[i])) {
- ret = ret.concat(this[i].flatten());
- } else {
- ret.push(this[i]);
- }
- }
- return ret;
- };
- function isFloat(n) {
- return n === +n && n !== (n|0);
- }
- var test_objects = function(obj1, obj2) {
- if (Array.isArray(obj1)){
- obj1 = obj1.flatten();
- obj2 = obj2.flatten();
- }
- if (isFloat((obj1[0]))){
- obj1 = obj1.map(function (x) {return Number(x.toFixed(4))});
- obj2 = obj2.map(function (x) {return Number(x.toFixed(4))});
- }
- return JSON.stringify(obj1) === JSON.stringify(obj2);
- };
- var sum = function(mass) {
- var add = function(a, b) {
- return a + b;
- };
- return mass.reduce(add, 0);
- };
- var if_undefined = function(x) {return typeof(x) === 'undefined'};
- tests.push(["if_undefined", function() {
- return test_objects(if_undefined(undefined), true)
- && test_objects(if_undefined(false), false)
- && test_objects(if_undefined(3), false);
- }]);
- var get_combinations = function(chars) {
- var result = [];
- var f = function(prefix, chars) {
- for (var i = 0; i < chars.length; i++) {
- result.push(prefix + chars[i]);
- f(prefix + chars[i] +';', chars.slice(i + 1));
- }
- };
- f('', chars);
- return result;
- };
- tests.push(["get_combinations", function() {
- return test_objects(get_combinations(['a', 'b', 'c']),
- ["a", "a;b", "a;b;c", "a;c", "b", "b;c", "c"]);
- }]);
- var get_permutation = function (array) {
- if (array.length > 1) {
- var firstElement = array[0];
- var returnedArray = get_permutation(array.slice(1));
- var permutedArray= [];
- var temporaryArray = [];
- var elementLength = returnedArray[0].length;
- for (var i = 0; i < returnedArray.length; i++)
- for (var j = 0; j <= elementLength; j++){
- temporaryArray = returnedArray[i].slice(0);
- temporaryArray.splice(j,0,firstElement);
- permutedArray.push(temporaryArray);
- }
- return permutedArray;
- } else {
- return [array];
- }
- };
- tests.push(["get_permutation", function() {
- return test_objects(get_permutation(['a', 'b', 'c']),
- [ [ 'a', 'b', 'c' ],
- [ 'b', 'a', 'c' ],
- [ 'b', 'c', 'a' ],
- [ 'a', 'c', 'b' ],
- [ 'c', 'a', 'b' ],
- [ 'c', 'b', 'a' ] ]);
- }]);
- var average_array = function (arrays, length) {
- var new_array = arrays.map(sum);
- new_array = new_array.map(function (x) {return x/length});
- if (sum(new_array) === 0) {
- new_array.fill(1)
- }
- return new_array
- };
- tests.push(["average_array", function() {
- return test_objects([average_array([[1,1],[-1,-1]], 2), average_array([[1,1],[2,2]], 2)],
- [[1,1], [1,2]]);
- }]);
- var norm_array = function (array) {
- var new_array = [];
- for (var i = 0; i < array.length; i++) {
- if (array[i] < 0) {
- array[i] = 0;
- }
- }
- var array_sum = sum(array);
- for (var i = 0; i < array.length; i++){
- new_array.push(array[i] / array_sum)
- }
- return new_array;
- };
- tests.push(["norm_array", function() {
- return test_objects([norm_array([-1, 1]), norm_array([1,1,2]), norm_array([-1,1,1])],
- [[0, 1], [0.25, 0.25, 0.5], [0, 0.5, 0.5]]);
- }]);
- var find_small_subsets = function (element, permutations) {
- var small_subsets = [];
- for (var j = 0; j < permutations.length; j++) { //рассчитывает значение для каждой перестановки
- var small_subset = [];
- var k = 0;
- while (element !== permutations[j][k]) { //пока не нашли номер компоненты в перестановке, пополняем множество
- small_subset.push(permutations[j][k]);
- k++;
- }
- small_subsets.push(small_subset)
- }
- return small_subsets;
- };
- tests.push(["find_small_subsets", function() {
- return test_objects(find_small_subsets('a', [['a', 'b', 'c'], ['b', 'c', 'a'], ['c', 'a', 'b']]),
- [ [], [ 'b', 'c' ], [ 'c' ] ]);
- }]);
- var sum_dicts = function (dict1, dict2) {
- var keys = Object.keys(dict1);
- var new_dict = {};
- for (var i = 0; i < keys.length; i++) {
- new_dict[keys[i]] = dict1[keys[i]] + dict2[keys[i]]
- }
- return new_dict
- };
- tests.push(["sum_dicts", function() {
- return test_objects(sum_dicts({'a':10, 'b':20}, {'a':1, 'b':2}),
- {'a':11, 'b':22});
- }]);
- var sub_dicts = function (dict1, dict2) {
- var keys = Object.keys(dict1);
- var new_dict = {};
- for (var i = 0; i < keys.length; i++) {
- new_dict[keys[i]] = dict1[keys[i]] - dict2[keys[i]]
- }
- return new_dict
- };
- tests.push(["sub_dicts", function() {
- return test_objects(sub_dicts({'a':10, 'b':20}, {'a':1, 'b':2}),
- {'a':9, 'b':18});
- }]);
- var compute_convs = function (subchains) {
- var exclude_sources = ['yandex / cpc / y80',
- 'yandex / cpc / y80_night',
- 'google / cpc / Brand_google',
- 'yandex / organic / (not set)',
- 'google / organic / (not set)'];
- var sources = Object.keys(subchains);
- var new_subchains = {'': {'hits': 0, 'contacts_conv': 0, 'zamers_conv': 0, 'dogovors_conv':0}};
- for (var i = 0; i < sources.length; i++) {
- var source = sources[i];
- if (exclude_sources.indexOf(source) === -1) {
- var included_sources = sources.filter(function(x){return x.includes(source)});
- var included_subchains = [];
- for (var j = 0; j < included_sources.length; j++) {
- included_subchains.push(subchains[included_sources[j]]);
- }
- var recounted_subchain = included_subchains.reduce(sum_dicts);
- if (recounted_subchain['hits'] === 0) {
- recounted_subchain['contacts_conv'] = 0;
- recounted_subchain['zamers_conv'] = 0;
- recounted_subchain['dogovors_conv'] = 0;
- } else {
- recounted_subchain['contacts_conv'] = recounted_subchain['contacts'] / recounted_subchain['hits'];
- recounted_subchain['zamers_conv'] = recounted_subchain['zamers'] / recounted_subchain['hits'];
- recounted_subchain['dogovors_conv'] = recounted_subchain['dogovors'] / recounted_subchain['hits'];
- }
- } else {
- recounted_subchain = {}
- recounted_subchain['contacts_conv'] = 0;
- recounted_subchain['zamers_conv'] = 0;
- recounted_subchain['dogovors_conv'] = 0;
- }
- new_subchains[source] = recounted_subchain;
- }
- return new_subchains;
- };
- tests.push(["compute_convs", function() {
- return test_objects(compute_convs({'a': {'hits':100, 'contacts':30, 'zamers': 20, 'dogovors':10},
- 'b': {'hits':100, 'contacts':0, 'zamers': 20, 'dogovors':10},
- 'a;b': {'hits':100, 'contacts':40, 'zamers': 20, 'dogovors':10}})['a'],
- {'hits': 200,
- 'contacts': 70,
- 'zamers': 40,
- 'dogovors': 20,
- 'contacts_conv': 0.35,
- 'zamers_conv': 0.2,
- 'dogovors_conv': 0.1 });
- }]);
- var compute_shapley = function(split, subchains) {
- var shapley_vec = {'contacts': [], 'zamers': [], 'dogovors': []};
- var permutations = get_permutation(split);
- var convs = compute_convs(subchains);
- for (var i = 0; i < split.length; i++) { //заполняет компоненты вектора Шепли (суммирование потом)
- shapley_vec['contacts'].push([]);
- shapley_vec['zamers'].push([]);
- shapley_vec['dogovors'].push([]);
- var element = split[i];
- var small_subsets = find_small_subsets(element, permutations);
- var big_subsets = JSON.parse(JSON.stringify(small_subsets));
- big_subsets.map(function (x) {return x.push(element)});
- small_subsets = small_subsets.map(function(x) {return x.sort().join(';')});
- big_subsets = big_subsets.map(function(x) {return x.sort().join(';')});
- for (var j = 0; j < small_subsets.length; j++) {
- var small_subset = small_subsets[j];
- var big_subset = big_subsets[j];
- var convs_sub = sub_dicts(convs[big_subset], convs[small_subset]);
- shapley_vec['contacts'][i].push(convs_sub['contacts_conv']);
- shapley_vec['zamers'][i].push(convs_sub['zamers_conv']);
- shapley_vec['dogovors'][i].push(convs_sub['dogovors_conv']);
- }
- }
- //Усреднить значения подмассивов shapley_vec
- shapley_vec['contacts'] = average_array(shapley_vec['contacts'], permutations.length);
- shapley_vec['zamers'] = average_array(shapley_vec['zamers'], permutations.length);
- shapley_vec['dogovors'] = average_array(shapley_vec['dogovors'], permutations.length);
- //Отнормировать получившиеся значения
- shapley_vec['contacts'] = norm_array(shapley_vec['contacts']);
- shapley_vec['zamers'] = norm_array(shapley_vec['zamers']);
- shapley_vec['dogovors'] = norm_array(shapley_vec['dogovors']);
- return shapley_vec
- };
- tests.push(["compute_shapley", function() {
- return test_objects(compute_shapley(['a', 'b'],{'a': {'hits':100, 'contacts':30, 'zamers': 20, 'dogovors':10},
- 'b': {'hits':100, 'contacts':0, 'zamers': 20, 'dogovors':10},
- 'a;b': {'hits':100, 'contacts':40, 'zamers': 20, 'dogovors':10}})['contacts'],
- [0.6875, 0.3125]);
- }]);
- var hit_conversions_test = [100, 100];
- var contact_conversions_test = [30, 0];
- var zamer_conversions_test = [20, 20];
- var dogovor_conversions_test = [10, 10];
- var fill_subchains = function(split, true_subchains, hit_conversions,
- contact_conversions, zamer_conversions, dogovor_conversions) {
- var subchains = {};
- var all_combinations = get_combinations(split);
- for (var i = 0; i < true_subchains.length; i++) {
- subchains[true_subchains[i]] = {name: true_subchains[i],
- hits: hit_conversions[i],
- contacts: contact_conversions[i],
- zamers: zamer_conversions[i],
- dogovors: dogovor_conversions[i]}
- }
- //Инициализируем несуществующуие комбинации
- for (var i = 0; i < all_combinations.length; i++) {
- if (if_undefined(subchains[all_combinations[i]])) {
- subchains[all_combinations[i]] = {name: all_combinations[i],
- contacts: 0,
- zamers: 0,
- dogovors: 0,
- hits: 0};
- }
- }
- return subchains;
- };
- tests.push(["fill_subchains", function() {
- return test_objects(fill_subchains(['a', 'b'], ['a', 'a;b'], hit_conversions_test,
- contact_conversions_test, zamer_conversions_test, dogovor_conversions_test),
- { a: { name: 'a', hits: 100, contacts: 30, zamers: 20, dogovors: 10 },
- 'a;b': { name: 'a;b', hits: 100, contacts: 0, zamers: 20, dogovors: 10 },
- b: { name: 'b', contacts: 0, zamers: 0, dogovors: 0, hits: 0 } }
- );
- }]);
- /*===============================================================*/
- var test_all = function() {
- var single_test = function(name, fun) {
- console.log("Executing test " + name);
- try {
- if (fun()) {
- console.log("ok");
- return true;
- } else {
- console.log("FAILED");
- return false;
- }
- } catch (e) {
- console.log("ERROR");
- console.log(e.message);
- return false;
- }
- };
- var someFailed = false;
- tests.forEach(function(t) {
- someFailed = !single_test(t[0], t[1]) || someFailed;
- });
- console.log(someFailed ? "Some Failed!!!": "OK");
- };
- if (_debug) test_all();
- var subchains = fill_subchains(split, true_subchains, hit_conversions,
- contact_conversions, zamer_conversions, dogovor_conversions);
- var shapley_vec = compute_shapley(split, subchains);
- result = [];
- for (var i = 0; i < split.length; i++) {
- result.push({"splited": split[i],
- "contact_shapley": shapley_vec['contacts'][i],
- "zamer_shapley": shapley_vec['zamers'][i],
- "dogovor_shapley": shapley_vec['dogovors'][i]});
- };
- return result;
- """;
- WITH
- #Оставляем только первые хиты в сессии
- master_without_all_hits AS (SELECT * REPLACE(ARRAY(SELECT AS STRUCT * FROM
- (SELECT *, CONCAT(indirect_source, indirect_medium, indirect_campaign)=IFNULL(LAG(CONCAT(indirect_source, indirect_medium, indirect_campaign)) OVER(ORDER BY time), '') and
- DATE_DIFF(DATE(TIMESTAMP_SECONDS(time)), DATE(TIMESTAMP_SECONDS(LAG(time) OVER(ORDER BY time))), DAY) < 14 as hitToRemove FROM UNNEST(hit))
- WHERE NOT hitToRemove) as hit) FROM fabrika21.master),
- # Выписываем в отдельный список все хиты и контакты. Напротив каждого ставим время и дату и приоритет: напротив хита - 0, напротив грязного контакта - 1, напротив чистого контакта - 2.
- master_events AS (SELECT *, ARRAY(SELECT STRUCT(type, id, date_int_to_date(date_int) AS dat, time, priority)
- FROM (SELECT "hit" AS type, hitId AS id, date_int, time, 0 AS priority FROM UNNEST(hit)
- UNION ALL SELECT "contact" AS type, contact_card_id AS id, date_int, time, IF(is_clear AND length(call_center_manager)>2, 2, 1) AS priority FROM UNNEST(contact))) AS events FROM master_without_all_hits),
- # Находим те хиты или контакты, которым предшествовал 14-дневный перерыв - это будет начало нового периода
- master_new_period AS (SELECT * REPLACE (ARRAY(SELECT STRUCT(type, id, dat, time, priority, is_new_period(dat, LAG(dat) OVER(ORDER BY time, priority)) AS new_period)
- FROM UNNEST(events)) AS events) FROM master_events),
- # Все хиты и контакты нумеруем по периодам
- master_period_number AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, dat, time, priority, SUM(new_period) OVER (ORDER BY time, priority) AS period_number)
- FROM UNNEST(events)) AS events) FROM master_new_period),
- # Для каждого договора находим соответствующий замер (по номеру), его время и дату
- master_dogovor_matching AS (SELECT *, ARRAY(SELECT STRUCT(z.date_int AS date_int, z.time AS time, nomer AS id)
- FROM UNNEST(dogovor) d JOIN UNNEST(zamer) z ON d.nomerZayavki = zamer_id_to_nomer_zayavki(z.zamer_id)) AS dogovor_matching FROM master_period_number),
- # Добавляем замеры и договоры в список к хитам и контактам. При этом в качестве даты договора ставим дату соответствующего замера. У замера приоритет 3, у договора - 4.
- master_zamers_and_dogovors AS (SELECT * REPLACE(ARRAY_CONCAT(events, ARRAY(SELECT STRUCT(type, id, date_int_to_date(date_int) AS dat, time, priority, NULL AS period_number)
- FROM (SELECT "zamer" AS type, zamer_id AS id, date_int, time, 3 AS priority FROM UNNEST(zamer)
- UNION ALL SELECT "dogovor" AS type, id, date_int, time, 4 AS priority FROM UNNEST(dogovor_matching)))) AS events) FROM master_dogovor_matching),
- # Приписываем каждому замеру и договору номер периода непосредственно предшествующего хита или контакта
- master_events_classified AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, dat, priority,
- IFNULL(period_number, MAX(period_number) OVER(ORDER BY time, priority)) AS period_number,
- IFNULL(time, MAX(time) OVER(ORDER BY time, priority)) AS time)
- FROM UNNEST(events)) AS events) FROM master_zamers_and_dogovors),
- # В следующих двух запросах удаляем все хиты и контакты в периоде, кроме первого
- master_unregular_hits_contacts AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, dat, priority, period_number, time,
- max(priority) over (PARTITION BY period_number ORDER BY time, priority ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS check_priority)
- from unnest(events)) as events)
- from master_events_classified),
- #Удаляем контакты, но оставляем хиты. Первые хиты помечаются в колонке is_first_hit
- master_remove_unregular_contacts AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, dat, priority, period_number, time, check_priority,
- (check_priority is NUll OR priority > check_priority OR priority >= 3) and type='hit' as is_first_hit)
- from unnest(events) where check_priority is NUll OR priority > check_priority OR priority >= 3 OR type = 'hit') AS events)
- from master_unregular_hits_contacts),
- first_hit_in_period AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, dat, time, priority, period_number, is_first_hit,
- MIN(IF(type = "hit" and is_first_hit, time, NULL)) OVER(PARTITION BY period_number) AS first_hit_time,
- MIN(IF(type = "contact", time, NULL)) OVER(PARTITION BY period_number) AS first_contact_time) FROM UNNEST(events)) AS events) FROM master_remove_unregular_contacts),
- fill_data AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, dat, time, priority, period_number,
- IFNULL(first_hit_time, MAX(first_hit_time) OVER(ORDER BY time, priority)) AS first_hit_time,IF(first_contact_time is NUll AND priority >0, MAX(first_contact_time) OVER(ORDER BY time, priority),first_contact_time) AS first_contact_time, SUM(summa) OVER(PARTITION BY period_number) AS summa)
- FROM UNNEST(events) AS e LEFT JOIN UNNEST(dogovor) AS d ON IF(type = "dogovor", id, NULL) = nomer order by dat asc) AS events) FROM first_hit_in_period),
- generate_periods AS (SELECT *, ARRAY(SELECT STRUCT(period_number,
- ANY_VALUE(h.landing) AS landing,
- date_int_to_date(ANY_VALUE(h.date_int)) AS date_int,
- date_int_to_date(IFNULL(ANY_VALUE(fc.date_int),ANY_VALUE(h.date_int))) AS contact_date_int,
- date_int_to_date(IFNULL(MIN(IF(type = "zamer", date_to_date_int(dat), NULL)),IFNULL(ANY_VALUE(fc.date_int),ANY_VALUE(h.date_int)))) AS zamer_date_int,
- date_int_to_date(IFNULL(MIN(IF(type = "dogovor", d.date_int , NULL)),IFNULL(MIN(IF(type = "zamer", date_to_date_int(dat), NULL)),IFNULL(ANY_VALUE(fc.date_int),ANY_VALUE(h.date_int))))) AS dogovor_date_int,
- TIMESTAMP_SECONDS(ANY_VALUE(h.time)) AS hit_time,
- TIMESTAMP_SECONDS(IFNULL(ANY_VALUE(fc.time),ANY_VALUE(h.time))) AS contact_time,
- TIMESTAMP_SECONDS(IFNULL(MIN(IF(type = "zamer", e.time, NULL)),IFNULL(ANY_VALUE(fc.time),ANY_VALUE(h.time)))) AS zamer_time,
- TIMESTAMP_SECONDS(IFNULL(MIN(IF(type = "dogovor", UNIX_SECONDS(TIMESTAMP_ADD(TIMESTAMP (date_int_to_date(d.date_int)), INTERVAL 12 hour)), NULL)),IFNULL(MIN(IF(type = "zamer", e.time, NULL)),IFNULL(ANY_VALUE(fc.time),ANY_VALUE(h.time))))) AS dogovor_time,
- ANY_VALUE(h.is_mobile) AS is_mobile,
- ANY_VALUE(h.indirect_source) AS source,
- ANY_VALUE(h.indirect_medium) AS medium,
- ANY_VALUE(h.indirect_campaign) AS campaign,
- ANY_VALUE(h.indirect_keyword) AS keyword,
- ANY_VALUE(h.indirect_adContent) AS adContent,
- CAST(regexp_extract(ANY_VALUE(h.indirect_adContent) , "[|]?gr_([^|]+)") AS STRING) AS group_number,
- LOGICAL_OR(type = "contact") AS period_contact,
- ANY_VALUE(fc.phone) AS period_phone,
- ANY_VALUE(fc.in_phone) AS in_phone,
- ANY_VALUE(fc.one_c_source) AS one_c_source,
- ANY_VALUE(fc.product_type) AS product_type,
- ANY_VALUE(fc.coldcall) AS coldcall,
- ANY_VALUE(fc.meeting) AS meeting,
- ANY_VALUE(fc.webrequest) AS webrequest,
- ANY_VALUE(fc.incall) AS incall,
- ANY_VALUE(fc.webcall) AS webcall,
- (select manager_source from unnest(client_card) limit 1) as manager_source,
- NOT(LOGICAL_OR(c.is_service = FALSE)) AS is_service,
- ANY_VALUE(fc.call_center_manager) AS call_center_manager,
- is_clear AND LOGICAL_OR(LENGTH(fc.call_center_manager) > 3) AS contact_clear,
- COUNT(DISTINCT IF(type = "zamer", id, NULL)) AS zamer_count,
- COUNT(DISTINCT IF(type = "dogovor", id, NULL)) AS dogovor_count,
- ANY_VALUE(fc.phone) AS phone_number,
- ANY_VALUE(e.summa) AS summa)
- FROM UNNEST(events) AS e
- LEFT JOIN UNNEST(hit) AS h ON first_hit_time = h.time
- LEFT JOIN UNNEST(contact) fc ON first_contact_time = fc.time
- LEFT JOIN UNNEST(contact) c ON id = c.contact_card_id
- LEFT JOIN UNNEST(dogovor) d ON id = d.nomer
- GROUP BY period_number order by period_number asc) AS periods FROM fill_data),
- events_chains AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, e.time AS time, dat, priority, period_number,
- ARRAY_AGG(STRUCT(h.indirect_source AS source, h.indirect_medium AS medium, h.indirect_campaign AS campaign)) OVER(PARTITION BY period_number ORDER BY e.time, priority) AS chains,
- FIRST_VALUE(type) OVER(PARTITION BY period_number ORDER BY e.time, priority) = "hit" AS period_starts_with_hit)
- FROM UNNEST(events) AS e LEFT JOIN UNNEST(hit) AS h ON IF(type="hit", id, NULL) = h.hitId) AS events) FROM generate_periods),
- events_last_period_number AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, time, dat, priority, period_number,
- ARRAY(SELECT STRUCT(source, medium, campaign) FROM (SELECT DISTINCT source, medium, campaign FROM UNNEST(chains))) AS chains,
- MAX(IF(period_starts_with_hit, period_number, NULL)) OVER(ORDER BY time, priority) AS last_period_number) FROM UNNEST(events)) AS events) FROM events_chains),
- events_all_chains AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(e1.type AS type, e1.id AS id, e1.time AS time, e1.dat AS dat, e1.priority AS priority, e1.period_number AS period_number,
- ARRAY_CONCAT_AGG(e2.chains) AS chains) FROM UNNEST(events) e1 JOIN UNNEST(events) AS e2 ON e2.period_number BETWEEN e1.last_period_number AND e1.period_number
- AND (e1.period_number != e2.period_number OR e2.time <= e1.time)
- GROUP BY e1.type, e1.id, e1.time, e1.dat, e1.priority, e1.period_number) AS events) FROM events_last_period_number),
- events_distinct_chains AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, time, dat, priority, period_number, ARRAY(SELECT STRUCT(source, medium, campaign) FROM (SELECT DISTINCT * FROM UNNEST(chains)
- WHERE source IS NOT NULL ORDER BY source, medium, campaign LIMIT 6)) AS chains, IF(type='hit', LAG(type) OVER(PARTITION BY period_number, type ORDER BY time DESC) IS NULL, LAG(type) OVER(PARTITION BY period_number, type ORDER BY time, priority) IS NULL) AS first_event_last_hit,
- MIN(dat) OVER(PARTITION BY period_number) AS period_start)
- FROM UNNEST(events) ORDER by time) AS events) FROM events_all_chains),
- event_first_hit_last AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, time, dat, priority, period_number, chains, period_start,
- ARRAY_TO_STRING(ARRAY(SELECT CONCAT(source, "/", medium, "/", campaign) FROM UNNEST(chains)), ";") AS chain_joined) FROM UNNEST(events) WHERE first_event_last_hit) AS events) FROM events_distinct_chains),
- periods_chain_joined AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(IF(type='hit', e.chain_joined, NULL) as hit_chain_joined,IF(type='contact', e.chain_joined, NULL) as contact_chain_joined, IF(type='zamer', e.chain_joined, NULL) as zamer_chain_joined, IF(type='dogovor', e.chain_joined, NULL) as dogovor_chain_joined,
- p.period_number as period_number, p.landing as landing, p.date_int as date_int,
- p.contact_date_int as contact_date_int, p.zamer_date_int as zamer_date_int, p.dogovor_date_int as dogovor_date_int,
- p.hit_time as hit_time, p.contact_time as contact_time, p.zamer_time as zamer_time, p.dogovor_time as dogovor_time,
- p.is_mobile as is_mobile, p.source as source, p.medium as medium, p.campaign as campaign, p.keyword as keyword,
- p.adContent as adContent, p.group_number as group_number, p.period_contact as period_contact, p.period_phone as period_phone,
- p.in_phone as in_phone, p.one_c_source as one_c_source, p.product_type as product_type, p.coldcall as coldcall,
- p.meeting as meeting, p.webrequest as webrequest, p.incall as incall, p.webcall as webcall, p.manager_source as manager_source,
- p.is_service as is_service, p.call_center_manager as call_center_manager, p.contact_clear as contact_clear, p.zamer_count as zamer_count,
- p.dogovor_count as dogovor_count, p.phone_number as phone_number, p.summa as summa)
- FROM UNNEST(periods) AS p JOIN UNNEST(events) AS e ON IF(type='contact', e.dat=p.contact_date_int, IF(type='zamer', e.dat=p.zamer_date_int, IF(type='dogovor', e.dat=p.dogovor_date_int, IF(type='hit', e.period_start=p.date_int, false)))) ) as periods) FROM event_first_hit_last),
- group_periods_chain_joined AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(MAX(hit_chain_joined) as hit_chain_joined, MAX(contact_chain_joined) as contact_chain_joined, MAX(zamer_chain_joined) as zamer_chain_joined, MAX(dogovor_chain_joined) as dogovor_chain_joined,
- period_number, landing, date_int, contact_date_int, zamer_date_int, dogovor_date_int,
- hit_time, contact_time, zamer_time, dogovor_time, is_mobile, source, medium, campaign, keyword,
- adContent, group_number, period_contact, period_phone, in_phone, one_c_source, product_type, coldcall,
- meeting, webrequest, incall, webcall, manager_source, is_service, call_center_manager, contact_clear, zamer_count,
- dogovor_count, phone_number, summa)
- FROM UNNEST(periods) GROUP BY period_number, landing, date_int, contact_date_int, zamer_date_int, dogovor_date_int,
- hit_time, contact_time, zamer_time, dogovor_time, is_mobile, source, medium, campaign, keyword,
- adContent, group_number, period_contact, period_phone, in_phone, one_c_source, product_type, coldcall,
- meeting, webrequest, incall, webcall, manager_source, is_service, call_center_manager, contact_clear, zamer_count,
- dogovor_count, phone_number, summa) as periods) FROM periods_chain_joined),
- dates AS (SELECT period_dat, GENERATE_DATE_ARRAY(DATE_ADD(period_dat, INTERVAL -28 DAY), period_dat) AS other_dats
- FROM UNNEST((SELECT GENERATE_DATE_ARRAY("2016-01-01", CURRENT_DATE("+03:00")))) AS period_dat),
- chains_subchains AS (SELECT period_dat, chain_joined, get_subchains(chain_joined) AS subchains, COUNT(DISTINCT primary) AS hit_conversions,
- COUNT(DISTINCT IF(priority = 2, primary, NULL)) AS contact_conversions,
- COUNT(DISTINCT IF(priority = 3, primary, NULL)) AS zamer_conversions,
- COUNT(DISTINCT IF(priority = 4, primary, NULL)) AS dogovor_conversions
- FROM (SELECT * FROM group_periods_chain_joined, UNNEST(events)) JOIN (SELECT * FROM dates, UNNEST(other_dats) AS other_dat) ON period_start = other_dat GROUP BY period_dat, chain_joined),
- shapley_data AS (SELECT c1.period_dat AS period_dat, c1.chain_joined AS chain_joined,
- ARRAY_AGG(STRUCT(c2.chain_joined AS subchain, c2.hit_conversions AS hit_conversions, c2.contact_conversions AS contact_conversions,
- c2.zamer_conversions AS zamer_conversions, c2.dogovor_conversions AS dogovor_conversions)) AS data
- FROM (SELECT * FROM chains_subchains, UNNEST(subchains) AS subchain) AS c1 JOIN chains_subchains AS c2 ON c1.subchain = c2.chain_joined AND c1.period_dat = c2.period_dat
- GROUP BY period_dat, chain_joined),
- shapley_report as (SELECT period_dat ,chain_joined, data, get_shapley(chain_joined, data) as shapley
- FROM shapley_data)
- SELECT * FROM group_periods_chain_joined #Таблица, включающая периоды с chain_joined (записана в shapley_periods)
- --SELECT * FROM shapley_report #таблица в данными по Шепли и рассчитанными коэффициентами (записана в shapley_report)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement