Advertisement
Guest User

Untitled

a guest
Jul 22nd, 2017
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 32.00 KB | None | 0 0
  1. CREATE TEMPORARY FUNCTION date_int_to_date(date_int INT64) AS (CAST(CONCAT(SUBSTR(CAST(date_int AS STRING), 1, 4), "-", SUBSTR(CAST(date_int AS STRING), 5, 2), "-", SUBSTR(CAST(date_int AS STRING), 7, 2)) AS DATE));
  2. CREATE TEMPORARY FUNCTION is_new_period(date DATE, date_last DATE) AS (CAST(date_last IS NULL OR DATE_DIFF(date, date_last, DAY) >= 14 AS INT64));
  3. CREATE TEMPORARY FUNCTION zamer_id_to_nomer_zayavki(zamer_id STRING) AS (REPLACE(zamer_id, "#Missed zamer ", ""));
  4. CREATE TEMPORARY FUNCTION date_to_date_int(dat DATE) AS (CAST(CONCAT(SUBSTR(CAST(dat AS STRING), 1, 4), SUBSTR(CAST(dat AS STRING), 6, 2), SUBSTR(CAST(dat AS STRING), 9, 2)) AS INT64));
  5.  
  6. CREATE TEMPORARY FUNCTION get_subchains(sources STRING) RETURNS ARRAY<STRING> LANGUAGE js AS """
  7. var result = [];
  8. var f = function(prefix, chars) {
  9. for (var i = 0; i < chars.length; i++) {
  10. result.push(prefix + chars[i]);
  11. f(prefix + chars[i] +';', chars.slice(i + 1));
  12. }
  13. }
  14. f('', sources.split(";"));
  15. return result;
  16. """;
  17.  
  18. CREATE TEMPORARY FUNCTION get_shapley(chain_joined STRING, data ARRAY<STRUCT<subchain STRING,
  19. hit_conversions INT64,
  20. contact_conversions INT64,
  21. zamer_conversions INT64,
  22. dogovor_conversions INT64>>)
  23. RETURNS ARRAY<STRUCT<splited STRING,
  24. contact_shapley FLOAT64,
  25. zamer_shapley FLOAT64,
  26. dogovor_shapley FLOAT64>>
  27. LANGUAGE js AS """
  28.  
  29. var chain_joined = chain_joined;
  30. var true_subchains = [];
  31. var hit_conversions = [];
  32. var contact_conversions = [];
  33. var zamer_conversions = [];
  34. var dogovor_conversions = [];
  35. var split = chain_joined.split(';');
  36. split = split.sort();
  37.  
  38. for (var i = 0; i < data.length; i++) {
  39. var true_subchain = data[i].subchain.split(';');
  40. sorted_subchain = true_subchain.sort();
  41. sorted_subchain = sorted_subchain.join(';');
  42. true_subchains.push(sorted_subchain);
  43. hit_conversions.push(data[i].hit_conversions);
  44. contact_conversions.push(data[i].contact_conversions);
  45. zamer_conversions.push(data[i].zamer_conversions);
  46. dogovor_conversions.push(data[i].dogovor_conversions);
  47. }
  48.  
  49.  
  50.  
  51. _debug = false;
  52. var tests = [];
  53.  
  54.  
  55. Array.prototype.flatten = function() {
  56. var ret = [];
  57. for(var i = 0; i < this.length; i++) {
  58. if(Array.isArray(this[i])) {
  59. ret = ret.concat(this[i].flatten());
  60. } else {
  61. ret.push(this[i]);
  62. }
  63. }
  64. return ret;
  65. };
  66.  
  67. function isFloat(n) {
  68. return n === +n && n !== (n|0);
  69. }
  70.  
  71. var test_objects = function(obj1, obj2) {
  72. if (Array.isArray(obj1)){
  73. obj1 = obj1.flatten();
  74. obj2 = obj2.flatten();
  75. }
  76. if (isFloat((obj1[0]))){
  77. obj1 = obj1.map(function (x) {return Number(x.toFixed(4))});
  78. obj2 = obj2.map(function (x) {return Number(x.toFixed(4))});
  79. }
  80. return JSON.stringify(obj1) === JSON.stringify(obj2);
  81. };
  82.  
  83. var sum = function(mass) {
  84. var add = function(a, b) {
  85. return a + b;
  86. };
  87. return mass.reduce(add, 0);
  88. };
  89.  
  90. var if_undefined = function(x) {return typeof(x) === 'undefined'};
  91.  
  92. tests.push(["if_undefined", function() {
  93. return test_objects(if_undefined(undefined), true)
  94. && test_objects(if_undefined(false), false)
  95. && test_objects(if_undefined(3), false);
  96. }]);
  97.  
  98. var get_combinations = function(chars) {
  99. var result = [];
  100. var f = function(prefix, chars) {
  101. for (var i = 0; i < chars.length; i++) {
  102. result.push(prefix + chars[i]);
  103. f(prefix + chars[i] +';', chars.slice(i + 1));
  104. }
  105. };
  106. f('', chars);
  107. return result;
  108. };
  109.  
  110. tests.push(["get_combinations", function() {
  111. return test_objects(get_combinations(['a', 'b', 'c']),
  112. ["a", "a;b", "a;b;c", "a;c", "b", "b;c", "c"]);
  113. }]);
  114.  
  115. var get_permutation = function (array) {
  116. if (array.length > 1) {
  117. var firstElement = array[0];
  118. var returnedArray = get_permutation(array.slice(1));
  119. var permutedArray= [];
  120. var temporaryArray = [];
  121. var elementLength = returnedArray[0].length;
  122. for (var i = 0; i < returnedArray.length; i++)
  123. for (var j = 0; j <= elementLength; j++){
  124. temporaryArray = returnedArray[i].slice(0);
  125. temporaryArray.splice(j,0,firstElement);
  126. permutedArray.push(temporaryArray);
  127. }
  128. return permutedArray;
  129. } else {
  130. return [array];
  131. }
  132. };
  133.  
  134. tests.push(["get_permutation", function() {
  135. return test_objects(get_permutation(['a', 'b', 'c']),
  136. [ [ 'a', 'b', 'c' ],
  137. [ 'b', 'a', 'c' ],
  138. [ 'b', 'c', 'a' ],
  139. [ 'a', 'c', 'b' ],
  140. [ 'c', 'a', 'b' ],
  141. [ 'c', 'b', 'a' ] ]);
  142. }]);
  143.  
  144. var average_array = function (arrays, length) {
  145. var new_array = arrays.map(sum);
  146. new_array = new_array.map(function (x) {return x/length});
  147. if (sum(new_array) === 0) {
  148. new_array.fill(1)
  149. }
  150.  
  151. return new_array
  152. };
  153.  
  154. tests.push(["average_array", function() {
  155. return test_objects([average_array([[1,1],[-1,-1]], 2), average_array([[1,1],[2,2]], 2)],
  156. [[1,1], [1,2]]);
  157. }]);
  158.  
  159. var norm_array = function (array) {
  160. var new_array = [];
  161.  
  162. for (var i = 0; i < array.length; i++) {
  163. if (array[i] < 0) {
  164. array[i] = 0;
  165. }
  166. }
  167.  
  168. var array_sum = sum(array);
  169.  
  170. for (var i = 0; i < array.length; i++){
  171. new_array.push(array[i] / array_sum)
  172. }
  173. return new_array;
  174. };
  175.  
  176. tests.push(["norm_array", function() {
  177. return test_objects([norm_array([-1, 1]), norm_array([1,1,2]), norm_array([-1,1,1])],
  178. [[0, 1], [0.25, 0.25, 0.5], [0, 0.5, 0.5]]);
  179. }]);
  180.  
  181. var find_small_subsets = function (element, permutations) {
  182. var small_subsets = [];
  183. for (var j = 0; j < permutations.length; j++) { //рассчитывает значение для каждой перестановки
  184. var small_subset = [];
  185. var k = 0;
  186. while (element !== permutations[j][k]) { //пока не нашли номер компоненты в перестановке, пополняем множество
  187. small_subset.push(permutations[j][k]);
  188. k++;
  189. }
  190. small_subsets.push(small_subset)
  191. }
  192. return small_subsets;
  193. };
  194.  
  195. tests.push(["find_small_subsets", function() {
  196. return test_objects(find_small_subsets('a', [['a', 'b', 'c'], ['b', 'c', 'a'], ['c', 'a', 'b']]),
  197. [ [], [ 'b', 'c' ], [ 'c' ] ]);
  198. }]);
  199.  
  200. var sum_dicts = function (dict1, dict2) {
  201. var keys = Object.keys(dict1);
  202. var new_dict = {};
  203. for (var i = 0; i < keys.length; i++) {
  204. new_dict[keys[i]] = dict1[keys[i]] + dict2[keys[i]]
  205. }
  206. return new_dict
  207. };
  208.  
  209. tests.push(["sum_dicts", function() {
  210. return test_objects(sum_dicts({'a':10, 'b':20}, {'a':1, 'b':2}),
  211. {'a':11, 'b':22});
  212. }]);
  213.  
  214. var sub_dicts = function (dict1, dict2) {
  215. var keys = Object.keys(dict1);
  216. var new_dict = {};
  217. for (var i = 0; i < keys.length; i++) {
  218. new_dict[keys[i]] = dict1[keys[i]] - dict2[keys[i]]
  219. }
  220. return new_dict
  221. };
  222.  
  223. tests.push(["sub_dicts", function() {
  224. return test_objects(sub_dicts({'a':10, 'b':20}, {'a':1, 'b':2}),
  225. {'a':9, 'b':18});
  226. }]);
  227.  
  228. var compute_convs = function (subchains) {
  229. var exclude_sources = ['yandex / cpc / y80',
  230. 'yandex / cpc / y80_night',
  231. 'google / cpc / Brand_google',
  232. 'yandex / organic / (not set)',
  233. 'google / organic / (not set)'];
  234.  
  235. var sources = Object.keys(subchains);
  236. var new_subchains = {'': {'hits': 0, 'contacts_conv': 0, 'zamers_conv': 0, 'dogovors_conv':0}};
  237. for (var i = 0; i < sources.length; i++) {
  238. var source = sources[i];
  239. if (exclude_sources.indexOf(source) === -1) {
  240. var included_sources = sources.filter(function(x){return x.includes(source)});
  241. var included_subchains = [];
  242. for (var j = 0; j < included_sources.length; j++) {
  243. included_subchains.push(subchains[included_sources[j]]);
  244. }
  245. var recounted_subchain = included_subchains.reduce(sum_dicts);
  246. if (recounted_subchain['hits'] === 0) {
  247. recounted_subchain['contacts_conv'] = 0;
  248. recounted_subchain['zamers_conv'] = 0;
  249. recounted_subchain['dogovors_conv'] = 0;
  250. } else {
  251. recounted_subchain['contacts_conv'] = recounted_subchain['contacts'] / recounted_subchain['hits'];
  252. recounted_subchain['zamers_conv'] = recounted_subchain['zamers'] / recounted_subchain['hits'];
  253. recounted_subchain['dogovors_conv'] = recounted_subchain['dogovors'] / recounted_subchain['hits'];
  254. }
  255. } else {
  256. recounted_subchain = {}
  257. recounted_subchain['contacts_conv'] = 0;
  258. recounted_subchain['zamers_conv'] = 0;
  259. recounted_subchain['dogovors_conv'] = 0;
  260. }
  261. new_subchains[source] = recounted_subchain;
  262. }
  263. return new_subchains;
  264. };
  265.  
  266. tests.push(["compute_convs", function() {
  267. return test_objects(compute_convs({'a': {'hits':100, 'contacts':30, 'zamers': 20, 'dogovors':10},
  268. 'b': {'hits':100, 'contacts':0, 'zamers': 20, 'dogovors':10},
  269. 'a;b': {'hits':100, 'contacts':40, 'zamers': 20, 'dogovors':10}})['a'],
  270. {'hits': 200,
  271. 'contacts': 70,
  272. 'zamers': 40,
  273. 'dogovors': 20,
  274. 'contacts_conv': 0.35,
  275. 'zamers_conv': 0.2,
  276. 'dogovors_conv': 0.1 });
  277. }]);
  278.  
  279. var compute_shapley = function(split, subchains) {
  280. var shapley_vec = {'contacts': [], 'zamers': [], 'dogovors': []};
  281. var permutations = get_permutation(split);
  282. var convs = compute_convs(subchains);
  283. for (var i = 0; i < split.length; i++) { //заполняет компоненты вектора Шепли (суммирование потом)
  284. shapley_vec['contacts'].push([]);
  285. shapley_vec['zamers'].push([]);
  286. shapley_vec['dogovors'].push([]);
  287. var element = split[i];
  288. var small_subsets = find_small_subsets(element, permutations);
  289. var big_subsets = JSON.parse(JSON.stringify(small_subsets));
  290. big_subsets.map(function (x) {return x.push(element)});
  291. small_subsets = small_subsets.map(function(x) {return x.sort().join(';')});
  292. big_subsets = big_subsets.map(function(x) {return x.sort().join(';')});
  293. for (var j = 0; j < small_subsets.length; j++) {
  294.  
  295. var small_subset = small_subsets[j];
  296. var big_subset = big_subsets[j];
  297. var convs_sub = sub_dicts(convs[big_subset], convs[small_subset]);
  298.  
  299. shapley_vec['contacts'][i].push(convs_sub['contacts_conv']);
  300. shapley_vec['zamers'][i].push(convs_sub['zamers_conv']);
  301. shapley_vec['dogovors'][i].push(convs_sub['dogovors_conv']);
  302. }
  303. }
  304. //Усреднить значения подмассивов shapley_vec
  305. shapley_vec['contacts'] = average_array(shapley_vec['contacts'], permutations.length);
  306. shapley_vec['zamers'] = average_array(shapley_vec['zamers'], permutations.length);
  307. shapley_vec['dogovors'] = average_array(shapley_vec['dogovors'], permutations.length);
  308. //Отнормировать получившиеся значения
  309. shapley_vec['contacts'] = norm_array(shapley_vec['contacts']);
  310. shapley_vec['zamers'] = norm_array(shapley_vec['zamers']);
  311. shapley_vec['dogovors'] = norm_array(shapley_vec['dogovors']);
  312. return shapley_vec
  313. };
  314.  
  315. tests.push(["compute_shapley", function() {
  316. return test_objects(compute_shapley(['a', 'b'],{'a': {'hits':100, 'contacts':30, 'zamers': 20, 'dogovors':10},
  317. 'b': {'hits':100, 'contacts':0, 'zamers': 20, 'dogovors':10},
  318. 'a;b': {'hits':100, 'contacts':40, 'zamers': 20, 'dogovors':10}})['contacts'],
  319. [0.6875, 0.3125]);
  320. }]);
  321.  
  322. var hit_conversions_test = [100, 100];
  323. var contact_conversions_test = [30, 0];
  324. var zamer_conversions_test = [20, 20];
  325. var dogovor_conversions_test = [10, 10];
  326.  
  327. var fill_subchains = function(split, true_subchains, hit_conversions,
  328. contact_conversions, zamer_conversions, dogovor_conversions) {
  329. var subchains = {};
  330. var all_combinations = get_combinations(split);
  331.  
  332. for (var i = 0; i < true_subchains.length; i++) {
  333. subchains[true_subchains[i]] = {name: true_subchains[i],
  334. hits: hit_conversions[i],
  335. contacts: contact_conversions[i],
  336. zamers: zamer_conversions[i],
  337. dogovors: dogovor_conversions[i]}
  338. }
  339.  
  340. //Инициализируем несуществующуие комбинации
  341. for (var i = 0; i < all_combinations.length; i++) {
  342. if (if_undefined(subchains[all_combinations[i]])) {
  343. subchains[all_combinations[i]] = {name: all_combinations[i],
  344. contacts: 0,
  345. zamers: 0,
  346. dogovors: 0,
  347. hits: 0};
  348. }
  349. }
  350. return subchains;
  351. };
  352.  
  353. tests.push(["fill_subchains", function() {
  354. return test_objects(fill_subchains(['a', 'b'], ['a', 'a;b'], hit_conversions_test,
  355. contact_conversions_test, zamer_conversions_test, dogovor_conversions_test),
  356. { a: { name: 'a', hits: 100, contacts: 30, zamers: 20, dogovors: 10 },
  357. 'a;b': { name: 'a;b', hits: 100, contacts: 0, zamers: 20, dogovors: 10 },
  358. b: { name: 'b', contacts: 0, zamers: 0, dogovors: 0, hits: 0 } }
  359. );
  360. }]);
  361. /*===============================================================*/
  362.  
  363. var test_all = function() {
  364. var single_test = function(name, fun) {
  365. console.log("Executing test " + name);
  366. try {
  367. if (fun()) {
  368. console.log("ok");
  369. return true;
  370. } else {
  371. console.log("FAILED");
  372. return false;
  373. }
  374. } catch (e) {
  375. console.log("ERROR");
  376. console.log(e.message);
  377. return false;
  378. }
  379. };
  380. var someFailed = false;
  381. tests.forEach(function(t) {
  382. someFailed = !single_test(t[0], t[1]) || someFailed;
  383. });
  384. console.log(someFailed ? "Some Failed!!!": "OK");
  385. };
  386.  
  387. if (_debug) test_all();
  388.  
  389.  
  390.  
  391.  
  392.  
  393.  
  394. var subchains = fill_subchains(split, true_subchains, hit_conversions,
  395. contact_conversions, zamer_conversions, dogovor_conversions);
  396. var shapley_vec = compute_shapley(split, subchains);
  397.  
  398. result = [];
  399. for (var i = 0; i < split.length; i++) {
  400. result.push({"splited": split[i],
  401. "contact_shapley": shapley_vec['contacts'][i],
  402. "zamer_shapley": shapley_vec['zamers'][i],
  403. "dogovor_shapley": shapley_vec['dogovors'][i]});
  404. };
  405. return result;
  406. """;
  407.  
  408.  
  409. WITH
  410.  
  411. #Оставляем только первые хиты в сессии
  412. master_without_all_hits AS (SELECT * REPLACE(ARRAY(SELECT AS STRUCT * FROM
  413. (SELECT *, CONCAT(indirect_source, indirect_medium, indirect_campaign)=IFNULL(LAG(CONCAT(indirect_source, indirect_medium, indirect_campaign)) OVER(ORDER BY time), '') and
  414. DATE_DIFF(DATE(TIMESTAMP_SECONDS(time)), DATE(TIMESTAMP_SECONDS(LAG(time) OVER(ORDER BY time))), DAY) < 14 as hitToRemove FROM UNNEST(hit))
  415. WHERE NOT hitToRemove) as hit) FROM fabrika21.master),
  416.  
  417. # Выписываем в отдельный список все хиты и контакты. Напротив каждого ставим время и дату и приоритет: напротив хита - 0, напротив грязного контакта - 1, напротив чистого контакта - 2.
  418. master_events AS (SELECT *, ARRAY(SELECT STRUCT(type, id, date_int_to_date(date_int) AS dat, time, priority)
  419. FROM (SELECT "hit" AS type, hitId AS id, date_int, time, 0 AS priority FROM UNNEST(hit)
  420. UNION ALL SELECT "contact" AS type, contact_card_id AS id, date_int, time, IF(is_clear AND length(call_center_manager)>2, 2, 1) AS priority FROM UNNEST(contact))) AS events FROM master_without_all_hits),
  421.  
  422. # Находим те хиты или контакты, которым предшествовал 14-дневный перерыв - это будет начало нового периода
  423. master_new_period AS (SELECT * REPLACE (ARRAY(SELECT STRUCT(type, id, dat, time, priority, is_new_period(dat, LAG(dat) OVER(ORDER BY time, priority)) AS new_period)
  424. FROM UNNEST(events)) AS events) FROM master_events),
  425.  
  426. # Все хиты и контакты нумеруем по периодам
  427. master_period_number AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, dat, time, priority, SUM(new_period) OVER (ORDER BY time, priority) AS period_number)
  428. FROM UNNEST(events)) AS events) FROM master_new_period),
  429.  
  430. # Для каждого договора находим соответствующий замер (по номеру), его время и дату
  431. master_dogovor_matching AS (SELECT *, ARRAY(SELECT STRUCT(z.date_int AS date_int, z.time AS time, nomer AS id)
  432. FROM UNNEST(dogovor) d JOIN UNNEST(zamer) z ON d.nomerZayavki = zamer_id_to_nomer_zayavki(z.zamer_id)) AS dogovor_matching FROM master_period_number),
  433.  
  434. # Добавляем замеры и договоры в список к хитам и контактам. При этом в качестве даты договора ставим дату соответствующего замера. У замера приоритет 3, у договора - 4.
  435. master_zamers_and_dogovors AS (SELECT * REPLACE(ARRAY_CONCAT(events, ARRAY(SELECT STRUCT(type, id, date_int_to_date(date_int) AS dat, time, priority, NULL AS period_number)
  436. FROM (SELECT "zamer" AS type, zamer_id AS id, date_int, time, 3 AS priority FROM UNNEST(zamer)
  437. UNION ALL SELECT "dogovor" AS type, id, date_int, time, 4 AS priority FROM UNNEST(dogovor_matching)))) AS events) FROM master_dogovor_matching),
  438.  
  439. # Приписываем каждому замеру и договору номер периода непосредственно предшествующего хита или контакта
  440. master_events_classified AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, dat, priority,
  441. IFNULL(period_number, MAX(period_number) OVER(ORDER BY time, priority)) AS period_number,
  442. IFNULL(time, MAX(time) OVER(ORDER BY time, priority)) AS time)
  443. FROM UNNEST(events)) AS events) FROM master_zamers_and_dogovors),
  444.  
  445. # В следующих двух запросах удаляем все хиты и контакты в периоде, кроме первого
  446. master_unregular_hits_contacts AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, dat, priority, period_number, time,
  447. max(priority) over (PARTITION BY period_number ORDER BY time, priority ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS check_priority)
  448. from unnest(events)) as events)
  449. from master_events_classified),
  450.  
  451. #Удаляем контакты, но оставляем хиты. Первые хиты помечаются в колонке is_first_hit
  452. master_remove_unregular_contacts AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, dat, priority, period_number, time, check_priority,
  453. (check_priority is NUll OR priority > check_priority OR priority >= 3) and type='hit' as is_first_hit)
  454. from unnest(events) where check_priority is NUll OR priority > check_priority OR priority >= 3 OR type = 'hit') AS events)
  455. from master_unregular_hits_contacts),
  456.  
  457. first_hit_in_period AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, dat, time, priority, period_number, is_first_hit,
  458. MIN(IF(type = "hit" and is_first_hit, time, NULL)) OVER(PARTITION BY period_number) AS first_hit_time,
  459. MIN(IF(type = "contact", time, NULL)) OVER(PARTITION BY period_number) AS first_contact_time) FROM UNNEST(events)) AS events) FROM master_remove_unregular_contacts),
  460.  
  461. fill_data AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, dat, time, priority, period_number,
  462. IFNULL(first_hit_time, MAX(first_hit_time) OVER(ORDER BY time, priority)) AS first_hit_time,IF(first_contact_time is NUll AND priority >0, MAX(first_contact_time) OVER(ORDER BY time, priority),first_contact_time) AS first_contact_time, SUM(summa) OVER(PARTITION BY period_number) AS summa)
  463. FROM UNNEST(events) AS e LEFT JOIN UNNEST(dogovor) AS d ON IF(type = "dogovor", id, NULL) = nomer order by dat asc) AS events) FROM first_hit_in_period),
  464.  
  465. generate_periods AS (SELECT *, ARRAY(SELECT STRUCT(period_number,
  466. ANY_VALUE(h.landing) AS landing,
  467. date_int_to_date(ANY_VALUE(h.date_int)) AS date_int,
  468. date_int_to_date(IFNULL(ANY_VALUE(fc.date_int),ANY_VALUE(h.date_int))) AS contact_date_int,
  469. date_int_to_date(IFNULL(MIN(IF(type = "zamer", date_to_date_int(dat), NULL)),IFNULL(ANY_VALUE(fc.date_int),ANY_VALUE(h.date_int)))) AS zamer_date_int,
  470. date_int_to_date(IFNULL(MIN(IF(type = "dogovor", d.date_int , NULL)),IFNULL(MIN(IF(type = "zamer", date_to_date_int(dat), NULL)),IFNULL(ANY_VALUE(fc.date_int),ANY_VALUE(h.date_int))))) AS dogovor_date_int,
  471. TIMESTAMP_SECONDS(ANY_VALUE(h.time)) AS hit_time,
  472. TIMESTAMP_SECONDS(IFNULL(ANY_VALUE(fc.time),ANY_VALUE(h.time))) AS contact_time,
  473. TIMESTAMP_SECONDS(IFNULL(MIN(IF(type = "zamer", e.time, NULL)),IFNULL(ANY_VALUE(fc.time),ANY_VALUE(h.time)))) AS zamer_time,
  474. TIMESTAMP_SECONDS(IFNULL(MIN(IF(type = "dogovor", UNIX_SECONDS(TIMESTAMP_ADD(TIMESTAMP (date_int_to_date(d.date_int)), INTERVAL 12 hour)), NULL)),IFNULL(MIN(IF(type = "zamer", e.time, NULL)),IFNULL(ANY_VALUE(fc.time),ANY_VALUE(h.time))))) AS dogovor_time,
  475. ANY_VALUE(h.is_mobile) AS is_mobile,
  476. ANY_VALUE(h.indirect_source) AS source,
  477. ANY_VALUE(h.indirect_medium) AS medium,
  478. ANY_VALUE(h.indirect_campaign) AS campaign,
  479. ANY_VALUE(h.indirect_keyword) AS keyword,
  480. ANY_VALUE(h.indirect_adContent) AS adContent,
  481. CAST(regexp_extract(ANY_VALUE(h.indirect_adContent) , "[|]?gr_([^|]+)") AS STRING) AS group_number,
  482. LOGICAL_OR(type = "contact") AS period_contact,
  483. ANY_VALUE(fc.phone) AS period_phone,
  484. ANY_VALUE(fc.in_phone) AS in_phone,
  485. ANY_VALUE(fc.one_c_source) AS one_c_source,
  486. ANY_VALUE(fc.product_type) AS product_type,
  487. ANY_VALUE(fc.coldcall) AS coldcall,
  488. ANY_VALUE(fc.meeting) AS meeting,
  489. ANY_VALUE(fc.webrequest) AS webrequest,
  490. ANY_VALUE(fc.incall) AS incall,
  491. ANY_VALUE(fc.webcall) AS webcall,
  492. (select manager_source from unnest(client_card) limit 1) as manager_source,
  493. NOT(LOGICAL_OR(c.is_service = FALSE)) AS is_service,
  494. ANY_VALUE(fc.call_center_manager) AS call_center_manager,
  495. is_clear AND LOGICAL_OR(LENGTH(fc.call_center_manager) > 3) AS contact_clear,
  496. COUNT(DISTINCT IF(type = "zamer", id, NULL)) AS zamer_count,
  497. COUNT(DISTINCT IF(type = "dogovor", id, NULL)) AS dogovor_count,
  498. ANY_VALUE(fc.phone) AS phone_number,
  499. ANY_VALUE(e.summa) AS summa)
  500. FROM UNNEST(events) AS e
  501. LEFT JOIN UNNEST(hit) AS h ON first_hit_time = h.time
  502. LEFT JOIN UNNEST(contact) fc ON first_contact_time = fc.time
  503. LEFT JOIN UNNEST(contact) c ON id = c.contact_card_id
  504. LEFT JOIN UNNEST(dogovor) d ON id = d.nomer
  505. GROUP BY period_number order by period_number asc) AS periods FROM fill_data),
  506.  
  507. events_chains AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, e.time AS time, dat, priority, period_number,
  508. ARRAY_AGG(STRUCT(h.indirect_source AS source, h.indirect_medium AS medium, h.indirect_campaign AS campaign)) OVER(PARTITION BY period_number ORDER BY e.time, priority) AS chains,
  509. FIRST_VALUE(type) OVER(PARTITION BY period_number ORDER BY e.time, priority) = "hit" AS period_starts_with_hit)
  510. FROM UNNEST(events) AS e LEFT JOIN UNNEST(hit) AS h ON IF(type="hit", id, NULL) = h.hitId) AS events) FROM generate_periods),
  511.  
  512. events_last_period_number AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, time, dat, priority, period_number,
  513. ARRAY(SELECT STRUCT(source, medium, campaign) FROM (SELECT DISTINCT source, medium, campaign FROM UNNEST(chains))) AS chains,
  514. MAX(IF(period_starts_with_hit, period_number, NULL)) OVER(ORDER BY time, priority) AS last_period_number) FROM UNNEST(events)) AS events) FROM events_chains),
  515.  
  516. events_all_chains AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(e1.type AS type, e1.id AS id, e1.time AS time, e1.dat AS dat, e1.priority AS priority, e1.period_number AS period_number,
  517. ARRAY_CONCAT_AGG(e2.chains) AS chains) FROM UNNEST(events) e1 JOIN UNNEST(events) AS e2 ON e2.period_number BETWEEN e1.last_period_number AND e1.period_number
  518. AND (e1.period_number != e2.period_number OR e2.time <= e1.time)
  519. GROUP BY e1.type, e1.id, e1.time, e1.dat, e1.priority, e1.period_number) AS events) FROM events_last_period_number),
  520.  
  521. events_distinct_chains AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, time, dat, priority, period_number, ARRAY(SELECT STRUCT(source, medium, campaign) FROM (SELECT DISTINCT * FROM UNNEST(chains)
  522. WHERE source IS NOT NULL ORDER BY source, medium, campaign LIMIT 6)) AS chains, IF(type='hit', LAG(type) OVER(PARTITION BY period_number, type ORDER BY time DESC) IS NULL, LAG(type) OVER(PARTITION BY period_number, type ORDER BY time, priority) IS NULL) AS first_event_last_hit,
  523. MIN(dat) OVER(PARTITION BY period_number) AS period_start)
  524. FROM UNNEST(events) ORDER by time) AS events) FROM events_all_chains),
  525.  
  526. event_first_hit_last AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(type, id, time, dat, priority, period_number, chains, period_start,
  527. ARRAY_TO_STRING(ARRAY(SELECT CONCAT(source, "/", medium, "/", campaign) FROM UNNEST(chains)), ";") AS chain_joined) FROM UNNEST(events) WHERE first_event_last_hit) AS events) FROM events_distinct_chains),
  528.  
  529. periods_chain_joined AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(IF(type='hit', e.chain_joined, NULL) as hit_chain_joined,IF(type='contact', e.chain_joined, NULL) as contact_chain_joined, IF(type='zamer', e.chain_joined, NULL) as zamer_chain_joined, IF(type='dogovor', e.chain_joined, NULL) as dogovor_chain_joined,
  530. p.period_number as period_number, p.landing as landing, p.date_int as date_int,
  531. p.contact_date_int as contact_date_int, p.zamer_date_int as zamer_date_int, p.dogovor_date_int as dogovor_date_int,
  532. p.hit_time as hit_time, p.contact_time as contact_time, p.zamer_time as zamer_time, p.dogovor_time as dogovor_time,
  533. p.is_mobile as is_mobile, p.source as source, p.medium as medium, p.campaign as campaign, p.keyword as keyword,
  534. p.adContent as adContent, p.group_number as group_number, p.period_contact as period_contact, p.period_phone as period_phone,
  535. p.in_phone as in_phone, p.one_c_source as one_c_source, p.product_type as product_type, p.coldcall as coldcall,
  536. p.meeting as meeting, p.webrequest as webrequest, p.incall as incall, p.webcall as webcall, p.manager_source as manager_source,
  537. p.is_service as is_service, p.call_center_manager as call_center_manager, p.contact_clear as contact_clear, p.zamer_count as zamer_count,
  538. p.dogovor_count as dogovor_count, p.phone_number as phone_number, p.summa as summa)
  539. FROM UNNEST(periods) AS p JOIN UNNEST(events) AS e ON IF(type='contact', e.dat=p.contact_date_int, IF(type='zamer', e.dat=p.zamer_date_int, IF(type='dogovor', e.dat=p.dogovor_date_int, IF(type='hit', e.period_start=p.date_int, false)))) ) as periods) FROM event_first_hit_last),
  540.  
  541. group_periods_chain_joined AS (SELECT * REPLACE(ARRAY(SELECT STRUCT(MAX(hit_chain_joined) as hit_chain_joined, MAX(contact_chain_joined) as contact_chain_joined, MAX(zamer_chain_joined) as zamer_chain_joined, MAX(dogovor_chain_joined) as dogovor_chain_joined,
  542. period_number, landing, date_int, contact_date_int, zamer_date_int, dogovor_date_int,
  543. hit_time, contact_time, zamer_time, dogovor_time, is_mobile, source, medium, campaign, keyword,
  544. adContent, group_number, period_contact, period_phone, in_phone, one_c_source, product_type, coldcall,
  545. meeting, webrequest, incall, webcall, manager_source, is_service, call_center_manager, contact_clear, zamer_count,
  546. dogovor_count, phone_number, summa)
  547. FROM UNNEST(periods) GROUP BY period_number, landing, date_int, contact_date_int, zamer_date_int, dogovor_date_int,
  548. hit_time, contact_time, zamer_time, dogovor_time, is_mobile, source, medium, campaign, keyword,
  549. adContent, group_number, period_contact, period_phone, in_phone, one_c_source, product_type, coldcall,
  550. meeting, webrequest, incall, webcall, manager_source, is_service, call_center_manager, contact_clear, zamer_count,
  551. dogovor_count, phone_number, summa) as periods) FROM periods_chain_joined),
  552.  
  553. dates AS (SELECT period_dat, GENERATE_DATE_ARRAY(DATE_ADD(period_dat, INTERVAL -28 DAY), period_dat) AS other_dats
  554. FROM UNNEST((SELECT GENERATE_DATE_ARRAY("2016-01-01", CURRENT_DATE("+03:00")))) AS period_dat),
  555.  
  556.  
  557. chains_subchains AS (SELECT period_dat, chain_joined, get_subchains(chain_joined) AS subchains, COUNT(DISTINCT primary) AS hit_conversions,
  558. COUNT(DISTINCT IF(priority = 2, primary, NULL)) AS contact_conversions,
  559. COUNT(DISTINCT IF(priority = 3, primary, NULL)) AS zamer_conversions,
  560. COUNT(DISTINCT IF(priority = 4, primary, NULL)) AS dogovor_conversions
  561. FROM (SELECT * FROM group_periods_chain_joined, UNNEST(events)) JOIN (SELECT * FROM dates, UNNEST(other_dats) AS other_dat) ON period_start = other_dat GROUP BY period_dat, chain_joined),
  562.  
  563. shapley_data AS (SELECT c1.period_dat AS period_dat, c1.chain_joined AS chain_joined,
  564. ARRAY_AGG(STRUCT(c2.chain_joined AS subchain, c2.hit_conversions AS hit_conversions, c2.contact_conversions AS contact_conversions,
  565. c2.zamer_conversions AS zamer_conversions, c2.dogovor_conversions AS dogovor_conversions)) AS data
  566. FROM (SELECT * FROM chains_subchains, UNNEST(subchains) AS subchain) AS c1 JOIN chains_subchains AS c2 ON c1.subchain = c2.chain_joined AND c1.period_dat = c2.period_dat
  567. GROUP BY period_dat, chain_joined),
  568.  
  569.  
  570. shapley_report as (SELECT period_dat ,chain_joined, data, get_shapley(chain_joined, data) as shapley
  571. FROM shapley_data)
  572.  
  573. SELECT * FROM group_periods_chain_joined #Таблица, включающая периоды с chain_joined (записана в shapley_periods)
  574. --SELECT * FROM shapley_report #таблица в данными по Шепли и рассчитанными коэффициентами (записана в shapley_report)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement