Artemii_Kravtsov

with_arrays

Oct 10th, 2021 (edited)
366
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.09 KB | None | 0 0
  1. -- забирает целиком историю тех чатов, в которых что-то происходило за последнее время
  2. -- проверяет поле user на совпадение с regex'ом, который опознает mobile_profile_id
  3. WITH current_data AS (SELECT message_id, chat_id, session_id, from, timestamp, type,
  4. if(match(user, '.+-.+-.+-.+-.+'), (NULL, user), (user, NULL)) AS mobile_cids_and_operator_names,
  5. connector_id, channel_id, card_type, card_id, json_data, reply_to, text AS text_table
  6. FROM RAW.RAW_MOS_AISUZ_chatMessages
  7. WHERE chat_id IN (SELECT DISTINCT chat_id
  8. FROM RAW.RAW_MOS_AISUZ_chatMessages
  9. WHERE toDate(timestamp) >= toDate('{start_query_exec_date}')
  10. AND toDate(timestamp) <= toDate('{execution_date}'))),
  11.  
  12. -- раскладывает json_data на столбцы и джойнит с current_data,
  13. -- если значение переменной может содержаться в одном из нескольких полей, то делает coalesce
  14. -- приравнивает к NULL значения 'Пользователь' в столбце user, после чего, помимо пропусков, там
  15. -- остаются только имена операторов
  16. wide_data AS (SELECT message_id,
  17. chat_id,
  18. session_id,
  19. from,
  20. timestamp,
  21. type,
  22. connector_id,
  23. channel_id,
  24. card_type,
  25. session_data_operator,
  26. user_agent,
  27. sso_id,
  28. reply_to,
  29. action,
  30. cardFrom AS card_from,
  31. crafttalk_dialog_id,
  32. nullif(mobile_cids_and_operator_names.1, 'Пользователь') AS current_operator,
  33. mobile_cids_and_operator_names.2 AS mobile_profile_id,
  34. coalesce(toString(card_id), cardId) AS card_id,
  35. coalesce(text_table, text_json) AS text,
  36. coalesce(client_id, clientId) AS client_id,
  37. coalesce(elk_person_id, elkPersonId) AS elk_person_id,
  38. coalesce(mos_id, mosId) AS mos_id,
  39. coalesce(url, nullif(JSONExtractString(citInfo, 'location'), '')) AS url_location,
  40. coalesce(toString(rate), RatingRequestGroup1, RatingRequestGroup2, RatingRequestGroup3,
  41. RatingRequestGroup4, RatingRequestGroup5) as rate
  42. FROM (SELECT message_id,
  43. if(has(json_keys, 'action'), arrayElement(json_values, indexOf(json_keys, 'action')), NULL) AS action,
  44. if(has(json_keys, 'card-from'), arrayElement(json_values, indexOf(json_keys, 'card-from')), NULL) AS cardFrom,
  45. if(has(json_keys, 'client_id'), arrayElement(json_values, indexOf(json_keys, 'client_id')), NULL) AS client_id,
  46. if(has(json_keys, 'clientId'), arrayElement(json_values, indexOf(json_keys, 'clientId')), NULL) AS clientId,
  47. if(has(json_keys, 'card-id'), arrayElement(json_values, indexOf(json_keys, 'card-id')), NULL) AS cardId,
  48. if(has(json_keys, 'code'), arrayElement(json_values, indexOf(json_keys, 'code')), NULL) AS code,
  49. if(has(json_keys, 'crafttalk_operator_name'), arrayElement(json_values, indexOf(json_keys, 'crafttalk_operator_name')), NULL) AS session_data_operator,
  50. if(has(json_keys, 'crafttalk_dialog_id'), arrayElement(json_values, indexOf(json_keys, 'crafttalk_dialog_id')), NULL) AS crafttalk_dialog_id,
  51. if(has(json_keys, 'citInfo'), arrayElement(json_values, indexOf(json_keys, 'citInfo')), NULL) AS citInfo,
  52. if(has(json_keys, 'elk_person_id'), arrayElement(json_values, indexOf(json_keys, 'elk_person_id')), NULL) AS elk_person_id,
  53. if(has(json_keys, 'elkPersonId'), arrayElement(json_values, indexOf(json_keys, 'elkPersonId')), NULL) AS elkPersonId,
  54. if(has(json_keys, 'mos_id'), arrayElement(json_values, indexOf(json_keys, 'mos_id')), NULL) AS mos_id,
  55. if(has(json_keys, 'mosId'), arrayElement(json_values, indexOf(json_keys, 'mosId')), NULL) AS mosId,
  56. if(has(json_keys, 'rate'), arrayElement(json_values, indexOf(json_keys, 'rate')), NULL) AS rate,
  57. if(has(json_keys, 'RatingRequestGroup1'), '1', NULL) AS RatingRequestGroup1,
  58. if(has(json_keys, 'RatingRequestGroup2'), '2', NULL) AS RatingRequestGroup2,
  59. if(has(json_keys, 'RatingRequestGroup3'), '3', NULL) AS RatingRequestGroup3,
  60. if(has(json_keys, 'RatingRequestGroup4'), '4', NULL) AS RatingRequestGroup4,
  61. if(has(json_keys, 'RatingRequestGroup5'), '5', NULL) AS RatingRequestGroup5,
  62. if(has(json_keys, 'sso_id'), arrayElement(json_values, indexOf(json_keys, 'sso_id')), NULL) AS sso_id,
  63. if(has(json_keys, 'text'), arrayElement(json_values, indexOf(json_keys, 'text')), NULL) AS text_json,
  64. if(has(json_keys, 'url'), arrayElement(json_values, indexOf(json_keys, 'url')), NULL) AS url,
  65. if(has(json_keys, 'user_agent'), arrayElement(json_values, indexOf(json_keys, 'user_agent')), NULL) AS user_agent
  66. FROM (SELECT message_id,
  67. json_tuples,
  68. arrayMap(x -> x.1, json_tuples) AS json_keys,
  69. arrayMap(x -> x.2, json_tuples) AS json_values
  70. FROM (SELECT message_id,
  71. JSONExtractKeysAndValues(assumeNotNull(json_data), 'String') AS json_tuples
  72. FROM current_data
  73. WHERE json_data IS NOT NULL))) AS json
  74. RIGHT JOIN current_data USING message_id
  75. ORDER BY chat_id, timestamp ASC), -- эта сортировка гарантирует, что на следующем шаге groupArray соберёт массивы по порядку
  76.  
  77. -- делает ffill и bfill, где требуется
  78. arrays AS (SELECT message_id,
  79. open_time,
  80. last_action_time,
  81. nullif(current_operator_filled, '') AS current_operator,
  82. nullif(session_data_operator_filled, '') AS session_data_operator,
  83. nullif(connector_id_filled, '') AS connector_id,
  84. nullif(channel_id_filled, '') AS channel_id,
  85. nullif(crafttalk_dialog_id_filled, '') AS crafttalk_dialog_id,
  86. nullif(url_location_filled, '') AS url_location,
  87. nullif(client_id_filled, '') AS client_id,
  88. nullif(mobile_profile_id_filled, '') AS mobile_profile_id,
  89. nullif(elk_person_id_filled, '') AS elk_person_id,
  90. nullif(mos_id_filled, '') AS mos_id,
  91. nullif(sso_id_filled, '') AS sso_id,
  92. nullif(user_agent_filled, '') AS user_agent,
  93. toUInt8OrNull(nullif(rate_filled, '')) AS rate
  94. FROM (SELECT message_id,
  95. arrayElement(timestamp, 1) AS open_time,
  96. arrayElement(timestamp, -1) AS last_action_time,
  97. arrayFill(x -> x != '', current_operator) AS current_operator_filled,
  98. arrayFill(x -> x != '', arrayReverseFill(x -> x != '', session_data_operator)) AS session_data_operator_filled,
  99. arrayFill(x -> x != '', arrayReverseFill(x -> x != '', connector_id)) AS connector_id_filled,
  100. arrayFill(x -> x != '', arrayReverseFill(x -> x != '', channel_id)) AS channel_id_filled,
  101. arrayFill(x -> x != '', arrayReverseFill(x -> x != '', crafttalk_dialog_id)) AS crafttalk_dialog_id_filled,
  102. arrayFill(x -> x != '', arrayReverseFill(x -> x != '', url_location)) AS url_location_filled,
  103. arrayFill(x -> x != '', arrayReverseFill(x -> x != '', client_id)) AS client_id_filled,
  104. arrayFill(x -> x != '', arrayReverseFill(x -> x != '', mobile_profile_id)) AS mobile_profile_id_filled,
  105. arrayFill(x -> x != '', arrayReverseFill(x -> x != '', elk_person_id)) AS elk_person_id_filled,
  106. arrayFill(x -> x != '', arrayReverseFill(x -> x != '', mos_id)) AS mos_id_filled,
  107. arrayFill(x -> x != '', arrayReverseFill(x -> x != '', sso_id)) AS sso_id_filled,
  108. arrayFill(x -> x != '', arrayReverseFill(x -> x != '', user_agent)) AS user_agent_filled,
  109. arrayFill(x -> x != '', arrayReverseFill(x -> x != '', rate)) AS rate_filled
  110. FROM (SELECT groupArray(message_id) AS message_id,
  111. groupArray(timestamp) AS timestamp,
  112. groupArray(ifNull(current_operator, '')) AS current_operator,
  113. groupArray(ifNull(session_data_operator, '')) AS session_data_operator,
  114. groupArray(ifNull(connector_id, '')) AS connector_id,
  115. groupArray(ifNull(channel_id, '')) AS channel_id,
  116. groupArray(ifNull(crafttalk_dialog_id, '')) AS crafttalk_dialog_id,
  117. groupArray(ifNull(url_location, '')) AS url_location,
  118. groupArray(ifNull(client_id, '')) AS client_id,
  119. groupArray(ifNull(mobile_profile_id, '')) AS mobile_profile_id,
  120. groupArray(ifNull(elk_person_id, '')) AS elk_person_id,
  121. groupArray(ifNull(mos_id, '')) AS mos_id,
  122. groupArray(ifNull(sso_id, '')) AS sso_id,
  123. groupArray(ifNull(user_agent, '')) AS user_agent,
  124. groupArray(ifNull(rate, '')) AS rate
  125. FROM wide_data
  126. GROUP BY chat_id))
  127. ARRAY JOIN message_id, current_operator_filled, session_data_operator_filled, connector_id_filled, channel_id_filled, crafttalk_dialog_id_filled,
  128. url_location_filled, client_id_filled, mobile_profile_id_filled, elk_person_id_filled, mos_id_filled, sso_id_filled, user_agent_filled, rate_filled)
  129.  
  130. SELECT message_id, chat_id, session_id, timestamp, open_time, last_action_time, current_operator,
  131. session_data_operator, connector_id, channel_id, url_location, rate, crafttalk_dialog_id, client_id,
  132. mobile_profile_id, elk_person_id, mos_id, sso_id, user_agent, from, type, action, reply_to,
  133. card_id, card_type, card_from, text
  134. FROM (SELECT message_id, chat_id, session_id, from, timestamp, type, card_type, reply_to, action,
  135. card_from, card_id, text
  136. FROM wide_data) AS wide_data_reduced
  137. JOIN arrays USING message_id
Add Comment
Please, Sign In to add comment