Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -- забирает целиком историю тех чатов, в которых что-то происходило за последнее время
- -- проверяет поле user на совпадение с regex'ом, который опознает mobile_profile_id
- WITH current_data AS (SELECT message_id, chat_id, session_id, from, timestamp, type,
- if(match(user, '.+-.+-.+-.+-.+'), (NULL, user), (user, NULL)) AS mobile_cids_and_operator_names,
- connector_id, channel_id, card_type, card_id, json_data, reply_to, text AS text_table
- FROM RAW.RAW_MOS_AISUZ_chatMessages
- WHERE chat_id IN (SELECT DISTINCT chat_id
- FROM RAW.RAW_MOS_AISUZ_chatMessages
- WHERE toDate(timestamp) >= toDate('{start_query_exec_date}')
- AND toDate(timestamp) <= toDate('{execution_date}'))),
- -- раскладывает json_data на столбцы и джойнит с current_data,
- -- если значение переменной может содержаться в одном из нескольких полей, то делает coalesce
- -- приравнивает к NULL значения 'Пользователь' в столбце user, после чего, помимо пропусков, там
- -- остаются только имена операторов
- wide_data AS (SELECT message_id,
- chat_id,
- session_id,
- from,
- timestamp,
- type,
- connector_id,
- channel_id,
- card_type,
- session_data_operator,
- user_agent,
- sso_id,
- reply_to,
- action,
- cardFrom AS card_from,
- crafttalk_dialog_id,
- nullif(mobile_cids_and_operator_names.1, 'Пользователь') AS current_operator,
- mobile_cids_and_operator_names.2 AS mobile_profile_id,
- coalesce(toString(card_id), cardId) AS card_id,
- coalesce(text_table, text_json) AS text,
- coalesce(client_id, clientId) AS client_id,
- coalesce(elk_person_id, elkPersonId) AS elk_person_id,
- coalesce(mos_id, mosId) AS mos_id,
- coalesce(url, nullif(JSONExtractString(citInfo, 'location'), '')) AS url_location,
- coalesce(toString(rate), RatingRequestGroup1, RatingRequestGroup2, RatingRequestGroup3,
- RatingRequestGroup4, RatingRequestGroup5) as rate
- FROM (SELECT message_id,
- if(has(json_keys, 'action'), arrayElement(json_values, indexOf(json_keys, 'action')), NULL) AS action,
- if(has(json_keys, 'card-from'), arrayElement(json_values, indexOf(json_keys, 'card-from')), NULL) AS cardFrom,
- if(has(json_keys, 'client_id'), arrayElement(json_values, indexOf(json_keys, 'client_id')), NULL) AS client_id,
- if(has(json_keys, 'clientId'), arrayElement(json_values, indexOf(json_keys, 'clientId')), NULL) AS clientId,
- if(has(json_keys, 'card-id'), arrayElement(json_values, indexOf(json_keys, 'card-id')), NULL) AS cardId,
- if(has(json_keys, 'code'), arrayElement(json_values, indexOf(json_keys, 'code')), NULL) AS code,
- if(has(json_keys, 'crafttalk_operator_name'), arrayElement(json_values, indexOf(json_keys, 'crafttalk_operator_name')), NULL) AS session_data_operator,
- if(has(json_keys, 'crafttalk_dialog_id'), arrayElement(json_values, indexOf(json_keys, 'crafttalk_dialog_id')), NULL) AS crafttalk_dialog_id,
- if(has(json_keys, 'citInfo'), arrayElement(json_values, indexOf(json_keys, 'citInfo')), NULL) AS citInfo,
- if(has(json_keys, 'elk_person_id'), arrayElement(json_values, indexOf(json_keys, 'elk_person_id')), NULL) AS elk_person_id,
- if(has(json_keys, 'elkPersonId'), arrayElement(json_values, indexOf(json_keys, 'elkPersonId')), NULL) AS elkPersonId,
- if(has(json_keys, 'mos_id'), arrayElement(json_values, indexOf(json_keys, 'mos_id')), NULL) AS mos_id,
- if(has(json_keys, 'mosId'), arrayElement(json_values, indexOf(json_keys, 'mosId')), NULL) AS mosId,
- if(has(json_keys, 'rate'), arrayElement(json_values, indexOf(json_keys, 'rate')), NULL) AS rate,
- if(has(json_keys, 'RatingRequestGroup1'), '1', NULL) AS RatingRequestGroup1,
- if(has(json_keys, 'RatingRequestGroup2'), '2', NULL) AS RatingRequestGroup2,
- if(has(json_keys, 'RatingRequestGroup3'), '3', NULL) AS RatingRequestGroup3,
- if(has(json_keys, 'RatingRequestGroup4'), '4', NULL) AS RatingRequestGroup4,
- if(has(json_keys, 'RatingRequestGroup5'), '5', NULL) AS RatingRequestGroup5,
- if(has(json_keys, 'sso_id'), arrayElement(json_values, indexOf(json_keys, 'sso_id')), NULL) AS sso_id,
- if(has(json_keys, 'text'), arrayElement(json_values, indexOf(json_keys, 'text')), NULL) AS text_json,
- if(has(json_keys, 'url'), arrayElement(json_values, indexOf(json_keys, 'url')), NULL) AS url,
- if(has(json_keys, 'user_agent'), arrayElement(json_values, indexOf(json_keys, 'user_agent')), NULL) AS user_agent
- FROM (SELECT message_id,
- json_tuples,
- arrayMap(x -> x.1, json_tuples) AS json_keys,
- arrayMap(x -> x.2, json_tuples) AS json_values
- FROM (SELECT message_id,
- JSONExtractKeysAndValues(assumeNotNull(json_data), 'String') AS json_tuples
- FROM current_data
- WHERE json_data IS NOT NULL))) AS json
- RIGHT JOIN current_data USING message_id
- ORDER BY chat_id, timestamp ASC), -- эта сортировка гарантирует, что на следующем шаге groupArray соберёт массивы по порядку
- -- делает ffill и bfill, где требуется
- arrays AS (SELECT message_id,
- open_time,
- last_action_time,
- nullif(current_operator_filled, '') AS current_operator,
- nullif(session_data_operator_filled, '') AS session_data_operator,
- nullif(connector_id_filled, '') AS connector_id,
- nullif(channel_id_filled, '') AS channel_id,
- nullif(crafttalk_dialog_id_filled, '') AS crafttalk_dialog_id,
- nullif(url_location_filled, '') AS url_location,
- nullif(client_id_filled, '') AS client_id,
- nullif(mobile_profile_id_filled, '') AS mobile_profile_id,
- nullif(elk_person_id_filled, '') AS elk_person_id,
- nullif(mos_id_filled, '') AS mos_id,
- nullif(sso_id_filled, '') AS sso_id,
- nullif(user_agent_filled, '') AS user_agent,
- toUInt8OrNull(nullif(rate_filled, '')) AS rate
- FROM (SELECT message_id,
- arrayElement(timestamp, 1) AS open_time,
- arrayElement(timestamp, -1) AS last_action_time,
- arrayFill(x -> x != '', current_operator) AS current_operator_filled,
- arrayFill(x -> x != '', arrayReverseFill(x -> x != '', session_data_operator)) AS session_data_operator_filled,
- arrayFill(x -> x != '', arrayReverseFill(x -> x != '', connector_id)) AS connector_id_filled,
- arrayFill(x -> x != '', arrayReverseFill(x -> x != '', channel_id)) AS channel_id_filled,
- arrayFill(x -> x != '', arrayReverseFill(x -> x != '', crafttalk_dialog_id)) AS crafttalk_dialog_id_filled,
- arrayFill(x -> x != '', arrayReverseFill(x -> x != '', url_location)) AS url_location_filled,
- arrayFill(x -> x != '', arrayReverseFill(x -> x != '', client_id)) AS client_id_filled,
- arrayFill(x -> x != '', arrayReverseFill(x -> x != '', mobile_profile_id)) AS mobile_profile_id_filled,
- arrayFill(x -> x != '', arrayReverseFill(x -> x != '', elk_person_id)) AS elk_person_id_filled,
- arrayFill(x -> x != '', arrayReverseFill(x -> x != '', mos_id)) AS mos_id_filled,
- arrayFill(x -> x != '', arrayReverseFill(x -> x != '', sso_id)) AS sso_id_filled,
- arrayFill(x -> x != '', arrayReverseFill(x -> x != '', user_agent)) AS user_agent_filled,
- arrayFill(x -> x != '', arrayReverseFill(x -> x != '', rate)) AS rate_filled
- FROM (SELECT groupArray(message_id) AS message_id,
- groupArray(timestamp) AS timestamp,
- groupArray(ifNull(current_operator, '')) AS current_operator,
- groupArray(ifNull(session_data_operator, '')) AS session_data_operator,
- groupArray(ifNull(connector_id, '')) AS connector_id,
- groupArray(ifNull(channel_id, '')) AS channel_id,
- groupArray(ifNull(crafttalk_dialog_id, '')) AS crafttalk_dialog_id,
- groupArray(ifNull(url_location, '')) AS url_location,
- groupArray(ifNull(client_id, '')) AS client_id,
- groupArray(ifNull(mobile_profile_id, '')) AS mobile_profile_id,
- groupArray(ifNull(elk_person_id, '')) AS elk_person_id,
- groupArray(ifNull(mos_id, '')) AS mos_id,
- groupArray(ifNull(sso_id, '')) AS sso_id,
- groupArray(ifNull(user_agent, '')) AS user_agent,
- groupArray(ifNull(rate, '')) AS rate
- FROM wide_data
- GROUP BY chat_id))
- ARRAY JOIN message_id, current_operator_filled, session_data_operator_filled, connector_id_filled, channel_id_filled, crafttalk_dialog_id_filled,
- url_location_filled, client_id_filled, mobile_profile_id_filled, elk_person_id_filled, mos_id_filled, sso_id_filled, user_agent_filled, rate_filled)
- SELECT message_id, chat_id, session_id, timestamp, open_time, last_action_time, current_operator,
- session_data_operator, connector_id, channel_id, url_location, rate, crafttalk_dialog_id, client_id,
- mobile_profile_id, elk_person_id, mos_id, sso_id, user_agent, from, type, action, reply_to,
- card_id, card_type, card_from, text
- FROM (SELECT message_id, chat_id, session_id, from, timestamp, type, card_type, reply_to, action,
- card_from, card_id, text
- FROM wide_data) AS wide_data_reduced
- JOIN arrays USING message_id
Add Comment
Please, Sign In to add comment