Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- --- Тестовая таблица для проверки расчета метрик дисциплины ---
- CREATE TEMP TABLE dwh_discipline AS
- SELECT
- tutor_id,
- ROUND(
- GREATEST(
- 1 - SUM(discipline_score) * 1.0 / COUNT(lesson_id),
- 0
- ),
- 2
- ) AS discipline_all_time,
- ROUND(
- GREATEST(
- 1 - SUM(discipline_score) FILTER (WHERE tutor_lesson_number_desc <= 25) * 1.0
- / COUNT(lesson_id) FILTER (WHERE tutor_lesson_number_desc <= 25),
- 0
- ),
- 2
- ) AS discipline_25_last_lessons,
- ROUND(
- GREATEST(
- 1 - SUM(discipline_score) FILTER (
- WHERE lesson_started_at BETWEEN tutor_approved_date AND COALESCE(adaptation_complete_date, CURRENT_DATE)
- ) * 1.0
- / COUNT(lesson_id) FILTER (
- WHERE lesson_started_at BETWEEN tutor_approved_date AND COALESCE(adaptation_complete_date, CURRENT_DATE)
- ),
- 0
- ),
- 2
- ) AS discipline_in_adaptation
- FROM sandbox.dwh_1179_core_tutor_lesson_counter
- JOIN mart.tutors USING (tutor_id)
- WHERE TRUE = TRUE
- AND tutor_lesson_number_desc IS NOT NULL
- GROUP BY tutor_id;
- --- Отформатированный код из задачи для расчета дисциплины ---
- CREATE TEMP TABLE original_discipline AS
- WITH needed_lessons AS (
- SELECT
- tutor_id,
- lesson_id,
- lesson_state,
- start_time,
- pupil_id,
- -- Пометим флагом своеврем/несвоеврем отмены и прогулы. Всегда опираемся на статус урока первым делом.
- -- Если статус cancelled и первый отменивший - препод, ставим флаг своевременной отмены.
- COALESCE(lesson_state = 'cancelled' AND cancellation_made_by_role = 'tutor', FALSE)::INT AS is_ok_tutor_cancel,
- -- Если статус прогул П/обоих и есть попытка отмены до начала урока - поздняя отмена преподавателем.
- COALESCE(lesson_state IN ('tutor_no_show', 'both_no_show')
- AND first_cancellation_datetime < start_time, FALSE)::INT AS is_late_tutor_cancel,
- -- Если статус прогул П/обоих и нет попытки отмены до начала урока - прогул преподавателя.
- COALESCE(lesson_state IN ('tutor_no_show', 'both_no_show')
- AND (first_cancellation_datetime >= start_time OR first_cancellation_datetime IS NULL), FALSE)::INT AS is_no_tutor_cancel,
- -- Найдем последние 50 уроков преподавателя в любом статусе.
- ROW_NUMBER() OVER (PARTITION BY tutor_id ORDER BY start_time DESC, lesson_id) AS rn_tutor_desc
- FROM
- mart.events_with_lessons
- WHERE
- manager_of_intro_lesson IS FALSE
- AND is_regular_lesson + is_intro_lesson = 1
- AND lesson_state != 'new'
- AND lesson_state IS NOT NULL
- AND is_tetrika_lesson IS TRUE
- AND start_time < '2024-12-01'
- ),
- -- Посчитаем количество отмен с одним учеником.
- finding_cancels_with_same_pupil AS (
- SELECT
- tutor_id,
- lesson_id,
- pupil_id,
- start_time,
- lesson_state,
- -- Найдем количество "плохих" уроков с одним учеником.
- COUNT(lesson_id) OVER (PARTITION BY tutor_id, pupil_id ORDER BY start_time DESC, lesson_id) AS bad_lessons_with_pupil_count
- FROM
- needed_lessons
- WHERE
- (is_ok_tutor_cancel + is_late_tutor_cancel + is_no_tutor_cancel) > 0
- ),
- all_cancels_together AS (
- SELECT
- needed_lessons.tutor_id,
- needed_lessons.lesson_id,
- needed_lessons.lesson_state,
- needed_lessons.start_time,
- needed_lessons.pupil_id,
- needed_lessons.is_ok_tutor_cancel,
- needed_lessons.is_late_tutor_cancel,
- needed_lessons.is_no_tutor_cancel,
- needed_lessons.rn_tutor_desc,
- -- Где есть информация о прогулах с одним учеником, возьмем её. Где нет - подставим 0.
- COALESCE(finding_cancels_with_same_pupil.bad_lessons_with_pupil_count, 0) AS bad_lessons_with_pupil_count,
- -- Итоговую стоимость каждого прогула рассчитываем так:
- -- своевременная отмена - вес 1, несвоевременная отмена - вес 1.5, прогул - вес 2.
- -- Если несколько плохих уроков с одним учеником, то каждый последующий плохой урок весит больше:
- -- 1-й как 1, второй - как 2, третий - как 3.
- is_ok_tutor_cancel * 1 * COALESCE(finding_cancels_with_same_pupil.bad_lessons_with_pupil_count, 0) +
- is_late_tutor_cancel * 1.5 * COALESCE(finding_cancels_with_same_pupil.bad_lessons_with_pupil_count, 0) +
- is_no_tutor_cancel * 2 * COALESCE(finding_cancels_with_same_pupil.bad_lessons_with_pupil_count, 0) AS final_score
- FROM
- needed_lessons
- LEFT JOIN
- finding_cancels_with_same_pupil USING (lesson_id)
- )
- SELECT
- tutor_id,
- -- Дисциплина за всё время
- ROUND(
- GREATEST(
- 1 - SUM(final_score) * 1.0 / COUNT(lesson_id),
- 0
- ),
- 2
- ) AS discipline_all_time,
- -- Дисциплина за последние 25 уроков
- ROUND(
- GREATEST(
- 1 - SUM(final_score) FILTER (WHERE rn_tutor_desc <= 25) * 1.0
- / COUNT(lesson_id) FILTER (WHERE rn_tutor_desc <= 25),
- 0
- ),
- 2
- ) AS discipline_25_last_lessons,
- -- Дисциплина с даты выпуска до даты окончания адаптации
- ROUND(
- GREATEST(
- 1 - SUM(final_score) FILTER (
- WHERE start_time BETWEEN tutor_approved_date AND COALESCE(adaptation_complete_date, CURRENT_DATE)
- ) * 1.0
- / COUNT(lesson_id) FILTER (
- WHERE start_time BETWEEN tutor_approved_date AND COALESCE(adaptation_complete_date, CURRENT_DATE)
- ),
- 0
- ),
- 2
- ) AS discipline_in_adaptation
- FROM all_cancels_together
- JOIN mart.tutors USING (tutor_id)
- GROUP BY 1;
- --- Проверяем матчатся ли метрики ---
- SELECT
- original_discipline.tutor_id,
- dwh_discipline.tutor_id,
- original_discipline.discipline_all_time,
- dwh_discipline.discipline_all_time,
- original_discipline.discipline_25_last_lessons,
- dwh_discipline.discipline_25_last_lessons,
- original_discipline.discipline_in_adaptation,
- dwh_discipline.discipline_in_adaptation
- FROM original_discipline
- LEFT JOIN dwh_discipline
- ON original_discipline.tutor_id = dwh_discipline.tutor_id
- WHERE
- original_discipline.discipline_all_time IS DISTINCT FROM dwh_discipline.discipline_all_time
- OR original_discipline.discipline_25_last_lessons IS DISTINCT FROM dwh_discipline.discipline_25_last_lessons
- OR original_discipline.discipline_25_last_lessons IS DISTINCT FROM dwh_discipline.discipline_25_last_lessons;
Advertisement
Add Comment
Please, Sign In to add comment