--- Тестовая таблица для проверки расчета метрик дисциплины --- CREATE TEMP TABLE dwh_discipline AS SELECT tutor_id, ROUND( GREATEST( 1 - SUM(discipline_score) * 1.0 / COUNT(lesson_id), 0 ), 2 ) AS discipline_all_time, ROUND( GREATEST( 1 - SUM(discipline_score) FILTER (WHERE tutor_lesson_number_desc <= 25) * 1.0 / COUNT(lesson_id) FILTER (WHERE tutor_lesson_number_desc <= 25), 0 ), 2 ) AS discipline_25_last_lessons, ROUND( GREATEST( 1 - SUM(discipline_score) FILTER ( WHERE lesson_started_at BETWEEN tutor_approved_date AND COALESCE(adaptation_complete_date, CURRENT_DATE) ) * 1.0 / COUNT(lesson_id) FILTER ( WHERE lesson_started_at BETWEEN tutor_approved_date AND COALESCE(adaptation_complete_date, CURRENT_DATE) ), 0 ), 2 ) AS discipline_in_adaptation FROM sandbox.dwh_1179_core_tutor_lesson_counter JOIN mart.tutors USING (tutor_id) WHERE TRUE = TRUE AND tutor_lesson_number_desc IS NOT NULL GROUP BY tutor_id; --- Отформатированный код из задачи для расчета дисциплины --- CREATE TEMP TABLE original_discipline AS WITH needed_lessons AS ( SELECT tutor_id, lesson_id, lesson_state, start_time, pupil_id, -- Пометим флагом своеврем/несвоеврем отмены и прогулы. Всегда опираемся на статус урока первым делом. -- Если статус cancelled и первый отменивший - препод, ставим флаг своевременной отмены. COALESCE(lesson_state = 'cancelled' AND cancellation_made_by_role = 'tutor', FALSE)::INT AS is_ok_tutor_cancel, -- Если статус прогул П/обоих и есть попытка отмены до начала урока - поздняя отмена преподавателем. COALESCE(lesson_state IN ('tutor_no_show', 'both_no_show') AND first_cancellation_datetime < start_time, FALSE)::INT AS is_late_tutor_cancel, -- Если статус прогул П/обоих и нет попытки отмены до начала урока - прогул преподавателя. COALESCE(lesson_state IN ('tutor_no_show', 'both_no_show') AND (first_cancellation_datetime >= start_time OR first_cancellation_datetime IS NULL), FALSE)::INT AS is_no_tutor_cancel, -- Найдем последние 50 уроков преподавателя в любом статусе. ROW_NUMBER() OVER (PARTITION BY tutor_id ORDER BY start_time DESC, lesson_id) AS rn_tutor_desc FROM mart.events_with_lessons WHERE manager_of_intro_lesson IS FALSE AND is_regular_lesson + is_intro_lesson = 1 AND lesson_state != 'new' AND lesson_state IS NOT NULL AND is_tetrika_lesson IS TRUE AND start_time < '2024-12-01' ), -- Посчитаем количество отмен с одним учеником. finding_cancels_with_same_pupil AS ( SELECT tutor_id, lesson_id, pupil_id, start_time, lesson_state, -- Найдем количество "плохих" уроков с одним учеником. COUNT(lesson_id) OVER (PARTITION BY tutor_id, pupil_id ORDER BY start_time DESC, lesson_id) AS bad_lessons_with_pupil_count FROM needed_lessons WHERE (is_ok_tutor_cancel + is_late_tutor_cancel + is_no_tutor_cancel) > 0 ), all_cancels_together AS ( SELECT needed_lessons.tutor_id, needed_lessons.lesson_id, needed_lessons.lesson_state, needed_lessons.start_time, needed_lessons.pupil_id, needed_lessons.is_ok_tutor_cancel, needed_lessons.is_late_tutor_cancel, needed_lessons.is_no_tutor_cancel, needed_lessons.rn_tutor_desc, -- Где есть информация о прогулах с одним учеником, возьмем её. Где нет - подставим 0. COALESCE(finding_cancels_with_same_pupil.bad_lessons_with_pupil_count, 0) AS bad_lessons_with_pupil_count, -- Итоговую стоимость каждого прогула рассчитываем так: -- своевременная отмена - вес 1, несвоевременная отмена - вес 1.5, прогул - вес 2. -- Если несколько плохих уроков с одним учеником, то каждый последующий плохой урок весит больше: -- 1-й как 1, второй - как 2, третий - как 3. is_ok_tutor_cancel * 1 * COALESCE(finding_cancels_with_same_pupil.bad_lessons_with_pupil_count, 0) + is_late_tutor_cancel * 1.5 * COALESCE(finding_cancels_with_same_pupil.bad_lessons_with_pupil_count, 0) + is_no_tutor_cancel * 2 * COALESCE(finding_cancels_with_same_pupil.bad_lessons_with_pupil_count, 0) AS final_score FROM needed_lessons LEFT JOIN finding_cancels_with_same_pupil USING (lesson_id) ) SELECT tutor_id, -- Дисциплина за всё время ROUND( GREATEST( 1 - SUM(final_score) * 1.0 / COUNT(lesson_id), 0 ), 2 ) AS discipline_all_time, -- Дисциплина за последние 25 уроков ROUND( GREATEST( 1 - SUM(final_score) FILTER (WHERE rn_tutor_desc <= 25) * 1.0 / COUNT(lesson_id) FILTER (WHERE rn_tutor_desc <= 25), 0 ), 2 ) AS discipline_25_last_lessons, -- Дисциплина с даты выпуска до даты окончания адаптации ROUND( GREATEST( 1 - SUM(final_score) FILTER ( WHERE start_time BETWEEN tutor_approved_date AND COALESCE(adaptation_complete_date, CURRENT_DATE) ) * 1.0 / COUNT(lesson_id) FILTER ( WHERE start_time BETWEEN tutor_approved_date AND COALESCE(adaptation_complete_date, CURRENT_DATE) ), 0 ), 2 ) AS discipline_in_adaptation FROM all_cancels_together JOIN mart.tutors USING (tutor_id) GROUP BY 1; --- Проверяем матчатся ли метрики --- SELECT original_discipline.tutor_id, dwh_discipline.tutor_id, original_discipline.discipline_all_time, dwh_discipline.discipline_all_time, original_discipline.discipline_25_last_lessons, dwh_discipline.discipline_25_last_lessons, original_discipline.discipline_in_adaptation, dwh_discipline.discipline_in_adaptation FROM original_discipline LEFT JOIN dwh_discipline ON original_discipline.tutor_id = dwh_discipline.tutor_id WHERE original_discipline.discipline_all_time IS DISTINCT FROM dwh_discipline.discipline_all_time OR original_discipline.discipline_25_last_lessons IS DISTINCT FROM dwh_discipline.discipline_25_last_lessons OR original_discipline.discipline_25_last_lessons IS DISTINCT FROM dwh_discipline.discipline_25_last_lessons;