Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#' Run HDBSCAN with a chosen `minPts`
#'
#' Thin wrapper around `dbscan::hdbscan()` that forwards only the data and
#' `minPts`; all other arguments keep the package defaults. Note that
#' `minPts` is the first argument and the data the second.
#'
#' @param minPts Minimum cluster size, passed on as `minPts`.
#' @param dt Input data, passed on as the `x` argument of `dbscan::hdbscan()`.
#'
#' @return The object returned by `dbscan::hdbscan()`.
#' @export
hdbscan_by_minPts <- function(minPts, dt) {
  dbscan::hdbscan(x = dt, minPts = minPts)
}
#' Put the per-object output of an hdbscan fit into a tibble
#'
#' @param hdbobj A list-like hdbscan result (e.g. from `hdbscan_by_minPts()`)
#'   providing the elements `cluster`, `minPts`, `membership_prob` and
#'   `outlier_scores`.
#'
#' @return A tibble with one row per element of `hdbobj$cluster` and the
#'   columns `object_id` (position of the object, as character), `cluster_id`
#'   (cluster label, as character), `min_points`, `member_prob` and
#'   `outlier_score`. An empty fit gives a zero-row tibble.
#'
get_obj_score <- function(hdbobj) {
  tibble::tibble(
    # seq_along() yields no ids for an empty fit, whereas 1:length(x) would
    # count down from 1 to 0 and produce two bogus ids.
    object_id = as.character(seq_along(hdbobj$cluster)),
    cluster_id = as.character(hdbobj$cluster),
    min_points = hdbobj$minPts,
    member_prob = hdbobj$membership_prob,
    outlier_score = hdbobj$outlier_scores
  )
}
#' Put the per-cluster output of an hdbscan fit into a tibble
#'
#' @param hdbobj A list-like hdbscan result providing a `cluster_scores`
#'   element.
#'
#' @return A two-column tibble built by `tibble::enframe()` from
#'   `hdbobj$cluster_scores`: `cluster_id` holds the names of that vector and
#'   `stability_score` holds its values.
#'
get_cluster_stability <- function(hdbobj) {
  scores <- hdbobj$cluster_scores
  tibble::enframe(
    x = scores,
    name = "cluster_id",
    value = "stability_score"
  )
}
#' Combine the per-object and per-cluster results of an hdbscan fit
#'
#' @param hdbobj An hdbscan result accepted by both `get_obj_score()` and
#'   `get_cluster_stability()`.
#'
#' @return The tibble from `get_obj_score()` left-joined on `cluster_id` with
#'   the tibble from `get_cluster_stability()`. Objects whose `cluster_id` has
#'   no match in the cluster table get `NA` in `stability_score`.
#' @export
#'
get_hdbscan_result <- function(hdbobj) {
  object_scores <- get_obj_score(hdbobj)
  cluster_scores <- get_cluster_stability(hdbobj)
  dplyr::left_join(object_scores, cluster_scores, by = "cluster_id")
}
#' Mean cluster stability score for each `min_points` value
#'
#' @param hdbscan_res A tibble from `get_hdbscan_result()`; the columns
#'   `cluster_id`, `min_points` and `stability_score` are used.
#'
#' @return A tibble with one row per `min_points` value and the column
#'   `mean_stability_score`: the mean of `stability_score` over the distinct
#'   (`cluster_id`, `min_points`, `stability_score`) combinations that contain
#'   no missing value.
#' @export
#'
check_stability_score_by_minPts <- function(hdbscan_res) {
  # Collapse to distinct combinations so repeated object rows of the same
  # cluster do not weight the mean.
  per_cluster <- dplyr::distinct(
    hdbscan_res, cluster_id, min_points, stability_score
  )
  # Drop every combination that has an NA in any of the three columns.
  scored <- na.omit(per_cluster)
  by_min_points <- dplyr::group_by(scored, min_points)
  dplyr::summarize(
    by_min_points,
    mean_stability_score = mean(stability_score)
  )
}
#' Mean membership probability for each `min_points` value
#'
#' Membership probabilities are averaged within every
#' (`cluster_id`, `min_points`) group first; those per-cluster means are then
#' averaged within each `min_points` value, so each cluster contributes one
#' value to the final mean.
#'
#' @param hdbscan_res A tibble from `get_hdbscan_result()`; the columns
#'   `cluster_id`, `min_points` and `member_prob` are used.
#'
#' @return A tibble with one row per `min_points` value and the column
#'   `mean_memb`. Rows with `cluster_id` 0 do not contribute.
#' @export
#'
check_member_prob_score_by_minPts <- function(hdbscan_res) {
  by_cluster <- dplyr::group_by(hdbscan_res, cluster_id, min_points)
  cluster_means <- dplyr::summarize(
    by_cluster,
    mean_cluster_memb_prob = mean(member_prob)
  )
  # Exclude cluster id 0 (the label dbscan::hdbscan gives to noise points).
  kept <- dplyr::filter(cluster_means, cluster_id != 0)
  by_min_points <- dplyr::group_by(kept, min_points)
  dplyr::summarize(by_min_points, mean_memb = mean(mean_cluster_memb_prob))
}
#' Mean outlier score for each `min_points` value
#'
#' Outlier scores are averaged within every (`cluster_id`, `min_points`)
#' group first; those per-cluster means are then averaged within each
#' `min_points` value, so each cluster contributes one value to the final
#' mean.
#'
#' @param hdbscan_res A tibble from `get_hdbscan_result()`; the columns
#'   `cluster_id`, `min_points` and `outlier_score` are used.
#'
#' @return A tibble with one row per `min_points` value and the column
#'   `mean_outlier`. Rows with `cluster_id` 0 do not contribute.
#' @export
#'
check_outlier_score_by_minPts <- function(hdbscan_res) {
  by_cluster <- dplyr::group_by(hdbscan_res, cluster_id, min_points)
  cluster_means <- dplyr::summarize(
    by_cluster,
    mean_cluster_outlier_score = mean(outlier_score)
  )
  # Exclude cluster id 0 (the label dbscan::hdbscan gives to noise points).
  kept <- dplyr::filter(cluster_means, cluster_id != 0)
  by_min_points <- dplyr::group_by(kept, min_points)
  dplyr::summarize(
    by_min_points,
    mean_outlier = mean(mean_cluster_outlier_score)
  )
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement