daily pastebin goal
8%
SHARE
TWEET

Untitled

a guest Mar 20th, 2019 82 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  #' Run hdbscan with only `minPts` specified
  #'
  #' Calls `dbscan::hdbscan()` on `dt`, passing `minPts` through and leaving
  #' every other argument at its default.
  #'
  #' @param minPts min size of the cluster, forwarded as `minPts`
  #' @param dt input data for hdbscan, forwarded as the first argument
  #'
  #' @return the object returned by `dbscan::hdbscan()`
  #' @export
  hdbscan_by_minPts <- function(minPts, dt) {
    fit <- dbscan::hdbscan(dt, minPts = minPts)
    fit
  }
  11.  
  12.  
  13.  
  #' Put hdbscan list (object-based) output into a tibble
  #'
  #' Builds one row per element of `hdbobj$cluster`. An empty clustering
  #' yields a zero-row tibble instead of an error.
  #'
  #' @param hdbobj a list-like hdbscan result providing the elements
  #'   `cluster`, `minPts`, `membership_prob` and `outlier_scores`
  #'
  #' @return a tibble with all object-based results from hdbscan:
  #'   `object_id` (position within `hdbobj$cluster`, as character),
  #'   `cluster_id` (`hdbobj$cluster` as character), `min_points`,
  #'   `member_prob` and `outlier_score`
  #'
  get_obj_score <- function(hdbobj) {
    tibble::tibble(
      # seq_along() stays empty for empty input; seq(1:length(x)) would
      # produce 1:2 there and break the column lengths.
      object_id = as.character(seq_along(hdbobj$cluster)),
      cluster_id = as.character(hdbobj$cluster),
      min_points = hdbobj$minPts,
      member_prob = hdbobj$membership_prob,
      outlier_score = hdbobj$outlier_scores
    )
  }
  29.  
  30.  
  #' Put hdbscan list (cluster-based) output into a tibble
  #'
  #' Converts `hdbobj$cluster_scores` into a two-column tibble: the element
  #' names go to `cluster_id`, the values to `stability_score`.
  #'
  #' @param hdbobj a list-like hdbscan result providing `cluster_scores`
  #'
  #' @return a tibble with cluster-based results from hdbscan
  #'
  get_cluster_stability <- function(hdbobj) {
    scores <- hdbobj$cluster_scores
    tibble::enframe(
      scores,
      name = "cluster_id",
      value = "stability_score"
    )
  }
  40.  
  41.  
  #' Collect all hdbscan object results
  #'
  #' Left-joins the output of `get_cluster_stability()` onto the output of
  #' `get_obj_score()` by `cluster_id`, so every row of the object-level
  #' table is kept.
  #'
  #' @param hdbobj an hdbscan result accepted by `get_obj_score()` and
  #'   `get_cluster_stability()`
  #'
  #' @return the `get_obj_score()` tibble with the `stability_score` column
  #'   added
  #' @export
  #'
  get_hdbscan_result <- function(hdbobj) {
    per_object <- get_obj_score(hdbobj)
    per_cluster <- get_cluster_stability(hdbobj)
    dplyr::left_join(per_object, per_cluster, by = "cluster_id")
  }
  55.  
  56.  
  #' Calculate mean cluster stability score based on minPts
  #'
  #' Reduces the input to distinct `cluster_id` / `min_points` /
  #' `stability_score` combinations, drops rows containing `NA`, and
  #' averages `stability_score` within each `min_points` value.
  #'
  #' @param hdbscan_res a tibble from get_hdbscan_result()
  #'
  #' @return a tibble with one row per `min_points` and the column
  #'   `mean_stability_score`
  #' @export
  #'
  #'
  check_stability_score_by_minPts <- function(hdbscan_res) {
    per_cluster <- dplyr::distinct(
      hdbscan_res,
      cluster_id, min_points, stability_score
    )
    scored <- na.omit(per_cluster)
    by_min_pts <- dplyr::group_by(scored, min_points)
    dplyr::summarize(
      by_min_pts,
      mean_stability_score = mean(stability_score)
    )
  }
  72.  
  73.  
  74.  
  #' Calculate mean membership probability (first by cluster) score based on
  #' minPts
  #'
  #' Averages `member_prob` within each `cluster_id` / `min_points` pair,
  #' discards the rows whose `cluster_id` equals 0, then averages those
  #' per-cluster means within each `min_points` value.
  #'
  #' @param hdbscan_res a tibble from get_hdbscan_result()
  #'
  #' @return a tibble with one row per `min_points` and the column
  #'   `mean_memb`
  #' @export
  #'
  check_member_prob_score_by_minPts <- function(hdbscan_res) {
    by_cluster <- dplyr::group_by(hdbscan_res, cluster_id, min_points)
    cluster_means <- dplyr::summarize(
      by_cluster,
      mean_cluster_memb_prob = mean(member_prob)
    )
    kept <- dplyr::filter(cluster_means, cluster_id != 0)
    by_min_pts <- dplyr::group_by(kept, min_points)
    dplyr::summarize(
      by_min_pts,
      mean_memb = mean(mean_cluster_memb_prob)
    )
  }
  98.  
  99.  
  100.  
  #' Calculate mean outlier score (first by cluster) based on minPts
  #'
  #' Averages `outlier_score` within each `cluster_id` / `min_points` pair,
  #' discards the rows whose `cluster_id` equals 0, then averages those
  #' per-cluster means within each `min_points` value.
  #'
  #' @param hdbscan_res a tibble from get_hdbscan_result()
  #'
  #' @return a tibble with one row per `min_points` and the column
  #'   `mean_outlier`
  #' @export
  #'
  check_outlier_score_by_minPts <- function(hdbscan_res) {
    by_cluster <- dplyr::group_by(hdbscan_res, cluster_id, min_points)
    cluster_means <- dplyr::summarize(
      by_cluster,
      mean_cluster_outlier_score = mean(outlier_score)
    )
    kept <- dplyr::filter(cluster_means, cluster_id != 0)
    by_min_pts <- dplyr::group_by(kept, min_points)
    dplyr::summarize(
      by_min_pts,
      mean_outlier = mean(mean_cluster_outlier_score)
    )
  }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top