Advertisement
Guest User

Untitled

a guest
Mar 20th, 2019
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.68 KB | None | 0 0
  #' Run HDBSCAN with a caller-chosen `minPts`
  #'
  #' Thin wrapper around `dbscan::hdbscan()`: only `dt` and `minPts` are
  #' forwarded, every other argument keeps its default.
  #'
  #' @param minPts Minimum cluster size, forwarded as the `minPts` argument of
  #'   `dbscan::hdbscan()`.
  #' @param dt Input data, forwarded as the first argument of
  #'   `dbscan::hdbscan()`.
  #'
  #' @return The object returned by `dbscan::hdbscan()`.
  #' @export
  hdbscan_by_minPts <- function(minPts, dt) {
    clustering <- dbscan::hdbscan(dt, minPts = minPts)
    clustering
  }
  11.  
  12.  
  13.  
  #' Put the per-object part of an hdbscan result into a tibble
  #'
  #' Builds one row per element of `hdbobj$cluster`. Object ids are the
  #' 1-based positions of the objects, stored as character; cluster ids are
  #' converted to character as well.
  #'
  #' @param hdbobj A list-like hdbscan result providing the elements `cluster`,
  #'   `minPts`, `membership_prob` and `outlier_scores`.
  #'
  #' @return A tibble with the columns `object_id`, `cluster_id`, `min_points`,
  #'   `member_prob` and `outlier_score`; it has zero rows when
  #'   `hdbobj$cluster` is empty.
  #'
  get_obj_score <- function(hdbobj) {
    cluster_labels <- hdbobj$cluster

    tibble::tibble(
      # seq_along() yields an empty sequence for empty input, whereas
      # seq(1:length(x)) would produce c(1, 2) and break the tibble sizes.
      object_id = as.character(seq_along(cluster_labels)),
      cluster_id = as.character(cluster_labels),
      min_points = hdbobj$minPts,
      member_prob = hdbobj$membership_prob,
      outlier_score = hdbobj$outlier_scores
    )
  }
  29.  
  30.  
  #' Put the per-cluster part of an hdbscan result into a tibble
  #'
  #' Converts `hdbobj$cluster_scores` into a two-column tibble with
  #' `tibble::enframe()`.
  #'
  #' @param hdbobj A list-like hdbscan result providing the element
  #'   `cluster_scores`.
  #'
  #' @return A tibble with the columns `cluster_id` (taken from the names of
  #'   `cluster_scores`) and `stability_score` (its values).
  #'
  get_cluster_stability <- function(hdbobj) {
    scores <- hdbobj$cluster_scores
    tibble::enframe(
      scores,
      name = "cluster_id",
      value = "stability_score"
    )
  }
  40.  
  41.  
  #' Collect object-level and cluster-level hdbscan results in one tibble
  #'
  #' Left-joins the output of `get_cluster_stability()` onto the output of
  #' `get_obj_score()` using the `cluster_id` column, so every object row is
  #' kept and objects whose cluster has no score get `NA`.
  #'
  #' @param hdbobj An hdbscan result accepted by both `get_obj_score()` and
  #'   `get_cluster_stability()`.
  #'
  #' @return The result of the left join: the columns of `get_obj_score()`
  #'   plus the `stability_score` column of `get_cluster_stability()`.
  #' @export
  #'
  get_hdbscan_result <- function(hdbobj) {
    object_scores <- get_obj_score(hdbobj)
    cluster_scores <- get_cluster_stability(hdbobj)

    dplyr::left_join(object_scores, cluster_scores, by = "cluster_id")
  }
  55.  
  56.  
  #' Mean cluster stability score per `min_points` value
  #'
  #' Reduces the input to the distinct combinations of `cluster_id`,
  #' `min_points` and `stability_score`, drops combinations containing `NA`,
  #' and averages the remaining stability scores within each `min_points`
  #' value.
  #'
  #' @param hdbscan_res A tibble from `get_hdbscan_result()`; the columns
  #'   `cluster_id`, `min_points` and `stability_score` are used.
  #'
  #' @return A tibble with one row per `min_points` value and the columns
  #'   `min_points` and `mean_stability_score`.
  #' @export
  #'
  #'
  check_stability_score_by_minPts <- function(hdbscan_res) {
    # One row per cluster: the score is repeated on every object row.
    per_cluster <- dplyr::distinct(
      hdbscan_res,
      cluster_id, min_points, stability_score
    )
    scored_clusters <- stats::na.omit(per_cluster)
    by_min_points <- dplyr::group_by(scored_clusters, min_points)

    dplyr::summarise(
      by_min_points,
      mean_stability_score = mean(stability_score)
    )
  }
  72.  
  73.  
  74.  
  #' Mean membership probability per `min_points` value
  #'
  #' Averages `member_prob` within each `cluster_id` / `min_points`
  #' combination, discards the rows whose `cluster_id` equals 0, and then
  #' averages the per-cluster means within each `min_points` value.
  #'
  #' @param hdbscan_res A tibble from `get_hdbscan_result()`; the columns
  #'   `cluster_id`, `min_points` and `member_prob` are used.
  #'
  #' @return A tibble with one row per `min_points` value and the columns
  #'   `min_points` and `mean_memb`.
  #' @export
  #'
  check_member_prob_score_by_minPts <- function(hdbscan_res) {
    by_cluster <- dplyr::group_by(hdbscan_res, cluster_id, min_points)
    cluster_means <- dplyr::summarise(
      by_cluster,
      mean_cluster_memb_prob = mean(member_prob)
    )

    # Cluster 0 is excluded before the second averaging step.
    kept_clusters <- dplyr::filter(cluster_means, cluster_id != 0)
    by_min_points <- dplyr::group_by(kept_clusters, min_points)

    dplyr::summarise(
      by_min_points,
      mean_memb = mean(mean_cluster_memb_prob)
    )
  }
  98.  
  99.  
  100.  
  #' Mean outlier score per `min_points` value
  #'
  #' Averages `outlier_score` within each `cluster_id` / `min_points`
  #' combination, discards the rows whose `cluster_id` equals 0, and then
  #' averages the per-cluster means within each `min_points` value.
  #'
  #' @param hdbscan_res A tibble from `get_hdbscan_result()`; the columns
  #'   `cluster_id`, `min_points` and `outlier_score` are used.
  #'
  #' @return A tibble with one row per `min_points` value and the columns
  #'   `min_points` and `mean_outlier`.
  #' @export
  #'
  check_outlier_score_by_minPts <- function(hdbscan_res) {
    by_cluster <- dplyr::group_by(hdbscan_res, cluster_id, min_points)
    cluster_means <- dplyr::summarise(
      by_cluster,
      mean_cluster_outlier_score = mean(outlier_score)
    )

    # Cluster 0 is excluded before the second averaging step.
    kept_clusters <- dplyr::filter(cluster_means, cluster_id != 0)
    by_min_points <- dplyr::group_by(kept_clusters, min_points)

    dplyr::summarise(
      by_min_points,
      mean_outlier = mean(mean_cluster_outlier_score)
    )
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement