celestialgod

data with kmeans result

Mar 5th, 2017
283
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.58 KB | None | 0 0
  1. library(dplyr)
  2. library(tidyr)
  3. library(pipeR)
  4.  
  5. ngrp <- 2L
  6. numSamples <- 200L
  7. DF <- data.frame(V1 = sample(1L:3L, numSamples, TRUE), V2 = sample(1L:2L, numSamples, TRUE),
  8.                  V3 = rnorm(numSamples), V4 = rnorm(numSamples), V5 = rnorm(numSamples))
  9. DF %>>% group_by(V1, V2) %>>%
  10.   summarise(
  11.     oriData = list(data.frame(V3 = V3, V4 = V4, V5 = V5)),
  12.     kmeansRes = list(kmeans(data.frame(V3_center = V3, V4_center = V4, V5_center = V5), ngrp))) %>%
  13.   rowwise %>>%
  14.   do(V1 = .$V1, V2 = .$V2, oriData = .$oriData, grps = .$kmeansRes$cluster,
  15.      centers = as.data.frame(.$kmeansRes$centers[.$kmeansRes$cluster, ])) %>>%
  16.   mutate(V1 = unlist(V1), V2 = unlist(V2)) %>>%
  17.   unnest(grps, oriData, centers)
  18. # # A tibble: 200 × 9
  19. #       V1    V2  grps          V3          V4         V5   V3_center V4_center  V5_center
  20. #    <int> <int> <int>       <dbl>       <dbl>      <dbl>       <dbl>     <dbl>      <dbl>
  21. # 1      1     1     1  1.11016253  0.02722608  0.3027893 -0.35019463 0.1903187  0.9848824
  22. # 2      1     1     1  0.70820726 -2.19334026  0.3242983 -0.35019463 0.1903187  0.9848824
  23. # 3      1     1     2 -0.33493165 -0.07425543 -0.6383053 -0.02633612 0.3241840 -1.0112687
  24. # 4      1     1     2 -2.03926090  0.24728959  0.1302806 -0.02633612 0.3241840 -1.0112687
  25. # 5      1     1     2 -0.04744358  0.20338375 -1.3591982 -0.02633612 0.3241840 -1.0112687
  26. # 6      1     1     2  0.12768265  1.34077790 -1.4590170 -0.02633612 0.3241840 -1.0112687
  27. # 7      1     1     2  1.08012650  1.95067610  1.3336783 -0.02633612 0.3241840 -1.0112687
  28. # 8      1     1     2  0.85677265  0.46973309 -0.6577587 -0.02633612 0.3241840 -1.0112687
  29. # 9      1     1     1  0.90455630  2.44301533 -1.0511750 -0.35019463 0.1903187  0.9848824
  30. # 10     1     1     2  1.07202152 -0.05310248 -1.4048170 -0.02633612 0.3241840 -1.0112687
  31. # # ... with 190 more rows
  32.  
  33. # data.table
  34. library(data.table)
  35. ngrp <- 2L
  36. numSamples <- 200L
  37. DT <- data.table(V1 = sample(1L:3L, numSamples, TRUE), V2 = sample(1L:2L, numSamples, TRUE),
  38.                  V3 = rnorm(numSamples), V4 = rnorm(numSamples), V5 = rnorm(numSamples))
  39.  
  40.  
  41. DT_kmeans <- DT[ , .(
  42.   oriData = list(data.table(V3, V4, V5)),
  43.   kmeansRes = list(kmeans(data.table(V3, V4, V5) %>>%
  44.                             setnames(paste0(names(.), "_center")), ngrp) %>>%
  45.                             {data.table(grp = .$cluster, .$centers[.$cluster, ])})),
  46.                  by = .(V1, V2)]
  47. cbind(DT_kmeans[ , .(V1, V2)][rep(1L:nrow(DT_kmeans), sapply(DT_kmeans$oriData, nrow)), ],
  48.       rbindlist(DT_kmeans$oriData),
  49.       rbindlist(DT_kmeans$kmeansRes))
  50. #      V1 V2         V3         V4         V5 grp  V3_center  V4_center    V5_center
  51. #   1:  1  1 -1.5910524  2.1274208 -1.3464532   1  0.1155321  0.9482175  0.290026606
  52. #   2:  1  1  0.3280774 -0.1150860 -1.2363502   2 -0.1138527 -1.1174780 -0.603929772
  53. #   3:  1  1 -0.2026653 -0.9188654  0.4275579   2 -0.1138527 -1.1174780 -0.603929772
  54. #   4:  1  1 -0.3258952  0.3159080  1.3667256   1  0.1155321  0.9482175  0.290026606
  55. #   5:  1  1 -0.2819986 -1.2371227  0.6597289   2 -0.1138527 -1.1174780 -0.603929772
  56. #  ---                                                                              
  57. # 196:  2  2  0.6875229 -1.4182973 -0.1774791   1  0.5244143 -0.4805122  0.084895890
  58. # 197:  2  2  1.1732799 -0.7428654 -0.1777401   1  0.5244143 -0.4805122  0.084895890
  59. # 198:  2  2 -1.1992074 -0.6165810  0.0421549   2 -0.8536335  0.6373769 -0.007181771
  60. # 199:  2  2 -0.4136762  1.1002398  0.4306150   2 -0.8536335  0.6373769 -0.007181771
  61. # 200:  2  2 -0.1929361 -0.7346460 -0.4713694   1  0.5244143 -0.4805122  0.084895890
Advertisement
Add Comment
Please, Sign In to add comment