Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(dplyr)
- library(tidyr)
- library(pipeR)
- ngrp <- 2L
- numSamples <- 200L
- DF <- data.frame(V1 = sample(1L:3L, numSamples, TRUE), V2 = sample(1L:2L, numSamples, TRUE),
- V3 = rnorm(numSamples), V4 = rnorm(numSamples), V5 = rnorm(numSamples))
- DF %>>% group_by(V1, V2) %>>%
- summarise(
- oriData = list(data.frame(V3 = V3, V4 = V4, V5 = V5)),
- kmeansRes = list(kmeans(data.frame(V3_center = V3, V4_center = V4, V5_center = V5), ngrp))) %>%
- rowwise %>>%
- do(V1 = .$V1, V2 = .$V2, oriData = .$oriData, grps = .$kmeansRes$cluster,
- centers = as.data.frame(.$kmeansRes$centers[.$kmeansRes$cluster, ])) %>>%
- mutate(V1 = unlist(V1), V2 = unlist(V2)) %>>%
- unnest(grps, oriData, centers)
- # # A tibble: 200 × 9
- # V1 V2 grps V3 V4 V5 V3_center V4_center V5_center
- # <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
- # 1 1 1 1 1.11016253 0.02722608 0.3027893 -0.35019463 0.1903187 0.9848824
- # 2 1 1 1 0.70820726 -2.19334026 0.3242983 -0.35019463 0.1903187 0.9848824
- # 3 1 1 2 -0.33493165 -0.07425543 -0.6383053 -0.02633612 0.3241840 -1.0112687
- # 4 1 1 2 -2.03926090 0.24728959 0.1302806 -0.02633612 0.3241840 -1.0112687
- # 5 1 1 2 -0.04744358 0.20338375 -1.3591982 -0.02633612 0.3241840 -1.0112687
- # 6 1 1 2 0.12768265 1.34077790 -1.4590170 -0.02633612 0.3241840 -1.0112687
- # 7 1 1 2 1.08012650 1.95067610 1.3336783 -0.02633612 0.3241840 -1.0112687
- # 8 1 1 2 0.85677265 0.46973309 -0.6577587 -0.02633612 0.3241840 -1.0112687
- # 9 1 1 1 0.90455630 2.44301533 -1.0511750 -0.35019463 0.1903187 0.9848824
- # 10 1 1 2 1.07202152 -0.05310248 -1.4048170 -0.02633612 0.3241840 -1.0112687
- # # ... with 190 more rows
- # data.table
- library(data.table)
- ngrp <- 2L
- numSamples <- 200L
- DT <- data.table(V1 = sample(1L:3L, numSamples, TRUE), V2 = sample(1L:2L, numSamples, TRUE),
- V3 = rnorm(numSamples), V4 = rnorm(numSamples), V5 = rnorm(numSamples))
- DT_kmeans <- DT[ , .(
- oriData = list(data.table(V3, V4, V5)),
- kmeansRes = list(kmeans(data.table(V3, V4, V5) %>>%
- setnames(paste0(names(.), "_center")), ngrp) %>>%
- {data.table(grp = .$cluster, .$centers[.$cluster, ])})),
- by = .(V1, V2)]
- cbind(DT_kmeans[ , .(V1, V2)][rep(1L:nrow(DT_kmeans), sapply(DT_kmeans$oriData, nrow)), ],
- rbindlist(DT_kmeans$oriData),
- rbindlist(DT_kmeans$kmeansRes))
- # V1 V2 V3 V4 V5 grp V3_center V4_center V5_center
- # 1: 1 1 -1.5910524 2.1274208 -1.3464532 1 0.1155321 0.9482175 0.290026606
- # 2: 1 1 0.3280774 -0.1150860 -1.2363502 2 -0.1138527 -1.1174780 -0.603929772
- # 3: 1 1 -0.2026653 -0.9188654 0.4275579 2 -0.1138527 -1.1174780 -0.603929772
- # 4: 1 1 -0.3258952 0.3159080 1.3667256 1 0.1155321 0.9482175 0.290026606
- # 5: 1 1 -0.2819986 -1.2371227 0.6597289 2 -0.1138527 -1.1174780 -0.603929772
- # ---
- # 196: 2 2 0.6875229 -1.4182973 -0.1774791 1 0.5244143 -0.4805122 0.084895890
- # 197: 2 2 1.1732799 -0.7428654 -0.1777401 1 0.5244143 -0.4805122 0.084895890
- # 198: 2 2 -1.1992074 -0.6165810 0.0421549 2 -0.8536335 0.6373769 -0.007181771
- # 199: 2 2 -0.4136762 1.1002398 0.4306150 2 -0.8536335 0.6373769 -0.007181771
- # 200: 2 2 -0.1929361 -0.7346460 -0.4713694 1 0.5244143 -0.4805122 0.084895890
Advertisement
Add Comment
Please, Sign In to add comment