Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #### libraries ----
- # install.packages("intergraph")
- library(dplyr)
- library(purrr)
- library(furrr)
- library(GGally)
- library(igraph)
- library(stringdist)
- library(tidyr) # for expand_grid
#### environment parameters ----
# Use background R sessions for futures. The former `multiprocess` strategy
# is deprecated (and defunct in recent releases of the future package);
# `multisession` is the portable replacement that works on every platform.
# NOTE(review): the visible code never calls a furrr `future_*()` function,
# so this plan only matters if such calls exist elsewhere -- confirm.
plan(multisession)
- #### helper function(s) ----
#' Group similar strings from one or two data-frame columns
#'
#' Every value of column `x` is paired with every value of column `y` and
#' scored with `stringsim()`. Pairs whose similarity is at least `dist` but
#' below 1 (i.e. not identical) are kept, turned into a graph, and the
#' graph's connected components are reported as groups of similar strings.
#'
#' @param df A data frame or tibble containing the column(s) to compare.
#' @param x Unquoted name of the first column.
#' @param y Unquoted name of the second column. Defaults to `NULL`, in which
#'   case `x` is compared with itself.
#' @param dist Lower bound (inclusive) on the similarity a pair must reach
#'   to be kept. Despite the name this is a similarity, not a distance:
#'   larger values mean closer strings.
#' @param ... Further arguments forwarded to `stringsim()`, for example
#'   `method = "jw"`.
#' @return A list with four elements: `distances` (tibble with columns `x`,
#'   `y` and `string_dist`, the latter holding the similarity score),
#'   `graph` (igraph graph built from the kept pairs), `networks` (list of
#'   the graph's components) and `similar_strings` (list of character
#'   vectors, the vertex names of each component).
similar_strings <- function(df, x, y = NULL, dist = 0.7, ...) {
  x_name <- quo_name(enquo(x))
  y_quo <- enquo(y)
  # An unsupplied (or literal NULL) `y` means "compare `x` with itself".
  y_name <- if (identical(quo_name(y_quo), "NULL")) x_name else quo_name(y_quo)

  # Fail early on unknown columns instead of letting a NULL column slip
  # into expand_grid() and produce a confusing downstream result.
  wanted <- unique(c(x_name, y_name))
  absent <- wanted[!wanted %in% names(df)]
  if (length(absent) > 0) {
    stop(
      "Column(s) not found in `df`: ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  pairs <- expand_grid(x = df[[x_name]], y = df[[y_name]])
  # stringsim() is vectorised over both inputs, so one call scores all
  # pairs; `...` lets the caller choose the metric and its options.
  pairs$string_dist <- stringsim(pairs$x, pairs$y, ...)

  # Drop identical strings (similarity 1), pairs below the threshold, and
  # pairs whose score is NA.
  keep <- !is.na(pairs$string_dist) &
    pairs$string_dist < 1 &
    pairs$string_dist >= dist
  pairs <- pairs[keep, ]

  graph <- graph_from_data_frame(pairs[c("x", "y")])
  # igraph::decompose() is the current name of the deprecated
  # decompose.graph(); namespaced so stats::decompose() cannot be picked up.
  networks <- igraph::decompose(graph)

  list(
    distances = pairs,
    graph = graph,
    networks = networks,
    similar_strings = lapply(networks, function(net) V(net)$name)
  )
}
#### load data ----
# mtcars stores the car model names as row names; copy them into a regular
# `models` column so they can be compared as strings.
data(mtcars)
dat_mtcars <- mutate(as_tibble(mtcars), models = rownames(mtcars))

#### Calculate string distance(s) & group into networks ----
dat <- similar_strings(df = dat_mtcars, x = models, dist = 0.6)

# groups of similar model names, one character vector per network
dat[["similar_strings"]]

# plot full graph
ggnet2(dat[["graph"]], label = TRUE, layout.exp = 1.1)

# plot networks individually
lapply(
  dat[["networks"]],
  function(net) ggnet2(net, label = TRUE, layout.exp = 1.1)
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement