Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # The Marvel Universe Social Network
- # Dataset created by Cesc Rosselló, Ricardo Alberich, and Joe Miro
- # The network data is bipartite. There are characters and the books they appear within.
- # Information on the data: http://bioinfo.uib.es/~joemiro/marvel.html
- # Primary source: http://www.chronologyproject.com/
- library(igraph); library(Rcpp); library(compiler)
- marvnet <- read.graph("http://bioinfo.uib.es/~joemiro/marvel/porgat.txt", format = "pajek")
- # Count the number of characters and books
- n.books <- sum(get.vertex.attribute(marvnet, "type"))
- n.chars <- sum(get.vertex.attribute(marvnet, "type")==FALSE)
- # How many books are each character in?
- V(marvnet)$n.books <- rep(NA, vcount(marvnet))
- V(marvnet)$n.books[which(V(marvnet)$type==FALSE)] <- degree(marvnet, which(V(marvnet)$type==FALSE))
- # Create a one-mode projection for relationships between characters
- cppFunction('IntegerMatrix all_pairs_Cpp(IntegerVector y){
- int ylen = y.size();
- int outrows = ylen * (ylen-1) / 2;
- IntegerMatrix out(outrows, 2);
- int outrowcounter = 0;
- for(int i = 0; i < (ylen - 1); ++i){
- for(int j = (i+1); j < ylen; ++j){
- out(outrowcounter, 0) = y[i];
- out(outrowcounter, 1) = y[j];
- outrowcounter++;
- }
- }
- return out;
- }')
- all_pairs <- function(x){
- a <- 1:length(x)
- outmat <- all_pairs_Cpp(a)
- outmat <- data.frame(outmat)
- outmat[,1] <- x[outmat[,1]]
- outmat[,2] <- x[outmat[,2]]
- return(outmat)
- }
- all_pairs <- cmpfun(all_pairs)
- charel <- all_pairs(V(marvnet)$id[which(V(marvnet)$type==FALSE)])
- charel <- data.frame(From = charel[, 1], To = charel[, 2], stringsAsFactors=FALSE)
- charel <- charel[-which(charel$From == charel$To),]
- rm(all_pairs, all_pairs_Cpp)
- # How many interactions would we expect by chance?
- cppFunction('NumericVector expected_interaction(IntegerVector x, IntegerVector y, int totalinteraction){
- int n = x.size();
- NumericVector out(n);
- for(int i = 0; i < n; ++i){
- out[i] = x[i] * y[i] / totalinteraction;
- }
- return out;
- }')
- charel[,"Prob"] <- expected_interaction(V(marvnet)$n.books[match(charel$From, V(marvnet)$id)], V(marvnet)$n.books[match(charel$To, V(marvnet)$id)], n.books)
- rm(expected_interaction)
- # How many interactions do we observe?
- marvneigh <- neighborhood(marvnet, order = 1, nodes = which(V(marvnet)$type==FALSE))
- marvneigh <- lapply(marvneigh, function(x) return(as.integer(x[-1])))
- names(marvneigh) <- V(marvnet)$id[which(V(marvnet)$type==FALSE)]
- marvneighsize <- sapply(marvneigh, length)
- charel[,"FromInt"] <- match(charel$From, names(marvneigh))
- charel[,"ToInt"] <- match(charel$To, names(marvneigh))
- cppFunction('IntegerVector n_common_elements(IntegerVector ind1, IntegerVector ind2, List masterlist){
- int n = ind1.size();
- IntegerVector out(n);
- for(int i = 0; i < n; ++i){
- int aind = ind1[i]-1;
- int bind = ind2[i]-1;
- IntegerVector a = masterlist[aind];
- IntegerVector b = masterlist[bind];
- int countcommon = 0;
- for(int j = 0; j < a.size(); ++j){
- if(is_true(any(b == a[j]))){
- countcommon++;
- }
- }
- out[i] = countcommon;
- }
- return out;
- }')
- # This line might take a couple minutes.
- charel[,"Obs"] <- n_common_elements(charel$FromInt, charel$ToInt, marvneigh)
- rm(marvneigh, marvneighsize, n_common_elements) # Delete unnecessary objects
- charel <- charel[, -c(4, 5)] # Drop unnecessary columns
- cppFunction('NumericVector z_score_fun(IntegerVector obs, NumericVector expected){
- int n = obs.size();
- NumericVector out(n);
- double avgexp = mean(expected);
- double sdobs;
- NumericVector obsminusexpsq(n);
- for(int i = 0; i < n; ++i){
- obsminusexpsq[i] = pow(obs[i] - avgexp, 2);
- }
- sdobs = sqrt(sum(obsminusexpsq) / n);
- for(int j = 0; j < n; ++j){
- out[j] = (obs[j] - expected[j]) / sdobs;
- }
- return out;
- }')
- charel[, "ObsZ"] <- z_score_fun(charel$Obs, charel$Prob)
- rm(z_score_fun)
- cppFunction('NumericVector log_transform(NumericVector x){
- int n = x.size();
- NumericVector out(n);
- for(int i = 0; i < n; ++i){
- if(x[i] == 0){
- out[i] = 0;
- }
- if(x[i] > 0){
- out[i] = log(x[i]);
- }
- if(x[i] < 0){
- out[i] = (-1) * log(abs(x[i]));
- }
- }
- return out;
- }')
- alpha = .00001; criticalvalue <- qnorm(alpha, lower.tail = FALSE)
- charel$ObsZlog <- log_transform(charel$ObsZ)
- charel[, "Keep"] <- (abs(charel$ObsZ) >= criticalvalue) & (abs(charel$ObsZlog) >= criticalvalue)
- rm(alpha, criticalvalue, log_transform)
- charelbb <- charel[which(charel$Keep == TRUE), ]
- charelbb <- charelbb[, c("From", "To", "ObsZlog")]
- charelbb <- graph.data.frame(charelbb, directed = FALSE)
- charelbb <- simplify(charelbb)
- cleanupnames <- function(y){
- y <- strsplit(y, "/", fixed = TRUE)[[1]][1]
- y <- strsplit(y, " [", fixed = TRUE)[[1]][1]
- y <- gsub(".", "", y, fixed = TRUE)
- ylen <- nchar(y)
- lastchar <- substr(y, ylen, ylen)
- y <- ifelse(lastchar == " ", substr(y, 1, ylen - 1), y)
- return(y)
- }
- cleanupnames <- cmpfun(cleanupnames)
- V(charelbb)$name <- sapply(1:vcount(charelbb), function(i) return(cleanupnames(V(charelbb)$name[i])))
- rm(cleanupnames)
- lo.charelbb <- layout.fruchterman.reingold(charelbb, params = list(niter = 5000, area = vcount(charelbb)^3))
- plot(charelbb, vertex.size = 2 + 3 * betweenness(charelbb)/900, vertex.label.cex = .5, layout = lo.charelbb, vertex.label.family = "sans", vertex.color = "#F297A0", vertex.frame.color = "#6F95A2", vertex.label.color = "#000000", edge.color = "#B6BFBE")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement