Advertisement
BenjaminLind

ReadMarvelCharacterNets.R

May 11th, 2014
420
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 5.73 KB | None | 0 0
  1. # The Marvel Universe Social Network
  2. # Dataset created by Cesc Rosselló, Ricardo Alberich, and Joe Miro
  3. # The network data is bipartite. There are characters and the books they appear within.
  4. # Information on the data: http://bioinfo.uib.es/~joemiro/marvel.html
  5. # Primary source: http://www.chronologyproject.com/
  6.  
  7. library(igraph); library(Rcpp); library(compiler)
  8. marvnet <- read.graph("http://bioinfo.uib.es/~joemiro/marvel/porgat.txt", format = "pajek")
  9.  
  10. # Count the number of characters and books
  11. n.books <- sum(get.vertex.attribute(marvnet, "type"))
  12. n.chars <- sum(get.vertex.attribute(marvnet, "type")==FALSE)
  13.  
  14. # How many books are each character in?
  15. V(marvnet)$n.books <- rep(NA, vcount(marvnet))
  16. V(marvnet)$n.books[which(V(marvnet)$type==FALSE)] <- degree(marvnet, which(V(marvnet)$type==FALSE))
  17.  
  18. # Create a one-mode projection for relationships between characters
  19. cppFunction('IntegerMatrix all_pairs_Cpp(IntegerVector y){
  20.    int ylen = y.size();
  21.    int outrows = ylen * (ylen-1) / 2;
  22.    IntegerMatrix out(outrows, 2);
  23.    int outrowcounter = 0;
  24.    for(int i = 0; i < (ylen - 1); ++i){        
  25.        for(int j = (i+1); j < ylen; ++j){
  26.            out(outrowcounter, 0) = y[i];
  27.            out(outrowcounter, 1) = y[j];
  28.            outrowcounter++;
  29.            }
  30.        }
  31.    return out;
  32.    }')
  33. all_pairs <- function(x){
  34.     a <- 1:length(x)
  35.     outmat <- all_pairs_Cpp(a)
  36.     outmat <- data.frame(outmat)
  37.     outmat[,1] <- x[outmat[,1]]
  38.     outmat[,2] <- x[outmat[,2]]
  39.     return(outmat)
  40.     }
  41. all_pairs <- cmpfun(all_pairs)
  42. charel <- all_pairs(V(marvnet)$id[which(V(marvnet)$type==FALSE)])
  43. charel <- data.frame(From = charel[, 1], To = charel[, 2], stringsAsFactors=FALSE)
  44. charel <- charel[-which(charel$From == charel$To),]
  45. rm(all_pairs, all_pairs_Cpp)
  46.  
  47. # How many interactions would we expect by chance?
  48. cppFunction('NumericVector expected_interaction(IntegerVector x, IntegerVector y, int totalinteraction){
  49.    int n = x.size();
  50.    NumericVector out(n);
  51.    for(int i = 0; i < n; ++i){
  52.        out[i] = x[i] * y[i] / totalinteraction;
  53.        }
  54.    return out;
  55.    }')
  56. charel[,"Prob"] <- expected_interaction(V(marvnet)$n.books[match(charel$From, V(marvnet)$id)], V(marvnet)$n.books[match(charel$To, V(marvnet)$id)], n.books)
  57. rm(expected_interaction)
  58.  
  59. # How many interactions do we observe?
  60. marvneigh <- neighborhood(marvnet, order = 1, nodes = which(V(marvnet)$type==FALSE))
  61. marvneigh <- lapply(marvneigh, function(x) return(as.integer(x[-1])))
  62. names(marvneigh) <- V(marvnet)$id[which(V(marvnet)$type==FALSE)]
  63. marvneighsize <- sapply(marvneigh, length)
  64.  
  65. charel[,"FromInt"] <- match(charel$From, names(marvneigh))
  66. charel[,"ToInt"] <- match(charel$To, names(marvneigh))
  67. cppFunction('IntegerVector n_common_elements(IntegerVector ind1, IntegerVector ind2, List masterlist){
  68.    int n = ind1.size();
  69.    IntegerVector out(n);
  70.    for(int i = 0; i < n; ++i){
  71.        int aind = ind1[i]-1;
  72.        int bind = ind2[i]-1;
  73.        IntegerVector a = masterlist[aind];
  74.        IntegerVector b = masterlist[bind];
  75.        int countcommon = 0;
  76.        for(int j = 0; j < a.size(); ++j){
  77.            if(is_true(any(b == a[j]))){
  78.                countcommon++;
  79.                }
  80.            }        
  81.        out[i] = countcommon;
  82.        }
  83.    return out;
  84.    }')
  85. # This line might take a couple minutes.
  86. charel[,"Obs"] <- n_common_elements(charel$FromInt, charel$ToInt, marvneigh)
  87. rm(marvneigh, marvneighsize, n_common_elements) # Delete unnecessary objects
  88. charel <- charel[, -c(4, 5)] # Drop unnecessary columns
  89.  
  90. cppFunction('NumericVector z_score_fun(IntegerVector obs, NumericVector expected){
  91.    int n = obs.size();
  92.    NumericVector out(n);
  93.    double avgexp = mean(expected);
  94.    double sdobs;
  95.    NumericVector obsminusexpsq(n);
  96.    for(int i = 0; i < n; ++i){
  97.        obsminusexpsq[i] = pow(obs[i] - avgexp, 2);
  98.        }
  99.    sdobs = sqrt(sum(obsminusexpsq) / n);
  100.    for(int j = 0; j < n; ++j){
  101.        out[j] = (obs[j] - expected[j]) / sdobs;
  102.        }
  103.    return out;
  104.    }')
  105.  
  106. charel[, "ObsZ"] <- z_score_fun(charel$Obs, charel$Prob)
  107. rm(z_score_fun)
  108.  
  109. cppFunction('NumericVector log_transform(NumericVector x){
  110.    int n = x.size();
  111.    NumericVector out(n);
  112.    for(int i = 0; i < n; ++i){
  113.        if(x[i] == 0){
  114.            out[i] = 0;
  115.            }
  116.        if(x[i] > 0){
  117.            out[i] = log(x[i]);
  118.            }
  119.        if(x[i] < 0){
  120.            out[i] = (-1) * log(abs(x[i]));
  121.            }
  122.        }
  123.    return out;
  124.    }')
  125.  
  126. alpha = .00001; criticalvalue <- qnorm(alpha, lower.tail = FALSE)
  127. charel$ObsZlog <- log_transform(charel$ObsZ)
  128. charel[, "Keep"] <- (abs(charel$ObsZ) >= criticalvalue) & (abs(charel$ObsZlog) >= criticalvalue)
  129. rm(alpha, criticalvalue, log_transform)
  130. charelbb <- charel[which(charel$Keep == TRUE), ]
  131. charelbb <- charelbb[, c("From", "To", "ObsZlog")]
  132. charelbb <- graph.data.frame(charelbb, directed = FALSE)
  133. charelbb <- simplify(charelbb)
  134. cleanupnames <- function(y){
  135.     y <- strsplit(y, "/", fixed = TRUE)[[1]][1]
  136.     y <- strsplit(y, " [", fixed = TRUE)[[1]][1]
  137.     y <- gsub(".", "", y, fixed = TRUE)
  138.     ylen <- nchar(y)
  139.     lastchar <- substr(y, ylen, ylen)
  140.     y <- ifelse(lastchar == " ", substr(y, 1, ylen - 1), y)
  141.     return(y)
  142.     }
  143. cleanupnames <- cmpfun(cleanupnames)
  144. V(charelbb)$name <- sapply(1:vcount(charelbb), function(i) return(cleanupnames(V(charelbb)$name[i])))
  145. rm(cleanupnames)
  146. lo.charelbb <- layout.fruchterman.reingold(charelbb, params = list(niter = 5000, area = vcount(charelbb)^3))
  147. plot(charelbb, vertex.size = 2 + 3 * betweenness(charelbb)/900, vertex.label.cex = .5, layout = lo.charelbb, vertex.label.family = "sans", vertex.color = "#F297A0", vertex.frame.color = "#6F95A2", vertex.label.color = "#000000", edge.color = "#B6BFBE")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement