Advertisement
Guest User

Untitled

a guest
Jul 28th, 2014
212
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.63 KB | None | 0 0
  1. Vertebrates
  2. fish
  3. goldfish
  4. clownfish
  5. amphibian
  6. frog
  7. toad
  8. reptiles
  9. snake
  10. lizard
  11. turtle
  12. tortoise
  13. birds
  14. sparrow
  15. crow
  16. parrot
  17. mammals
  18. dog
  19. cat
  20. horse
  21. whale
  22.  
  23. Vertebrates
  24. fish
  25. goldfish
  26. clownfish
  27. amphibian
  28. frog
  29. toad
  30. reptiles
  31. snake
  32. lizard
  33. turtle
  34. tortoise
  35. birds
  36. sparrow
  37. house
  38. factory
  39. crow
  40. parrot
  41. crane
  42. mammals
  43. dog
  44. cat
  45. horse
  46. whale
  47.  
  48. dat = structure(list(V1 = c("Vertebrates", NA, NA, NA, NA, NA, NA,
  49. NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  50. NA), V2 = c(NA, "fish", NA, NA, "amphibian", NA, NA, "reptiles",
  51. NA, NA, NA, NA, "birds", NA, NA, NA, NA, NA, NA, "mammals", NA,
  52. NA, NA, NA), V3 = c(NA, NA, "goldfish", "clownfish", NA, "frog",
  53. "toad", NA, "snake", "lizard", "turtle", "tortoise", NA, "sparrow",
  54. NA, NA, "crow", "parrot", "crane", NA, "dog", "cat", "horse",
  55. "whale"), V4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  56. NA, NA, NA, "house", "factory", NA, NA, NA, NA, NA, NA, NA, NA
  57. )), .Names = c("V1", "V2", "V3", "V4"), class = "data.frame", row.names = c(NA,
  58. -24L))
  59. >
  60.  
  61. library(zoo)
  62. library(igraph)
  63.  
  64. # read tab delimited data - keep structure by setting "" to missing
  65. # (would have been great if you had given this in a format easier to use)
  66.  
  67. dat <- read.table("test.txt", sep="\t", header=FALSE, fill=TRUE,
  68. na.strings="", strip.white=TRUE, stringsAsFactors=FALSE)  # tab-separated; "" becomes NA
  69.  
  70. head(dat, 7)
  71. # V1 V2 V3
  72. #1 Vertebrates <NA> <NA>
  73. #2 <NA> fish <NA>
  74. #3 <NA> <NA> goldfish
  75. #4 <NA> <NA> clownfish
  76. #5 <NA> amphibian <NA>
  77. #6 <NA> <NA> frog
  78. #7 <NA> <NA> toad
  79.  
  80. # carry forward the last non-missing value in the first two columns (lapply keeps each column's own type)
  81. dat[1:2] <- lapply(dat[1:2], na.locf, na.rm=FALSE)
  82. dat <- na.omit(dat)  # drop rows that still have an NA in any column
  83.  
  84. # get edges for graph - we want two columns (from and to) for each edges
  85. edges <- rbind(dat[1:2],setNames(dat[2:3],names(dat[1:2])))
  86.  
  87. # create graph
  88. g <- graph.data.frame(edges)
  89.  
  90. # Plot graph
  91. E(g)$curved <- 0
  92. plot.igraph(g, vertex.size=0, edge.arrow.size=0 ,
  93. layout=-layout.reingold.tilford(g)[,2:1])
  94.  
  95. dat <- structure(list(V1 = c("Vertebrates", NA, NA, NA, NA, NA, NA,
  96. NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), V2 = c(NA,
  97. "fish", NA, NA, "amphibian", NA, NA, "reptiles", NA, NA, NA,
  98. NA, "birds", NA, NA, NA, "mammals", NA, NA, NA, NA), V3 = c(NA,
  99. NA, "goldfish", "clownfish", NA, "frog", "toad", NA, "snake",
  100. "lizard", "turtle", "tortoise", NA, "sparrow", "crow", "parrot",
  101. NA, "dog", "cat", "horse", "whale")), .Names = c("V1", "V2",
  102. "V3"), class = "data.frame", row.names = c(NA, -21L))
  103.  
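  104. # To prepare the data (NOTE: `dat2` is not defined anywhere in this paste; presumably the four-column data frame shown earlier - confirm)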
  105.  
  106. # carry forward the last value in columns if lower level (col to the right)
  107. # is non-missing
  108. dat2[1] <- na.locf(dat2[1], na.rm=FALSE)
  109.  
  110. for (i in ncol(dat2):2) {  # walk the columns right-to-left
  111. dat2[[i-1]] <- ifelse(!is.na(dat2[[i]]), na.locf(dat2[[i-1]], na.rm=FALSE),  # fill parent only where the child is present
  112. dat2[[i-1]])  # otherwise keep the original value (possibly NA)
  113. }
  114.  
  115. # get edges for graph: stack every adjacent column pair as (from, to) rows, dropping pairs that contain an NA
  116. edges <- rbind(na.omit(dat2[1:2]),
  117. do.call("rbind",
  118. lapply(seq_len(ncol(dat2) - 2), function(i)  # seq_len() is empty when dat2 has only two columns
  119. na.omit(setNames(dat2[(1 + i):(2 + i)],
  120. names(dat2[1:2])))))
  121. )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement