Advertisement
Guest User

Untitled

a guest
Jul 24th, 2017
45
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.51 KB | None | 0 0
  1. #excuse the lazy file loading
  2. library(UpSetR)
  3. setwd("Desktop/Mondsee/Sequencing/Genome_Assemblies_V2/General_Stats/busco/")
  4. O07<-read.table("full_table_OHJ07_BUSCO",fill=TRUE)
  5. O13<-read.table("full_table_OHJ13_BUSCO",fill=TRUE)
  6. O22<-read.table("full_table_OHJ22_BUSCO",fill=TRUE)
  7. O72<-read.table("full_table_OHJ72_BUSCO",fill=TRUE)
  8. O82<-read.table("full_table_OHJ82_BUSCO",fill=TRUE)
  9. O96<-read.table("full_table_OHJ96_BUSCO",fill=TRUE)
  10. O97<-read.table("full_table_OHJ97_BUSCO",fill=TRUE)
  11. O98<-read.table("full_table_OHJ98_BUSCO",fill=TRUE)
  12. O104<-read.table("full_table_OHJ104_BUSCO",fill=TRUE)
  13. O105<-read.table("full_table_OHJ105_BUSCO",fill=TRUE)
  14. It<-read.table("full_table_Italy2_BUSCO",fill=TRUE)
  15. Tisc<-read.table("full_table_TiscarSM28_BUSCO",fill=TRUE)
  16. Tok<-read.table("full_table_Tokyo1_BUSCO",fill=TRUE)
  17. MN<-read.table("full_table_MNCHU008_BUSCO",fill=TRUE)
  18. colnames(O07)<-c("gene","status")
  19. colnames(O13)<-c("gene","status")
  20. colnames(O22)<-c("gene","status")
  21. colnames(O72)<-c("gene","status")
  22. colnames(O82)<-c("gene","status")
  23. colnames(O96)<-c("gene","status")
  24. colnames(O97)<-c("gene","status")
  25. colnames(O98)<-c("gene","status")
  26. colnames(O104)<-c("gene","status")
  27. colnames(O105)<-c("gene","status")
  28. colnames(It)<-c("gene","status")
  29. colnames(Tisc)<-c("gene","status")
  30. colnames(Tok)<-c("gene","status")
  31. colnames(MN)<-c("gene","status")
  32. OHJ07_set<-unique(O07[O07$status == "Complete",]$gene)
  33. OHJ13_set<-unique(O13[O13$status == "Complete",]$gene)
  34. OHJ22_set<-unique(O22[O22$status == "Complete",]$gene)
  35. OHJ72_set<-unique(O72[O72$status == "Complete",]$gene)
  36. OHJ82_set<-unique(O82[O82$status == "Complete",]$gene)
  37. OHJ96_set<-unique(O96[O96$status == "Complete",]$gene)
  38. OHJ97_set<-unique(O97[O97$status == "Complete",]$gene)
  39. OHJ98_set<-unique(O98[O98$status == "Complete",]$gene)
  40. OHJ104_set<-unique(O104[O104$status == "Complete",]$gene)
  41. OHJ105_set<-unique(O105[O105$status == "Complete",]$gene)
  42. Italy_set<-unique(It[It$status == "Complete",]$gene)
  43. Tiscar_set<-unique(Tisc[Tisc$status == "Complete",]$gene)
  44. Tokyo_set<-unique(Tok[Tok$status == "Complete",]$gene)
  45. MNCHU_set<-unique(MN[MN$status == "Complete",]$gene)
  46. genesets<-list(Italy=Italy_set,Tiscar=Tiscar_set,Tokyo=Tokyo_set,MNCHU008=MNCHU_set,OHJ07=OHJ07_set,OHJ13=OHJ13_set,OHJ22=OHJ22_set,OHJ72=OHJ72_set,OHJ82=OHJ82_set,OHJ96=OHJ96_set,OHJ97=OHJ97_set,OHJ98=OHJ98_set,OHJ104=OHJ104_set,OHJ105=OHJ105_set)
  47. clonenames<-c("Italy","Tiscar","Tokyo","OHJ82","OHJ22","OHJ104","OHJ97","OHJ96","OHJ98","OHJ105","OHJ07","OHJ13","OHJ72","MNCHU008")
  48. upset(fromList(genesets),sets=clonenames,order.by="freq",keep.order=TRUE,group.by="sets")
  49.  
  50. #So the plot and stuff worked well
  51. #But now I want to see what the overlaps are, not just how big
  52.  
  53. library(VennDiagram)
  54. calculate.overlap(x=genesets)
  55. #ERROR [2017-07-24 16:16:42] Invalid size of input object
  56. #I wanted to see that the error was because I had too many comparisons
  57. testsets<-list(Italy=Italy_set,Tiscar=Tiscar_set,Tokyo=Tokyo_set)
  58. overlap<-calculate.overlap(x=testsets)
  59. #overlap is produced and is a list of 7 overlaps
  60. #overlap[1] has 177 genes
  61. #Now, I COULD just keep comparing the genomes in smaller groups, but this seems inefficient
  62.  
  63. #I also saw fromList in UpSetR but this gives me a binary matrix with no reference to WHICH genes they are?
  64.  
  65. #So then I thought I'd concatenate all the lists and try and see which ones are there 14 times (ie. in all genomes)
  66. allsets<-cat(Italy_set,Tiscar_set,Tokyo_set,MNCHU_set,OHJ07_set,OHJ13_set,OHJ22_set,OHJ72_set,OHJ82_set,OHJ96_set,OHJ97_set,OHJ98_set,OHJ104_set,OHJ105_set)
  67. #When I do this allsets =NULL and this is in the std out
  68. 29 164 98 131 91 169 100 273 113 184 130 122 46 163 111 35 211 228 298 227 197 266 160 75 53 120 58 230 6 279 21 238 38 299 80 11 162 78 84 9 264 76 127 259 258 297 32 119 172 171 43 150 61 210 292 96 2 267 101 223 224 132 55 68 153 254 294 239 37 97 109 57 45 189 165 152 253 252 168 240 143 260 36 237 244 173 18 77 251 275 233 272 291 300 50 48 293 185 102 16 15 92 212 123 30 276 33 148 170 229 213 192 73 176 190 85 79 141 137 248 19 167 218 3 154 182 222 158 186 287 87 235 88 146 214 232 83 282 134 13 202 209 145 138 269 41 136 26 225 104 178 147 63 203 220 234 157 206 207 10 108 205 49 271 42 20 51 246 250 261 1 180 155 124 174 44 285 128 236 219 268 263 90 142 125 242 107 281 194 115 106 110 116 215 301 241 31 262 199 265 161 89 295 23 99 70 284 290 65 117 28 243 149 288 198 64 195 183 277 221 29 69 41 131 91 169 100 113 184 130 122 247 46 163 35 211 228 298 227 197 266 160 75 53 120 58 27 151 230 6 279 21 290 38 299 11 162 78 84 9 264 127 76 81 259 258 297 36 32 119 206 172 171 15... <truncated>
  69.  
  70. #so now Im stuck
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement