Advertisement
Guest User

Untitled

a guest
Mar 29th, 2017
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.83 KB | None | 0 0
  # ---- Dependencies ----
  # reshape2 is required: its acast() is used further down to cast the long
  # table into a matrix.
  library(reshape2)
  # data.table is optional. hasDT is TRUE when the package is installed and
  # FALSE otherwise; requireNamespace() only loads the namespace and neither
  # warns nor errors when the package is missing. data.table functions are
  # called with an explicit data.table:: prefix, so attaching is not needed.
  hasDT <- requireNamespace("data.table", quietly = TRUE)

  # ==================
  # = Make Fake Data =
  # ==================
  # ---- Options affecting size of 'big' ----
  nScaff <- 50L  # number of scaffolds
  nIndex <- 100L # number of index values generated for the 'index' vector

  # ---- Scaffold is combination of letters and numbers; a character ----
  # Each scaffold name is 3 random capital letters followed by 5 random digits
  # drawn from 1-9 (both sampled with replacement), pasted into one string.
  sL <- function() sample(LETTERS, 3L, replace = TRUE)
  sN <- function() sample(1:9, 5L, replace = TRUE)
  scaffs <- replicate(nScaff, paste0(c(sL(), sN()), collapse = ""))

  # ---- index is the "word" and is an integer ----
  # seq_len() rather than 1:nIndex so that nIndex = 0 gives an empty vector
  # instead of c(1, 0)
  index <- seq_len(nIndex)

  # ---- value is the coverage or some measured value of interest; is an integer ----
  # one random value (1-4) for every scaffold x index combination
  value <- sample(1:4, length(scaffs) * length(index), replace = TRUE)

  # ---- combine these elements into a data.table / data.frame ----
  # 'big' is the long-format table: one row per scaffold x index combination,
  # with columns scaffold, index and value.
  # note that data.table is a lot faster, though not noticeable for small stuff
  if (hasDT) { # if you have data.table installed, use it
    # CJ() builds every scaffold x index combination; := adds the value column
    big <- data.table::CJ(scaffold = scaffs, index = index)[, value := value]
  } else { # otherwise just use data.frame
    # stringsAsFactors = FALSE keeps scaffold a character column (expand.grid
    # would otherwise convert it to a factor), matching the data.table branch
    big <- cbind(
      expand.grid(scaffold = scaffs, index = index, stringsAsFactors = FALSE),
      value = value
    )
  }

  # ===============================
  # = 'Cast' into a matrix/ array =
  # ===============================
  # change object dimensions: index becomes the rows, scaffold the columns,
  # and the cells hold value. value.var is given explicitly so acast() does
  # not have to guess which column holds the measurements.
  smaller <- reshape2::acast(
    data = big,
    formula = index ~ scaffold,
    value.var = "value"
  )


  # ====================================
  # = Compare object sizes in R memory =
  # ====================================
  # object.size() reports bytes. The byte counts are stored for the comparison
  # table below; the print() calls only display them in Kb for a quick look.
  bigMem <- object.size(big)
  print(bigMem, units = "Kb")
  smallerMem <- object.size(smaller)
  print(smallerMem, units = "Kb")


  # ============================================
  # = Compare object sizes saved as text files =
  # ============================================
  # ---- names ----
  # Temporary files are used so the script does not depend on a ~/Desktop
  # folder existing; each file is deleted right after it has been measured.
  bTxt <- tempfile(pattern = "big", fileext = ".txt")
  sTxt <- tempfile(pattern = "smaller", fileext = ".txt")

  # ---- do big ----
  write.table(big, file = bTxt, sep = "\t") # write
  bigTxt <- file.size(bTxt) # size in bytes
  file.remove(bTxt) # remove

  # ---- do small ----
  write.table(smaller, file = sTxt, sep = "\t") # write
  smallerTxt <- file.size(sTxt) # size in bytes
  file.remove(sTxt) # remove

  # ==============================================
  # = Compare object sizes saved as .RData files =
  # ==============================================
  # ---- names ----
  # Temporary files are used so the script does not depend on a ~/Desktop
  # folder existing; each file is deleted right after it has been measured.
  bRD <- tempfile(pattern = "big", fileext = ".RData")
  sRD <- tempfile(pattern = "smaller", fileext = ".RData")

  # ---- do big ----
  save(big, file = bRD) # write
  bigRD <- file.size(bRD) # size in bytes
  file.remove(bRD) # remove

  # ---- do small ----
  save(smaller, file = sRD) # write
  smallerRD <- file.size(sRD) # size in bytes
  file.remove(sRD) # remove

  # ====================
  # = Comparison Table =
  # ====================
  # All six sizes are in bytes. matrix() fills column by column, so each
  # (big, smaller) pair ends up in one column: memory, .txt, .RData.
  sizes <- c(bigMem, smallerMem, bigTxt, smallerTxt, bigRD, smallerRD)
  rowN <- c("big (original)", "small (matrix)")
  colN <- c("in R memory", "as .txt file", "as .RData file")
  sumMat <- matrix(
    sizes,
    nrow = 2,
    ncol = 3,
    dimnames = list(rowN, colN)
  ) / 1E3 # in KB
  # size of smaller as a percent of big; the ratio is multiplied by 100 so the
  # value really is a percentage, as the name says
  smallPercent <- sumMat[2, ] / sumMat[1, ] * 100
  (sumMat2 <- rbind(sumMat, smallPercent = smallPercent))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement