Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(reshape2)
# data.table is optional, so only probe whether it is installed. Every
# data.table call below is namespace-qualified, so the package does not need to
# be attached, and a missing package no longer triggers the warning that
# library(..., logical.return = TRUE) emits.
hasDT <- requireNamespace("data.table", quietly = TRUE)

# ==================
# = Make Fake Data =
# ==================
# ---- Options affecting size of 'big' ----
nScaff <- 50 # number of scaffolds
nIndex <- 100 # number of index values generated for every scaffold

# ---- Scaffold is combination of letters and numbers; a character ----
sL <- function() sample(LETTERS, 3, replace = TRUE) # 3 random capital letters
sN <- function() sample(1:9, 5, replace = TRUE) # 5 random digits in 1..9
scaffs <- replicate(nScaff, paste0(c(sL(), sN()), collapse = ""))

# ---- index is the "word" and is an integer ----
index <- seq_len(nIndex)

# ---- value is the coverage or some measured value of interest; is an integer ----
# one random value per (scaffold, index) combination
value <- sample(1:4, length(scaffs) * length(index), replace = TRUE)

# ---- combine these elements into a data.table / data.frame ----
# note that data.table is a lot faster, though not noticeable for small stuff
if (hasDT) { # if you have data.table installed, use it
  big <- data.table::CJ(scaffold = scaffs, index = index)[, value := value]
} else { # otherwise just use data.frame
  # stringsAsFactors = FALSE keeps 'scaffold' a character column, matching the
  # data.table branch; expand.grid() would otherwise turn it into a factor and
  # distort the size comparison made later in the script.
  big <- cbind(
    expand.grid(scaffold = scaffs, index = index, stringsAsFactors = FALSE),
    value = value
  )
}
# ===============================
# = 'Cast' into a matrix/ array =
# ===============================
# Pivot the long table into a wide matrix: one row per index, one column per
# scaffold, with cells taken from the 'value' column.
smaller <- reshape2::acast(big, index ~ scaffold, value.var = "value")
# ====================================
# = Compare object sizes in R memory =
# ====================================
# Keep the raw object.size() results (byte counts) for the comparison table,
# and display each one in Kb as a separate step.
bigMem <- object.size(big)
print(bigMem, units = "Kb")

smallerMem <- object.size(smaller)
print(smallerMem, units = "Kb")
# ============================================
# = Compare object sizes saved as text files =
# ============================================
# ---- names ----
# Use the session's temporary directory rather than a hard-coded ~/Desktop
# path: that folder does not exist on every machine, and a file with the same
# name already sitting there would have been overwritten and then deleted.
bTxt <- tempfile(pattern = "big_", fileext = ".txt")
sTxt <- tempfile(pattern = "smaller_", fileext = ".txt")

# ---- do big ----
write.table(big, file = bTxt, sep = "\t") # write
bigTxt <- file.size(bTxt) # size on disk, in bytes
unlink(bTxt) # remove

# ---- do small ----
write.table(smaller, file = sTxt, sep = "\t") # write
smallerTxt <- file.size(sTxt) # size on disk, in bytes
unlink(sTxt) # remove
# ==============================================
# = Compare object sizes saved as .RData files =
# ==============================================
# ---- names ----
# Use the session's temporary directory rather than a hard-coded ~/Desktop
# path: that folder does not exist on every machine, and a file with the same
# name already sitting there would have been overwritten and then deleted.
bRD <- tempfile(pattern = "big_", fileext = ".RData")
sRD <- tempfile(pattern = "smaller_", fileext = ".RData")

# ---- do big ----
save(big, file = bRD) # write
bigRD <- file.size(bRD) # size on disk, in bytes
unlink(bRD) # remove

# ---- do small ----
save(smaller, file = sRD) # write
smallerRD <- file.size(sRD) # size on disk, in bytes
unlink(sRD) # remove
# ====================
# = Comparison Table =
# ====================
# Gather the six measurements as plain numbers; as.numeric() drops the
# object_size class carried by the in-memory sizes.
sizes <- as.numeric(
  c(bigMem, smallerMem, bigTxt, smallerTxt, bigRD, smallerRD)
)
rowN <- c("big (original)", "small (matrix)")
colN <- c("in R memory", "as .txt file", "as .RData file")

# matrix() fills column by column, so each (big, small) pair forms one column.
# Dividing by 1E3 rescales the sizes to KB (assumes the inputs are byte counts).
sumMat <- matrix(sizes, nrow = 2, ncol = 3, dimnames = list(rowN, colN)) / 1E3

# Size of the small object as a percentage of the big one, per storage format.
# The factor of 100 makes the value an actual percent, matching its name;
# previously this row held a plain ratio.
smallPercent <- 100 * sumMat[2, ] / sumMat[1, ]

# Outer parentheses print the final table as well as assigning it.
(sumMat2 <- rbind(sumMat, smallPercent = smallPercent))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement