daily pastebin goal
40%
SHARE
TWEET

Untitled

a guest Apr 16th, 2018 48 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Load the raw input: a plain-text file picked interactively and read
  # without a header row. Only the first column (V1) is used in this step.
  #
  # NOTE(review): the script used to start with rm(list = ls()). That call is
  # dropped on purpose: it wipes the caller's whole workspace, and this step
  # assigns `data` and `temp` itself, so it does not need a clean workspace.
  data <- read.delim(file.choose(), header = FALSE)

  # Split every record on single spaces, giving one character vector of tokens
  # per row. strsplit() is vectorised over its input, so no index loop is
  # needed, and an empty input yields an empty list rather than an NA entry.
  temp <- strsplit(as.character(data$V1), " ")
  12.  
  # The first token of each row holds that row's comma-separated response
  # codes; turn it into one numeric vector per row.
  response <- lapply(temp, function(tokens) {
    as.numeric(strsplit(tokens[1], ",")[[1]])
  })

  # Number of response codes found on each row. lengths() returns integers;
  # the conversion keeps the vector double, as the loop-built version was.
  l.response <- as.numeric(lengths(response))
  25.  
  # Names of the 22 indicator columns: R1 ... R22.
  col.names <- paste0("R", 1:22)

  # One indicator row per input record.
  l.r <- length(temp)

  # All-zero table with one numeric column per name in `col.names`, built
  # from the names instead of spelling out the 22 columns by hand.
  df.response <- as.data.frame(
    matrix(0, nrow = l.r, ncol = length(col.names),
           dimnames = list(NULL, col.names))
  )

  # Set a 1 for every response code present on a row. Codes are shifted by
  # one to obtain the column index (code k -> column k + 1).
  # seq_along() keeps the loop from running when `response` is empty.
  for (i in seq_along(response)) {
    df.response[i, response[[i]] + 1] <- 1
  }
  43.  
  # NOTE(review): `feature`, `value` and `list.name` are not read anywhere in
  # the visible script; they are kept in case other code relies on them.
  feature <- c(0)
  value <- c(0)

  v.l <- 21519  # presumably the number of input rows -- TODO confirm

  list.name <- paste0("V", 1:v.l)

  # For every row, parse the tokens after the first one as "feature:value"
  # pairs and store them as a two-column data frame (f.vec = feature index,
  # v.vec = value).
  #
  # f.vec and v.vec are rebuilt from scratch for each row. Previously they
  # were shared accumulators that were never reset, so a row with fewer pairs
  # than an earlier row kept that earlier row's trailing entries.
  v.list <- vector("list", length(temp))

  for (i in seq_along(temp)) {
    # Drop the leading response token; a row without features gives an empty
    # vector here instead of iterating backwards over 2:1.
    pairs <- temp[[i]][-1]
    # Consecutive spaces in the input produce empty tokens; skip them so they
    # do not turn into NA feature indices.
    pairs <- strsplit(pairs[nzchar(pairs)], ":", fixed = TRUE)

    f.vec <- vapply(pairs, function(p) as.numeric(p[1]), numeric(1))
    v.vec <- vapply(pairs, function(p) as.numeric(p[2]), numeric(1))

    v.list[[i]] <- data.frame(f.vec, v.vec)
  }
  66.  
  # Column names of the feature table: V1 ... V30438.
  feature.name <- paste0("V", 1:30438)

  v.l <- 21519  # number of rows in the feature table

  # All-zero feature table with v.l rows and one numeric column per feature
  # name. Every column starts out as the same zero vector, so the table is
  # built in one step from a named list of columns. This replaces appending
  # 30438 columns one by one to a data frame and then dropping a dummy column.
  variables <- as.data.frame(
    structure(rep(list(numeric(v.l)), length(feature.name)),
              names = feature.name)
  )

  # Untouched copy of the empty table.
  copy.variables <- variables
  83.  
  # Copy the parsed feature values of the first 100 rows into `variables`:
  # for row i, the value v.vec is written into the column indexed by f.vec.
  for (i in 1:100) {

    pos <- v.list[[i]][, "f.vec"]
    replace <- v.list[[i]][, "v.vec"]

    # When a feature index occurs more than once on a row, keep only its last
    # occurrence (and the matching value). This is the same result the former
    # repeat-loop reached by deleting first occurrences one at a time, but it
    # is done in one pass and cannot loop forever.
    # NOTE(review): presumably needed because a data frame cannot be assigned
    # through repeated column subscripts -- confirm.
    keep <- !duplicated(pos, fromLast = TRUE)
    pos <- pos[keep]
    replace <- replace[keep]

    variables[i, pos] <- replace
  }

  # Quick look at the table sizes (printed when run interactively).
  dim(df.response)
  dim(variables)

  # Final data set: features followed by response indicators, first 100 rows.
  final.data <- cbind(variables[1:100, ], df.response[1:100, ])
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top