Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Read the input file picked interactively. header = FALSE makes the first
# line data rather than column names; only column V1 is used below.
# NOTE: the original `rm(list = ls())` was dropped on purpose -- wiping the
# caller's workspace is a side effect nothing in this script needs.
data <- read.delim(file.choose(), header = FALSE)

# Now using strsplit to create a regular dataset: temp[[i]] holds the
# space-separated tokens of entry i of data$V1. strsplit is vectorised, so
# no index loop is required and an empty input simply gives an empty list.
temp <- strsplit(as.character(data$V1), " ")
# Parse the response field: the first token of every line is split on ","
# and converted to numeric, so response[[i]] is the numeric vector of
# response codes found on line i.
response <- lapply(temp, function(tokens) {
  as.numeric(strsplit(tokens[1], ",")[[1]])
})

# Now working for responses: l.response[i] is the number of codes on line i.
l.response <- lengths(response)
# Build the 0/1 response indicator table: one row per input line and one
# numeric column per entry of col.names (R1 .. R22).
col.names <- paste0("R", 1:22)
l.r <- length(temp)
df.response <- local({
  indicator <- matrix(
    0,
    nrow = l.r,
    ncol = length(col.names),
    dimnames = list(NULL, col.names)
  )
  # One (row, column) pair per response code. A code k marks column k + 1,
  # so code 0 lands in R1 -- presumably the codes are 0-based (0..21);
  # verify against the data. A code outside that range now raises
  # "subscript out of bounds" instead of silently growing the table.
  hits <- cbind(
    rep(seq_along(response), lengths(response)),
    unlist(response) + 1
  )
  indicator[hits] <- 1
  as.data.frame(indicator)
})
# Placeholders kept from the original script; nothing in the visible code
# reads feature, value or list.name afterwards.
feature <- c(0)
value <- c(0)
v.l <- 21519
list.name <- paste0("V", seq_len(v.l))

# Parse the "index:value" tokens that follow the response field.
# v.list[[i]] is a data frame with columns f.vec (the part before ":") and
# v.vec (the part after ":"), one row per token of line i.
#
# f.vec and v.vec are rebuilt from scratch for every line. The original
# wrote into them element by element without resetting, so a line with
# fewer tokens than an earlier one kept that earlier line's trailing
# entries. Dropping the first token with [-1] (instead of 2:length(...))
# also means a line without any feature token yields a zero-row data frame
# rather than a spurious NA row.
f.vec <- 0
v.vec <- 0
v.list <- vector("list", length(temp))
for (i in seq_along(temp)) {
  pairs <- strsplit(temp[[i]][-1], ":")
  f.vec <- as.numeric(vapply(pairs, function(p) p[1], character(1)))
  v.vec <- as.numeric(vapply(pairs, function(p) p[2], character(1)))
  v.list[[i]] <- data.frame(f.vec, v.vec)
}
# Zero-filled feature table: v.l rows and one numeric column per feature
# name (V1 .. V30438). The whole table is created in a single call instead
# of appending 30438 columns one at a time to a dummy data frame and then
# dropping the dummy column.
feature.name <- paste0("V", seq_len(30438))
v.l <- 21519
variables <- as.data.frame(
  setNames(rep(list(numeric(v.l)), length(feature.name)), feature.name)
)
# Copy of the still-empty table; nothing in the visible script reads it.
copy.variables <- variables
# Fill the feature table for the leading lines only. The original script
# hard-coded 100 lines; the count is capped by what is actually available
# so a shorter input no longer fails with "subscript out of bounds".
n.fill <- min(100, length(v.list), nrow(variables), nrow(df.response))
for (i in seq_len(n.fill)) {
  pos <- v.list[[i]][, "f.vec"]
  replace <- v.list[[i]][, "v.vec"]
  # A column index may appear only once per assignment. When an index is
  # repeated on a line, keep its last occurrence -- the same result the
  # original repeat/table loop reached by deleting first occurrences.
  keep <- !duplicated(pos, fromLast = TRUE)
  pos <- pos[keep]
  replace <- replace[keep]
  # pos is used as a column position, which assumes 1-based feature
  # indices -- TODO confirm against the data.
  if (length(pos) > 0) {
    variables[i, pos] <- replace
  }
}
dim(df.response)
dim(variables)
# Filled feature rows side by side with the matching response indicators.
final.data <- cbind(
  variables[seq_len(n.fill), ],
  df.response[seq_len(n.fill), ]
)
Add Comment
Please, Sign In to add comment