Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the RHadoop packages used by this script: rmr2 provides the MapReduce
# API (to.dfs, mapreduce, from.dfs, keyval) and rhdfs provides hdfs.init().
library(rmr2)
library(rhdfs)

# NOTE(review): the original literal was '/C:UsersDesktopNew folder', which has
# lost its path separators (and has a stray leading slash), so it could not
# resolve to a directory. The path below is a best-effort reconstruction using
# forward slashes, which R accepts on Windows -- confirm the real location,
# including any user-name component that may have been dropped.
setwd("C:/Users/Desktop/New folder")

# Read the input records from the working directory.
data <- read.csv("datafile.csv")
# head(data)

# Initialise the HDFS connection, then push the records to the distributed
# file system so mapreduce() can consume them.
hdfs.init()
data.values <- to.dfs(data)
# Map step: keep only the records whose 4th column equals 1 and whose 8th
# column lies in the closed range [500, 565].
#
# k: input key passed in by the caller; not used here.
# v: block of input records indexed as v[row, column]; needs at least 8
#    columns.
#
# Returns keyval() pairs whose keys are the complete matching rows and whose
# values are columns 4 and 8 of those same rows.
data.map.fn <- function(k, v) {
  col4 <- as.numeric(v[, 4])
  col8 <- as.numeric(v[, 8])
  # which() returns only the positions that are TRUE, so rows where a
  # comparison evaluates to NA are left out rather than selected as NA rows.
  p <- which(col4 == 1 & col8 >= 500 & col8 <= 565)
  # drop = FALSE keeps the two-dimensional shape even when exactly one row
  # matches.
  keyval(v[p, , drop = FALSE], v[p, c(4, 8), drop = FALSE])
}
# Reduce step: flatten everything collected under one key into a single
# vector and emit it under that same key.
#
# k: the key for this group.
# v: the values grouped under k.
data.reduce.fn <- function(k, v) {
  flattened <- unlist(v)
  keyval(k, flattened)
}
# Run the MapReduce job over the uploaded records, pull the result back from
# the distributed file system, and write the distinct matching rows to
# 'mapreducedfile.csv' in the working directory.
dataex <- mapreduce(
  input = data.values,
  map = data.map.fn,
  reduce = data.reduce.fn
)
totalvar <- from.dfs(dataex)
totalvar  # auto-printed for inspection when the script is run at top level

# The original author noted that the result was NULL at this point. Fail with
# an explicit message instead of reshaping an empty object further down.
matched_rows <- if (is.null(totalvar)) NULL else keys(totalvar)
if (is.null(matched_rows) || NROW(matched_rows) == 0L) {
  stop("MapReduce job returned no records; nothing to write.", call. = FALSE)
}

# The earlier version computed a de-duplicated copy of the result but never
# used it, and reshaped a data frame with matrix() using a row count taken
# from unlist(totalvar), which mixes keys and values together.
# NOTE(review): data.map.fn uses the complete matching row as the key, so the
# keys are taken here as the rows to export; the earlier 10-column reshape
# suggests full rows were the intended output -- confirm against a real run.
# unique() collapses any repeated key rows.
y <- unique(as.data.frame(matched_rows))
rownames(y) <- NULL
write.csv(y, "mapreducedfile.csv")
Add Comment
Please, Sign In to add comment