Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2014
165
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.79 KB | None | 0 0
  1. #!/usr/local/hawq/ext/R-2.13.0-1/bin/Rscript
  2. trimWhiteSpace <- function(line) gsub("(^ +)|( +$)", "", line)
  3. splitIntoWords <- function(line) unlist(strsplit(line, "[[:space:]]+"))
  4.  
  5. con <- file("stdin", open = "r")
  6. while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) {
  7. line <- trimWhiteSpace(line)
  8. words <- splitIntoWords(line)
  9. ## **** can be done as cat(paste(words, "t1n", sep=""), sep="")
  10. for (w in words)
  11. cat(w, "t1n", sep="")
  12. }
  13. close(con)
  14.  
  15. #!/usr/local/hawq/ext/R-2.13.0-1/bin/Rscript
  16. trimWhiteSpace <- function(line) gsub("(^ +)|( +$)", "", line)
  17. splitLine <- function(line) {
  18. val <- unlist(strsplit(line, "t"))
  19. list(word = val[1], count = as.integer(val[2]))
  20. }
  21. env <- new.env(hash = TRUE)
  22. con <- file("stdin", open = "r")
  23. while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) {
  24. line <- trimWhiteSpace(line)
  25. split <- splitLine(line)
  26. word <- split$word
  27. count <- split$count
  28. if (exists(word, envir = env, inherits = FALSE)) {
  29. oldcount <- get(word, envir = env)
  30. assign(word, oldcount + count, envir = env)
  31. }
  32. else assign(word, count, envir = env)
  33. }
  34. close(con)
  35. for (w in ls(env, all = TRUE))
  36. cat(w, "t", get(w, envir = env), "n", sep = "")
  37.  
  38. [gpadmin@hdw3 wordcount]$ cat data
  39. foo foo quux labs foo bar quux
  40. [gpadmin@hdw3 wordcount]$ cat data | Rscript mapper.R
  41. foo 1
  42. foo 1
  43. quux 1
  44. labs 1
  45. foo 1
  46. bar 1
  47. quux 1
  48. [gpadmin@hdw3 wordcount]$ cat data | Rscript mapper.R | Rscript reducer.R
  49. bar 1
  50. foo 3
  51. labs 1
  52. quux 2
  53.  
  54. [gpadmin@hdw3 wordcount]$ hadoop jar /usr/lib/gphd/hadoop-mapreduce/hadoop-streaming-2.2.0-gphd-3.0.1.0.jar -D mapreduce.reduce.tasks=0 -file "mapper.R" -mapper "mapper.R" -file "reducer.R" -reducer "reducer.R" -input "/tmp/dummy/input/data" -output "/tmp/dummy/output"
  55.  
  56. [gpadmin@hdw3 wordcount]$ hadoop jar /usr/lib/gphd/hadoop-mapreduce/hadoop-streaming-2.2.0-gphd-3.0.1.0.jar -D mapreduce.reduce.tasks=0 -file "mapper.R" -mapper "mapper.R" -input "/tmp/dummy/input/data" -output "/tmp/dummy/output"
  57. .....
  58. .....
  59. 14/07/23 00:15:46 INFO mapreduce.Job: Job job_1406094762596_0012 running in uber mode : false
  60. 14/07/23 00:15:46 INFO mapreduce.Job: map 0% reduce 0%
  61. 14/07/23 00:16:05 INFO mapreduce.Job: Task Id : attempt_1406094762596_0012_m_000000_0, Status : FAILED
  62. Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
  63. at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:320)
  64. at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:533)
  65. at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
  66. at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
  67. at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
  68. at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:429)
  69. ....above is repeated....
  70. 14/07/23 00:16:32 INFO mapreduce.Job: map 100% reduce 100%
  71. 14/07/23 00:16:32 INFO mapreduce.Job: Job job_1406094762596_0012 failed with state FAILED due to: Task failed task_1406094762596_0012_m_000000
  72. Job failed as tasks failed. failedMaps:1 failedReduces:0
  73. 14/07/23 00:16:33 INFO mapreduce.Job: Counters: 10
  74. Job Counters
  75. Failed map tasks=7
  76. Killed map tasks=1
  77. Launched map tasks=8
  78. Other local map tasks=6
  79. Data-local map tasks=2
  80. Total time spent by all maps in occupied slots (ms)=156534
  81. Total time spent by all reduces in occupied slots (ms)=0
  82. Map-Reduce Framework
  83. CPU time spent (ms)=0
  84. Physical memory (bytes) snapshot=0
  85. Virtual memory (bytes) snapshot=0
  86. 14/07/23 00:16:33 ERROR streaming.StreamJob: Job not Successful!
  87.  
  88. Showing the last 4096 bytes of the 8910-byte task log (truncated; full log available from the job history server).
  89.  
  90. ain] org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 536870912
  91. 2014-07-23 00:16:01,292 INFO [main] org.apache.hadoop.mapred.MapTask: kvstart = 134217724; length = 33554432
  92. 2014-07-23 00:16:01,320 INFO [main] org.apache.hadoop.streaming.PipeMapRed: PipeMapRed exec [/data/3/yarn/nm-local-dir/usercache/gpadmin/appcache/application_1406094762596_0012/container_1406094762596_0012_01_000002/./mapper.R]
  93. 2014-07-23 00:16:01,324 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: map.input.start is deprecated. Instead, use mapreduce.map.input.start
  94. 2014-07-23 00:16:01,335 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
  95. 2014-07-23 00:16:01,336 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: map.input.length is deprecated. Instead, use mapreduce.map.input.length
  96. 2014-07-23 00:16:01,355 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: mapred.work.output.dir is deprecated. Instead, use mapreduce.task.output.dir
  97. 2014-07-23 00:16:01,365 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: map.input.file is deprecated. Instead, use mapreduce.map.input.file
  98. 2014-07-23 00:16:01,464 INFO [main] org.apache.hadoop.streaming.PipeMapRed: PipeMapRed failed!
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement