Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/local/hawq/ext/R-2.13.0-1/bin/Rscript
# Hadoop-streaming mapper for word count.
# Reads lines from stdin and emits one "<word>\t1" record per
# whitespace-separated token, e.g. "foo\t1".

# Strip leading and trailing spaces from a single line.
trimWhiteSpace <- function(line) gsub("(^ +)|( +$)", "", line)

# Split a line into its whitespace-separated tokens.
splitIntoWords <- function(line) unlist(strsplit(line, "[[:space:]]+"))

con <- file("stdin", open = "r")
while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) {
  line <- trimWhiteSpace(line)
  words <- splitIntoWords(line)
  ## Equivalent vectorized form: cat(paste(words, "\t1\n", sep = ""), sep = "")
  for (w in words) {
    # fixed: the paste had dropped backslashes, turning "\t1\n" into the
    # literal string "t1n", which breaks the tab-separated streaming format
    cat(w, "\t1\n", sep = "")
  }
}
close(con)
#!/usr/local/hawq/ext/R-2.13.0-1/bin/Rscript
# Hadoop-streaming reducer for word count.
# Reads "<word>\t<count>" records from stdin, sums counts per word using an
# environment as a hash map, then prints one "<word>\t<total>" line per word.

# Strip leading and trailing spaces from a single line.
trimWhiteSpace <- function(line) gsub("(^ +)|( +$)", "", line)

# Parse one "<word>\t<count>" record into its two components.
splitLine <- function(line) {
  # fixed: separator must be the tab escape "\t"; the paste had dropped the
  # backslash, splitting on the letter "t" instead
  val <- unlist(strsplit(line, "\t"))
  list(word = val[1], count = as.integer(val[2]))
}

# Environment with hashing enabled acts as a word -> running-count map.
env <- new.env(hash = TRUE)

con <- file("stdin", open = "r")
while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) {
  line <- trimWhiteSpace(line)
  split <- splitLine(line)
  word <- split$word
  count <- split$count
  if (exists(word, envir = env, inherits = FALSE)) {
    oldcount <- get(word, envir = env)
    assign(word, oldcount + count, envir = env)
  } else {
    assign(word, count, envir = env)
  }
}
close(con)

# Emit the aggregated counts, tab-separated, one word per line
# (fixed: "\t" and "\n" had been mangled to literal "t" and "n").
for (w in ls(env, all = TRUE)) {
  cat(w, "\t", get(w, envir = env), "\n", sep = "")
}
- [gpadmin@hdw3 wordcount]$ cat data
- foo foo quux labs foo bar quux
- [gpadmin@hdw3 wordcount]$ cat data | Rscript mapper.R
- foo 1
- foo 1
- quux 1
- labs 1
- foo 1
- bar 1
- quux 1
- [gpadmin@hdw3 wordcount]$ cat data | Rscript mapper.R | Rscript reducer.R
- bar 1
- foo 3
- labs 1
- quux 2
- [gpadmin@hdw3 wordcount]$ hadoop jar /usr/lib/gphd/hadoop-mapreduce/hadoop-streaming-2.2.0-gphd-3.0.1.0.jar -D mapreduce.reduce.tasks=0 -file "mapper.R" -mapper "mapper.R" -file "reducer.R" -reducer "reducer.R" -input "/tmp/dummy/input/data" -output "/tmp/dummy/output"
- [gpadmin@hdw3 wordcount]$ hadoop jar /usr/lib/gphd/hadoop-mapreduce/hadoop-streaming-2.2.0-gphd-3.0.1.0.jar -D mapreduce.reduce.tasks=0 -file "mapper.R" -mapper "mapper.R" -input "/tmp/dummy/input/data" -output "/tmp/dummy/output"
- .....
- .....
- 14/07/23 00:15:46 INFO mapreduce.Job: Job job_1406094762596_0012 running in uber mode : false
- 14/07/23 00:15:46 INFO mapreduce.Job: map 0% reduce 0%
- 14/07/23 00:16:05 INFO mapreduce.Job: Task Id : attempt_1406094762596_0012_m_000000_0, Status : FAILED
- Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
- at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:320)
- at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:533)
- at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
- at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
- at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
- at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:429)
- ....above is repeated....
- 14/07/23 00:16:32 INFO mapreduce.Job: map 100% reduce 100%
- 14/07/23 00:16:32 INFO mapreduce.Job: Job job_1406094762596_0012 failed with state FAILED due to: Task failed task_1406094762596_0012_m_000000
- Job failed as tasks failed. failedMaps:1 failedReduces:0
- 14/07/23 00:16:33 INFO mapreduce.Job: Counters: 10
- Job Counters
- Failed map tasks=7
- Killed map tasks=1
- Launched map tasks=8
- Other local map tasks=6
- Data-local map tasks=2
- Total time spent by all maps in occupied slots (ms)=156534
- Total time spent by all reduces in occupied slots (ms)=0
- Map-Reduce Framework
- CPU time spent (ms)=0
- Physical memory (bytes) snapshot=0
- Virtual memory (bytes) snapshot=0
- 14/07/23 00:16:33 ERROR streaming.StreamJob: Job not Successful!
- Showing 4096 bytes of 8910 total. Click here for the full log.
- ain] org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 536870912
- 2014-07-23 00:16:01,292 INFO [main] org.apache.hadoop.mapred.MapTask: kvstart = 134217724; length = 33554432
- 2014-07-23 00:16:01,320 INFO [main] org.apache.hadoop.streaming.PipeMapRed: PipeMapRed exec [/data/3/yarn/nm-local-dir/usercache/gpadmin/appcache/application_1406094762596_0012/container_1406094762596_0012_01_000002/./mapper.R]
- 2014-07-23 00:16:01,324 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: map.input.start is deprecated. Instead, use mapreduce.map.input.start
- 2014-07-23 00:16:01,335 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
- 2014-07-23 00:16:01,336 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: map.input.length is deprecated. Instead, use mapreduce.map.input.length
- 2014-07-23 00:16:01,355 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: mapred.work.output.dir is deprecated. Instead, use mapreduce.task.output.dir
- 2014-07-23 00:16:01,365 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: map.input.file is deprecated. Instead, use mapreduce.map.input.file
- 2014-07-23 00:16:01,464 INFO [main] org.apache.hadoop.streaming.PipeMapRed: PipeMapRed failed!
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement