Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2014
165
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.79 KB | None | 0 0
  1. #!/usr/local/hawq/ext/R-2.13.0-1/bin/Rscript
  2. trimWhiteSpace <- function(line) gsub("(^ +)|( +$)", "", line)
  3. splitIntoWords <- function(line) unlist(strsplit(line, "[[:space:]]+"))
  4.  
  5. con <- file("stdin", open = "r")
  6. while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) {
  7. line <- trimWhiteSpace(line)
  8. words <- splitIntoWords(line)
  9. ## **** can be done as cat(paste(words, "t1n", sep=""), sep="")
  10. for (w in words)
  11. cat(w, "t1n", sep="")
  12. }
  13. close(con)
  14.  
  15. #!/usr/local/hawq/ext/R-2.13.0-1/bin/Rscript
  16. trimWhiteSpace <- function(line) gsub("(^ +)|( +$)", "", line)
  17. splitLine <- function(line) {
  18. val <- unlist(strsplit(line, "t"))
  19. list(word = val[1], count = as.integer(val[2]))
  20. }
  21. env <- new.env(hash = TRUE)
  22. con <- file("stdin", open = "r")
  23. while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) {
  24. line <- trimWhiteSpace(line)
  25. split <- splitLine(line)
  26. word <- split$word
  27. count <- split$count
  28. if (exists(word, envir = env, inherits = FALSE)) {
  29. oldcount <- get(word, envir = env)
  30. assign(word, oldcount + count, envir = env)
  31. }
  32. else assign(word, count, envir = env)
  33. }
  34. close(con)
  35. for (w in ls(env, all = TRUE))
  36. cat(w, "t", get(w, envir = env), "n", sep = "")
  37.  
  38. [gpadmin@hdw3 wordcount]$ cat data
  39. foo foo quux labs foo bar quux
  40. [gpadmin@hdw3 wordcount]$ cat data | Rscript mapper.R
  41. foo 1
  42. foo 1
  43. quux 1
  44. labs 1
  45. foo 1
  46. bar 1
  47. quux 1
  48. [gpadmin@hdw3 wordcount]$ cat data | Rscript mapper.R | Rscript reducer.R
  49. bar 1
  50. foo 3
  51. labs 1
  52. quux 2
  53.  
  54. [gpadmin@hdw3 wordcount]$ hadoop jar /usr/lib/gphd/hadoop-mapreduce/hadoop-streaming-2.2.0-gphd-3.0.1.0.jar -D mapreduce.reduce.tasks=0 -file "mapper.R" -mapper "mapper.R" -file "reducer.R" -reducer "reducer.R" -input "/tmp/dummy/input/data" -output "/tmp/dummy/output"
  55.  
  56. [gpadmin@hdw3 wordcount]$ hadoop jar /usr/lib/gphd/hadoop-mapreduce/hadoop-streaming-2.2.0-gphd-3.0.1.0.jar -D mapreduce.reduce.tasks=0 -file "mapper.R" -mapper "mapper.R" -input "/tmp/dummy/input/data" -output "/tmp/dummy/output"
  57. .....
  58. .....
  59. 14/07/23 00:15:46 INFO mapreduce.Job: Job job_1406094762596_0012 running in uber mode : false
  60. 14/07/23 00:15:46 INFO mapreduce.Job: map 0% reduce 0%
  61. 14/07/23 00:16:05 INFO mapreduce.Job: Task Id : attempt_1406094762596_0012_m_000000_0, Status : FAILED
  62. Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
  63. at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:320)
  64. at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:533)
  65. at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
  66. at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
  67. at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
  68. at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:429)
  69. ....above is repeated....
  70. 14/07/23 00:16:32 INFO mapreduce.Job: map 100% reduce 100%
  71. 14/07/23 00:16:32 INFO mapreduce.Job: Job job_1406094762596_0012 failed with state FAILED due to: Task failed task_1406094762596_0012_m_000000
  72. Job failed as tasks failed. failedMaps:1 failedReduces:0
  73. 14/07/23 00:16:33 INFO mapreduce.Job: Counters: 10
  74. Job Counters
  75. Failed map tasks=7
  76. Killed map tasks=1
  77. Launched map tasks=8
  78. Other local map tasks=6
  79. Data-local map tasks=2
  80. Total time spent by all maps in occupied slots (ms)=156534
  81. Total time spent by all reduces in occupied slots (ms)=0
  82. Map-Reduce Framework
  83. CPU time spent (ms)=0
  84. Physical memory (bytes) snapshot=0
  85. Virtual memory (bytes) snapshot=0
  86. 14/07/23 00:16:33 ERROR streaming.StreamJob: Job not Successful!
  87.  
  88. Showing the last 4096 bytes of the 8910-byte task log (truncated; full log available from the job history server).
  89.  
  90. ain] org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 536870912
  91. 2014-07-23 00:16:01,292 INFO [main] org.apache.hadoop.mapred.MapTask: kvstart = 134217724; length = 33554432
  92. 2014-07-23 00:16:01,320 INFO [main] org.apache.hadoop.streaming.PipeMapRed: PipeMapRed exec [/data/3/yarn/nm-local-dir/usercache/gpadmin/appcache/application_1406094762596_0012/container_1406094762596_0012_01_000002/./mapper.R]
  93. 2014-07-23 00:16:01,324 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: map.input.start is deprecated. Instead, use mapreduce.map.input.start
  94. 2014-07-23 00:16:01,335 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
  95. 2014-07-23 00:16:01,336 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: map.input.length is deprecated. Instead, use mapreduce.map.input.length
  96. 2014-07-23 00:16:01,355 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: mapred.work.output.dir is deprecated. Instead, use mapreduce.task.output.dir
  97. 2014-07-23 00:16:01,365 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: map.input.file is deprecated. Instead, use mapreduce.map.input.file
  98. 2014-07-23 00:16:01,464 INFO [main] org.apache.hadoop.streaming.PipeMapRed: PipeMapRed failed!
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement