Advertisement
decimusphostle

Hadoop Pipes Example

Aug 25th, 2011
739
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.17 KB | None | 0 0
  1. ::::::::::::::
  2. dp_hpt_mr.hh
  3. ::::::::::::::
  4. #ifndef __DP_HPT_MR_HH__
  5. #define __DP_HPT_MR_HH__
  6.  
  7. #include <Pipes.hh>
  8.  
  9. namespace dp_hpt
  10. {
  11. class word_count_mapper : public HadoopPipes::Mapper
  12. {
  13. public:
  14. word_count_mapper(const HadoopPipes::TaskContext &) {};
  15.  
  16. void map(HadoopPipes::MapContext & context);
  17. };
  18.  
  19. class word_count_reducer : public HadoopPipes::Reducer
  20. {
  21. public:
  22. word_count_reducer(const HadoopPipes::TaskContext &) {};
  23.  
  24. void reduce(HadoopPipes::ReduceContext & context);
  25. };
  26. };
  27.  
  28. #endif //__DP_HPT_MR_HH__
  29. ::::::::::::::
  30. dp_hadoop_pipes_test.cpp
  31. ::::::::::::::
  32. #include "dp_hpt_mr.hh"
  33.  
  34. #include <iostream>
  35. #include <TemplateFactory.hh>
  36.  
  37. using namespace std;
  38.  
  39. int
  40. main(int argc,
  41. char * argv[])
  42. {
  43. HadoopPipes::TemplateFactory2<dp_hpt::word_count_mapper,
  44. dp_hpt::word_count_reducer> factory;
  45.  
  46. return HadoopPipes::runTask(factory);
  47. }
  48. ::::::::::::::
  49. dp_hpt_mr.cpp
  50. ::::::::::::::
  51. #include "dp_hpt_mr.hh"
  52. #include <boost/algorithm/string.hpp>
  53. #include <boost/tokenizer.hpp>
  54. #include <boost/lexical_cast.hpp>
  55. #include <string>
  56.  
  57. namespace dp_hpt
  58. {
  59. void word_count_mapper::map(HadoopPipes::MapContext & context)
  60. {
  61. typedef boost::tokenizer<> tokenizer_t;
  62. tokenizer_t tokenizer(context.getInputValue());
  63.  
  64. for(tokenizer_t::const_iterator i = tokenizer.begin();
  65. tokenizer.end() != i;
  66. ++i)
  67. {
  68. context.emit(boost::to_lower_copy(*i), "1");
  69. }
  70. }
  71.  
  72. void word_count_reducer::reduce(HadoopPipes::ReduceContext & context)
  73. {
  74. uint32_t count(0);
  75.  
  76. do
  77. {
  78. ++count;
  79. }
  80. while(context.nextValue());
  81.  
  82. context.emit(context.getInputKey(),
  83. boost::lexical_cast<std::string>(count));
  84. }
  85. };
  86. ::::::::::::::
  87. ../conf/dp_hpt_conf.xml
  88. ::::::::::::::
  89. <?xml version="1.0"?>
  90. <configuration>
  91. <property>
  92. <name>hadoop.pipes.executable</name>
  93. <value>/path/to/dp_hadoop_pipes_test</value>
  94. </property>
  95. <property>
  96. <name>hadoop.pipes.java.recordreader</name>
  97. <value>true</value>
  98. </property>
  99. <property>
  100. <name>hadoop.pipes.java.recordwriter</name>
  101. <value>true</value>
  102. </property>
  103. </configuration>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement