Advertisement
Guest User

Word Count 0.20 - Denis

a guest
Nov 24th, 2011
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.48 KB | None | 0 0
  1. import java.io.IOException;
  2. import java.util.Iterator;
  3. import java.util.StringTokenizer;
  4.  
  5. import org.apache.hadoop.conf.Configuration;
  6. import org.apache.hadoop.fs.Path;
  7. import org.apache.hadoop.io.IntWritable;
  8. import org.apache.hadoop.io.LongWritable;
  9. import org.apache.hadoop.io.Text;
  10. import org.apache.hadoop.mapreduce.Job;
  11. import org.apache.hadoop.mapreduce.Mapper;
  12. import org.apache.hadoop.mapreduce.Reducer;
  13. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  14. import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
  15. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  16. import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
  17. import org.apache.hadoop.util.ToolRunner;
  18.  
  19. import com.bejoy.sampels.worcount.WordCountDriver;
  20.  
  21. public class WordCountNewAPI {
  22.  
  23. public static class WordCountMapper extends Mapper<LongWritable,Text, Text, IntWritable> {
  24.  
  25. private final static IntWritable ONE = new IntWritable(1);
  26. private Text word = new Text();
  27.  
  28. @Override
  29. protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  30.  
  31. String line = value.toString();
  32. StringTokenizer tokenizer = new StringTokenizer(line);
  33. while (tokenizer.hasMoreTokens()) {
  34. word.set(tokenizer.nextToken());
  35. context.write(word, ONE);
  36. }
  37.  
  38. super.map(key, value, context);
  39. }
  40.  
  41. }
  42.  
  43. public static class WordCountReducer extends Reducer<Text,IntWritable, Text, IntWritable> {
  44.  
  45. @Override
  46. protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
  47.  
  48. int sum = 0;
  49. Iterator<IntWritable> iter = values.iterator();
  50. while (iter.hasNext()) {
  51. sum += iter.next().get();
  52. }
  53.  
  54. context.write(key, new IntWritable(sum));
  55.  
  56. super.reduce(key, values, context);
  57. }
  58.  
  59. }
  60.  
  61. /**
  62. * @param args
  63. * @throws Exception
  64. */
  65. public static void main(String[] args) throws Exception {
  66.  
  67.  
  68. int res = ToolRunner.run(new Configuration(), new WordCountDriver(), args);
  69. System.exit(res);
  70. }
  71.  
  72. public Void run() throws Exception {
  73.  
  74. Configuration conf = new Configuration();
  75. conf.set("mapred.job.tracker", "<ip:8021>");
  76.  
  77. Job job = new Job(conf, "wordcount");
  78.  
  79. job.setJarByClass(WordCountNewAPI.class);
  80.  
  81. job.setOutputKeyClass(Text.class);
  82. job.setOutputValueClass(IntWritable.class);
  83.  
  84. job.setMapperClass(WordCountMapper.class);
  85. job.setCombinerClass(WordCountReducer.class);
  86. job.setReducerClass(WordCountReducer.class);
  87.  
  88. job.setInputFormatClass(TextInputFormat.class);
  89. job.setOutputFormatClass(TextOutputFormat.class);
  90.  
  91. FileInputFormat.setInputPaths(job, new Path("hdfs://localhost:9000/userdata/bejoy/samples/wc/input"));
  92. FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/userdata/bejoy/samples/wc/input"));
  93.  
  94. boolean b = job.waitForCompletion(true);
  95. if (!b) {
  96. throw new IOException("error with job!");
  97. }
  98.  
  99. return null;
  100. }
  101.  
  102. }
  103.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement