Advertisement
Eamesy

WordCount

May 23rd, 2019
182
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 2.61 KB | None | 0 0
  1. package org.apache.hadoop.examples;
  2.  
  3. import java.io.IOException;
  4. import java.util.StringTokenizer;
  5. import org.apache.hadoop.conf.Configuration;
  6. import org.apache.hadoop.fs.Path;
  7. import org.apache.hadoop.io.IntWritable;
  8. import org.apache.hadoop.io.LongWritable;
  9. import org.apache.hadoop.io.Text;
  10. import org.apache.hadoop.mapreduce.Job;
  11. import org.apache.hadoop.mapreduce.Mapper;
  12. import org.apache.hadoop.mapreduce.Reducer;
  13. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  14. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  15. import org.apache.hadoop.util.GenericOptionsParser;
  16. /**
  17.  * Hello world!
  18.  *
  19.  */
  20. public class WordCount
  21. {
  22.     public static class TokenizerMapper
  23.         extends Mapper<Object, Text, Text, IntWritable>{
  24.             private final static IntWritable one = new IntWritable(1);
  25.             private Text word = new Text();
  26.  
  27.     public void map(Object key, Text value, Context context
  28.                     ) throws IOException, InterruptedException {
  29.         StringTokenizer itr = new StringTokenizer(value.toString());
  30.         while (itr.hasMoreTokens()) {
  31.             String token = itr.nextToken();
  32.             if(token.startsWith("b") || token.startsWith("t")))
  33.             {
  34.                 context.write(word, one);
  35.             }
  36.        
  37.         }
  38.     }
  39. }
  40.  
  41. public static class IntSumReducer
  42.         extends Reducer<Text,IntWritable,Text,IntWritable> {
  43.     private IntWritable result = new IntWritable();
  44.  
  45.     public void reduce(Text key, Iterable<IntWritable> values,
  46.                         Context context
  47.                         ) throws IOException, InterruptedException {
  48.         int sum = 0;
  49.         for (IntWritable val : values) {
  50.             sum += val.get();
  51.         }
  52.         context.write(key, new IntWritable(sum));
  53.     }
  54. }
  55.  
  56. public static void main(String[] args) throws Exception {
  57.     Configuration conf = new Configuration();
  58.     String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  59.     if (otherArgs.length != 2) {
  60.         System.err.println("Usage: wordcount <in> <out>");
  61.         System.exit(2);
  62.     }
  63.     Job job = new Job(conf, "word count");
  64.     job.setJarByClass(WordCount.class);
  65.     job.setMapperClass(TokenizerMapper.class);
  66.     job.setCombinerClass(IntSumReducer.class);
  67.     job.setReducerClass(IntSumReducer.class);
  68.     job.setOutputKeyClass(Text.class);
  69.     job.setOutputValueClass(IntWritable.class);
  70.     FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  71.     FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  72.     System.exit(job.waitForCompletion(true) ? 0 : 1);
  73.     }
  74. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement