daily pastebin goal
68%
SHARE
TWEET

Hadoop WordCount - NetBeans IDE

a guest Feb 12th, 2013 2,629 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. package com.etapix.wordcount;
  2.  
  3. import java.io.IOException;
  4. import java.util.Iterator;
  5. import java.util.StringTokenizer;
  6. import org.apache.hadoop.fs.Path;
  7. import org.apache.hadoop.io.*;
  8. import org.apache.hadoop.mapred.*;
  9.  
  10. /**
  11.  * Hello world!
  12.  *
  13.  */
  14. public class App {
  15.  
  16.     public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
  17.  
  18.         private final static IntWritable one = new IntWritable(1);
  19.         private Text word = new Text();
  20.  
  21.         @Override
  22.         public void map(LongWritable k1, Text v1, OutputCollector<Text, IntWritable> oc, Reporter rep) throws IOException {
  23.             String line = v1.toString();
  24.             StringTokenizer tokenizer = new StringTokenizer(line);
  25.             while (tokenizer.hasMoreTokens()) {
  26.                 word.set(tokenizer.nextToken());
  27.                 oc.collect(word, one);
  28.             }
  29.         }
  30.     }
  31.  
  32.     public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
  33.  
  34.         @Override
  35.         public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
  36.             int sum = 0;
  37.             while (values.hasNext()) {
  38.                 sum += values.next().get();
  39.             }
  40.             output.collect(key, new IntWritable(sum));
  41.         }
  42.     }
  43.  
  44.     public static void main(String[] args) throws IOException {
  45.         JobConf conf = new JobConf(App.class);
  46.         conf.setJobName("wordcount");
  47.  
  48.         conf.setOutputKeyClass(Text.class);
  49.         conf.setOutputValueClass(IntWritable.class);
  50.  
  51.         conf.setMapperClass(Map.class);
  52.         conf.setCombinerClass(Reduce.class);
  53.         conf.setReducerClass(Reduce.class);
  54.  
  55.         conf.setInputFormat(TextInputFormat.class);
  56.         conf.setOutputFormat(TextOutputFormat.class);
  57.  
  58.         FileInputFormat.setInputPaths(conf, new Path(args[0]));
  59.         FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  60.  
  61.         JobClient.runJob(conf);
  62.     }
  63. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top