Advertisement
Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
- package com.etapix.wordcount;
- import java.io.IOException;
- import java.util.Iterator;
- import java.util.StringTokenizer;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.io.*;
- import org.apache.hadoop.mapred.*;
- /**
- * Hello world!
- *
- */
- public class App {
- public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
- private final static IntWritable one = new IntWritable(1);
- private Text word = new Text();
- @Override
- public void map(LongWritable k1, Text v1, OutputCollector<Text, IntWritable> oc, Reporter rep) throws IOException {
- String line = v1.toString();
- StringTokenizer tokenizer = new StringTokenizer(line);
- while (tokenizer.hasMoreTokens()) {
- word.set(tokenizer.nextToken());
- oc.collect(word, one);
- }
- }
- }
- public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
- @Override
- public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
- int sum = 0;
- while (values.hasNext()) {
- sum += values.next().get();
- }
- output.collect(key, new IntWritable(sum));
- }
- }
- public static void main(String[] args) throws IOException {
- JobConf conf = new JobConf(App.class);
- conf.setJobName("wordcount");
- conf.setOutputKeyClass(Text.class);
- conf.setOutputValueClass(IntWritable.class);
- conf.setMapperClass(Map.class);
- conf.setCombinerClass(Reduce.class);
- conf.setReducerClass(Reduce.class);
- conf.setInputFormat(TextInputFormat.class);
- conf.setOutputFormat(TextOutputFormat.class);
- FileInputFormat.setInputPaths(conf, new Path(args[0]));
- FileOutputFormat.setOutputPath(conf, new Path(args[1]));
- JobClient.runJob(conf);
- }
- }
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement