Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Imports
import java.io.IOException;
import java.util.Locale;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
- // Set up class WordCount
- public class WordCount {
- // Mapper Class
- public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{
- // Set up IntWritable
- private final static IntWritable one = new IntWritable(1);
- // Variable for word.
- private Text word = new Text();
- // Map function
- public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
- // Convert value to string.
- StringTokenizer itr = new StringTokenizer(value.toString());
- // Iterate through StringTokenizer.
- while (itr.hasMoreTokens()) {
- word.set(itr.nextToken());
- context.write(word, one);
- }
- }
- }
- // Reducer Class
- public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
- // Set up IntWritable
- private IntWritable result = new IntWritable();
- String[] search = new String[] {"education", "politics", "sports", "agriculture"};
- // Reduce function.
- public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
- String tmp = key.toString().toLowerCase();
- for(int i = 0; i < search.length; i++) {
- if (tmp.contains(search[i])) {
- // Sum all occurences of a word.
- int sum = 0;
- for (IntWritable val : values) {
- sum += val.get();
- }
- // Set result, context.
- result.set(sum);
- context.write(new Text(search[i]), result);
- }
- }
- }
- }
- // Main function.
- public static void main(String[] args) throws Exception {
- // Create new config object.
- Configuration conf = new Configuration();
- // Set up a new job and give it a name.
- Job job = Job.getInstance(conf, "word count");
- // When creating jar, use this class name.
- job.setJarByClass(WordCount.class);
- // Class for mapping.
- job.setMapperClass(TokenizerMapper.class);
- // Class for combining.
- job.setCombinerClass(IntSumReducer.class);
- // Class for reducing.
- job.setReducerClass(IntSumReducer.class);
- // What object type is the output key.
- job.setOutputKeyClass(Text.class);
- // What object type is the output value.
- job.setOutputValueClass(IntWritable.class);
- // Set input and output paths.
- FileInputFormat.addInputPath(job, new Path(args[0]));
- FileOutputFormat.setOutputPath(job, new Path(args[1]));
- // Exit when done.
- System.exit(job.waitForCompletion(true) ? 0 : 1);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement