Advertisement
Guest User

Untitled

a guest
Feb 21st, 2017
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.80 KB | None | 0 0
  1. // Imports
  2. import java.io.IOException;
  3. import java.util.StringTokenizer;
  4. import org.apache.hadoop.conf.Configuration;
  5. import org.apache.hadoop.fs.Path;
  6. import org.apache.hadoop.io.IntWritable;
  7. import org.apache.hadoop.io.Text;
  8. import org.apache.hadoop.mapreduce.Job;
  9. import org.apache.hadoop.mapreduce.Mapper;
  10. import org.apache.hadoop.mapreduce.Reducer;
  11. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  12. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  13.  
  14. // Set up class WordCount
  15. public class WordCount {
  16.  
  17. // Mapper Class
  18. public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{
  19.  
  20. // Set up IntWritable
  21. private final static IntWritable one = new IntWritable(1);
  22.  
  23. // Variable for word.
  24. private Text word = new Text();
  25.  
  26. // Map function
  27. public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
  28.  
  29. // Convert value to string.
  30. StringTokenizer itr = new StringTokenizer(value.toString());
  31.  
  32. // Iterate through StringTokenizer.
  33. while (itr.hasMoreTokens()) {
  34. word.set(itr.nextToken());
  35. context.write(word, one);
  36. }
  37. }
  38. }
  39.  
  40. // Reducer Class
  41. public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
  42.  
  43. // Set up IntWritable
  44. private IntWritable result = new IntWritable();
  45. String[] search = new String[] {"education", "politics", "sports", "agriculture"};
  46.  
  47. // Reduce function.
  48. public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
  49.  
  50. String tmp = key.toString().toLowerCase();
  51.  
  52. for(int i = 0; i < search.length; i++) {
  53. if (tmp.contains(search[i])) {
  54.  
  55. // Sum all occurences of a word.
  56. int sum = 0;
  57. for (IntWritable val : values) {
  58. sum += val.get();
  59. }
  60.  
  61. // Set result, context.
  62. result.set(sum);
  63. context.write(new Text(search[i]), result);
  64. }
  65. }
  66. }
  67. }
  68.  
  69. // Main function.
  70. public static void main(String[] args) throws Exception {
  71.  
  72. // Create new config object.
  73. Configuration conf = new Configuration();
  74.  
  75. // Set up a new job and give it a name.
  76. Job job = Job.getInstance(conf, "word count");
  77.  
  78. // When creating jar, use this class name.
  79. job.setJarByClass(WordCount.class);
  80.  
  81. // Class for mapping.
  82. job.setMapperClass(TokenizerMapper.class);
  83.  
  84. // Class for combining.
  85. job.setCombinerClass(IntSumReducer.class);
  86.  
  87. // Class for reducing.
  88. job.setReducerClass(IntSumReducer.class);
  89.  
  90. // What object type is the output key.
  91. job.setOutputKeyClass(Text.class);
  92.  
  93. // What object type is the output value.
  94. job.setOutputValueClass(IntWritable.class);
  95.  
  96. // Set input and output paths.
  97. FileInputFormat.addInputPath(job, new Path(args[0]));
  98. FileOutputFormat.setOutputPath(job, new Path(args[1]));
  99.  
  100. // Exit when done.
  101. System.exit(job.waitForCompletion(true) ? 0 : 1);
  102.  
  103. }
  104. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement