Guest User

code

a guest
Dec 22nd, 2016
217
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.41 KB | None | 0 0
Mapper (MyMapper.java)
  2. package edu;
  3.  
  4. import java.io.IOException;
  5. import java.util.HashSet;
  6. import java.util.Set;
  7. import java.util.StringTokenizer;
  8.  
  9. import org.apache.hadoop.conf.Configuration;
  10. import org.apache.hadoop.fs.FileSystem;
  11. import org.apache.hadoop.io.IntWritable;
  12. import org.apache.hadoop.io.LongWritable;
  13. import org.apache.hadoop.io.NullWritable;
  14. import org.apache.hadoop.io.Text;
  15. import org.apache.hadoop.mapreduce.Mapper;
  16. import org.apache.hadoop.mapreduce.lib.input.FileSplit;
  17.  
  18.  
  19. public class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
  20. private final static IntWritable one = new IntWritable(1);
  21. private Text word = new Text();
  22.  
  23. public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  24. String line = value.toString();
  25. StringTokenizer tokenizer = new StringTokenizer(line);
  26. while (tokenizer.hasMoreTokens()) {
  27. word.set(tokenizer.nextToken());
  28. context.write(word, one);
  29. }
  30. }
  31. }
  32.  
Reducer (MyReducer.java)
  34.  
  35. package edu;
  36.  
  37. import java.io.IOException;
  38.  
  39. import org.apache.hadoop.io.IntWritable;
  40. import org.apache.hadoop.io.Text;
  41. import org.apache.hadoop.mapreduce.Reducer;
  42.  
  43.  
  44. public class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  45.  
  46. public void reduce(Text key, Iterable<IntWritable> values, Context context)
  47. throws IOException, InterruptedException {
  48. int sum = 0;
  49. for (IntWritable val : values) {
  50. sum += val.get();
  51. }
  52. context.write(key, new IntWritable(sum));
  53. }
  54. }
Driver (WordCount.java)
  56. package edu;
  57. import org.apache.hadoop.conf.Configuration;
  58. import org.apache.hadoop.conf.Configured;
  59. import org.apache.hadoop.fs.FileSystem;
  60. import org.apache.hadoop.fs.Path;
  61. import org.apache.hadoop.io.IntWritable;
  62. import org.apache.hadoop.io.Text;
  63. import org.apache.hadoop.mapreduce.Job;
  64. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  65. import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
  66. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  67. import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
  68. import org.apache.hadoop.util.Tool;
  69. import org.apache.hadoop.util.ToolRunner;
  70.  
  71. public class WordCount extends Configured implements Tool {
  72.  
  73.  
  74.  
  75. public static void main(String[] args) throws Exception {
  76. Configuration conf = new Configuration();
  77. try {
  78. int res = ToolRunner.run(conf, new WordCount(), args);
  79. } catch (Exception e) {
  80. // TODO Auto-generated catch block
  81. e.printStackTrace();
  82. }
  83. }
  84.  
  85. @Override
  86. public int run(String[] args) throws Exception {
  87. // TODO Auto-generated method stub
  88. Configuration conf = new Configuration();
  89. Job job = new Job(conf, "wordcount");
  90. job.setJarByClass(WordCount.class);
  91. job.setOutputKeyClass(Text.class);
  92. job.setOutputValueClass(IntWritable.class);
  93. job.setMapOutputKeyClass(Text.class);
  94. job.setMapOutputValueClass(IntWritable.class);
  95. job.setMapperClass(MyMapper.class);
  96. job.setReducerClass(MyReducer.class);
  97.  
  98. job.setInputFormatClass(TextInputFormat.class);
  99. job.setOutputFormatClass(TextOutputFormat.class);
  100.  
  101. FileSystem fs = FileSystem.get(conf);
  102. if(fs.exists(new Path(args[1]))){
  103. fs.delete(new Path(args[1]), true);
  104. }
  105. FileInputFormat.addInputPath(job, new Path(args[0]));
  106. FileOutputFormat.setOutputPath(job, new Path(args[1]));
  107.  
  108. job.waitForCompletion(true);
  109. return 0;
  110. }
  111.  
  112. }
Add Comment
Please, Sign In to add comment