Advertisement
Ladies_Man

#HADOOP Lab1 (simple map/reduce) COMPLETE

Oct 3rd, 2015
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 2.84 KB | None | 0 0
  1. //Параллельные и распределенные программы
  2. //hadoop lab1
  3. //WordCountApp.java
  4.  
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  11.  
  12. public class WordCountApp {
  13.     public static void main(String[] args) throws Exception {
  14.         if (args.length != 2) {
  15.             System.err.println("Usage: WordCountApp <input path> <output path>");
  16.             System.exit(-1);
  17.         }
  18.         Job job = Job.getInstance();
  19.         job.setJarByClass(WordCountApp.class);
  20.         job.setJobName("Word count");
  21.         FileInputFormat.addInputPath(job, new Path(args[0]));
  22.         FileOutputFormat.setOutputPath(job, new Path(args[1]));
  23.         job.setMapperClass(WordMapper.class);
  24.         job.setReducerClass(WordReducer.class);
  25.         job.setOutputKeyClass(Text.class);
  26.         job.setOutputValueClass(IntWritable.class);
  27.         job.setNumReduceTasks(2);
  28.         System.exit(job.waitForCompletion(true) ? 0 : 1);
  29.     }
  30. }
  31.  
  32.  
  33.  
  34.  
  35. //WordMapper.java
  36.  
  37. import org.apache.hadoop.io.IntWritable;
  38. import org.apache.hadoop.io.LongWritable;
  39. import org.apache.hadoop.io.Text;
  40. import org.apache.hadoop.mapreduce.Mapper;
  41.  
  42. import java.io.IOException;
  43.  
  44. public class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
  45.     @Override
  46.     protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  47.         String line = value.toString();
  48.  
  49.         line = line.toLowerCase();
  50.  
  51.         String[] words = line.split("[^a-zA-Z]"); //не забыть русский текст через а-яА-Я
  52.  
  53.         for (String word : words) {
  54.             context.write(new Text(word), new IntWritable(1));
  55.         }
  56.     }
  57. }
  58.  
  59.  
  60.  
  61.  
  62. //WordReducer.java
  63.  
  64. import org.apache.hadoop.io.IntWritable;
  65. import org.apache.hadoop.io.LongWritable;
  66. import org.apache.hadoop.io.Text;
  67. import org.apache.hadoop.mapreduce.Reducer;
  68.  
  69. import java.io.IOException;
  70. import java.util.Iterator;
  71.  
  72. public class WordReducer extends Reducer<Text, IntWritable, Text, LongWritable> {
  73.     @Override
  74.     protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
  75.         long count=0;
  76.         Iterator iter = values.iterator();
  77.         while(iter.hasNext()) {
  78.             iter.next();
  79.             count++;
  80.         }
  81.         context.write(key, new LongWritable(count));
  82.     }
  83. }
  84.  
  85.  
  86.  
  87.  
  88. //the job leaves two text part files (one per reduce task) in the output directory, plus an empty _SUCCESS marker
  89.  
  90. //before re-running, remove the old output directory from HDFS: hadoop fs -rm -r output
  91. //then fetch the fresh results again with: hadoop fs -copyToLocal output <local dir>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement