Advertisement
Guest User

Untitled

a guest
Apr 29th, 2017
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 3.29 KB | None | 0 0
  1. import java.io.BufferedReader;
  2. import java.io.FileNotFoundException;
  3. import java.io.FileReader;
  4. import java.io.IOException;
  5. import java.util.HashSet;
  6. import java.util.StringTokenizer;
  7. import java.util.logging.Logger;
  8.  
  9. import org.apache.hadoop.conf.Configuration;
  10. import org.apache.hadoop.fs.Path;
  11. import org.apache.hadoop.io.IntWritable;
  12. import org.apache.hadoop.io.Text;
  13. import org.apache.hadoop.mapreduce.Job;
  14. import org.apache.hadoop.mapreduce.Mapper;
  15. import org.apache.hadoop.mapreduce.Reducer;
  16. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  17. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  18.  
  19. public class WordCount {
  20.  
  21.     public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{
  22.  
  23.         private final static IntWritable one = new IntWritable(1);
  24.         private Text word = new Text();
  25.  
  26.         //Hashset contenente le stopwords
  27.         private static HashSet<String> stopWords = new HashSet<>();
  28.  
  29.         static{
  30.             //Carichiamo staticamente da file le stopwords
  31.             loadStopWords();
  32.         }
  33.  
  34.         public void map(Object key, Text value, Context context
  35.         ) throws IOException, InterruptedException {
  36.             StringTokenizer itr = new StringTokenizer(value.toString());
  37.             while (itr.hasMoreTokens()) {
  38.                 word.set(itr.nextToken());
  39.                 //Controllo se la parola che sto mappando รจ una stopwords
  40.                 if(!stopWords.contains(word.toString().toLowerCase()))
  41.                     context.write(word, one);
  42.             }
  43.         }
  44.  
  45.         /**
  46.          * Funzione per il caricamento delle stopwords da file in un hashset
  47.          */
  48.         private static void loadStopWords() {
  49.             try {
  50.                 BufferedReader reader = new BufferedReader(new FileReader("english.txt"));
  51.                 String line = "";
  52.                 while((line = reader.readLine()) != null)
  53.                     stopWords.add(line);
  54.             } catch (IOException e) {
  55.                 e.printStackTrace();
  56.             }
  57.  
  58.         }
  59.     }
  60.  
  61.     public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
  62.         private IntWritable result = new IntWritable();
  63.  
  64.         public void reduce(Text key, Iterable<IntWritable> values,
  65.                            Context context
  66.         ) throws IOException, InterruptedException {
  67.             int sum = 0;
  68.             for (IntWritable val : values) {
  69.                 sum += val.get();
  70.             }
  71.             result.set(sum);
  72.             context.write(key, result);
  73.         }
  74.     }
  75.  
  76.     public static void main(String[] args) throws Exception {
  77.         Configuration conf = new Configuration();
  78.         Job job = Job.getInstance(conf, "word count");
  79.         job.setJarByClass(WordCount.class);
  80.         job.setMapperClass(TokenizerMapper.class);
  81.         job.setCombinerClass(IntSumReducer.class);
  82.         job.setReducerClass(IntSumReducer.class);
  83.         job.setOutputKeyClass(Text.class);
  84.         job.setOutputValueClass(IntWritable.class);
  85.         //Path di input
  86.         FileInputFormat.addInputPath(job, new Path(args[0]));
  87.         //Path di output
  88.         FileOutputFormat.setOutputPath(job, new Path(args[1]));
  89.         System.exit(job.waitForCompletion(true) ? 0 : 1);
  90.     }
  91. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement