Pastebin is 300% more awesome when you are logged in. Sign Up, it's FREE!
Guest

Zubnaya_Word_Mapper

By: a guest on Jan 10th, 2011  |  syntax: Java 5  |  size: 1.17 KB  |  hits: 36  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. import java.io.IOException;
  2. import java.util.StringTokenizer;
  3.  
  4. import java.util.regex.Matcher;
  5. import java.util.regex.Pattern;
  6.  
  7. import org.apache.hadoop.io.IntWritable;
  8. import org.apache.hadoop.io.Text;
  9. import org.apache.hadoop.mapred.Mapper;
  10. import org.apache.hadoop.mapred.MapReduceBase;
  11. import org.apache.hadoop.mapred.OutputCollector;
  12. import org.apache.hadoop.mapred.Reporter;
  13.  
  14. public class WordMapper extends MapReduceBase
  15.                 implements Mapper<Object, Text, Text, IntWritable>
  16. {
  17.         private Text word = new Text();
  18.         private final static IntWritable ONE = new IntWritable(1);
  19.        
  20.         public void map(Object key,
  21.                         Text value,
  22.                         OutputCollector<Text, IntWritable> output,
  23.                         Reporter reporter) throws IOException
  24.   {
  25.                 // Break line into words for processing
  26.     Pattern pat = Pattern.compile("(\\w+)");
  27.     Matcher mat = pat.matcher(value.toString());
  28.     while (mat.find())
  29.     {
  30.                         word.set(mat.group(1).toLowerCase());
  31.                         output.collect(word, ONE);
  32.     }
  33.     /*
  34.                 StringTokenizer wordList = new StringTokenizer(value.toString());
  35.                 while (wordList.hasMoreTokens())
  36.     {
  37.                         word.set(wordList.nextToken());
  38.                         output.collect(word, ONE);
  39.                 }
  40.     */
  41.         }
  42. }