Advertisement
Guest User

Untitled

a guest
Jul 20th, 2017
54
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 0.80 KB | None | 0 0
  1. package test.invertedIndex
  2.  
  3. import java.util.StringTokenizer;
  4.  
  5. // Import the Hadoop types used for the mapper's input and output keys and values.
  6.  
  7. import org.apache.hadoop.io.Text;
  8. import org.apache.hadoop.mapred.Mapper;
  9. import org.apache.hadoop.mapred.MapReduceBase;
  10. import org.apache.hadoop.mapred.OutputCollector;
  11. import org.apache.hadoop.mapred.Reporter;
  12.  
  13. import scala.collection.JavaConversions._
  14.  
  /**
   * Mapper stage of an inverted-index job (old `org.apache.hadoop.mapred` API).
   *
   * For every whitespace-delimited token in the input value, the token is
   * stripped of all non-word characters and emitted as the output key, with
   * the unchanged input key as the output value.
   *
   * NOTE(review): the input key is presumably a document identifier (for
   * example, supplied by a key/value text input format) — confirm against the
   * job configuration.
   */
  class invertedMapper() extends MapReduceBase
      with Mapper[Text, Text, Text, Text] {

    // One Text instance is reused for every emitted key instead of allocating
    // a new one per token. NOTE(review): this assumes the collector consumes
    // (serializes/copies) the pair during `collect` — confirm for the
    // OutputCollector implementation in use.
    private val word = new Text()

    /**
     * Emits one `(cleanedToken, key)` pair per non-empty cleaned token of `value`.
     *
     * @param key      input key; forwarded unchanged as the output value
     * @param value    input text, split on the default `StringTokenizer`
     *                 delimiters (whitespace)
     * @param output   collector that receives the `(word, key)` pairs
     * @param reporter Hadoop progress reporter; not used by this mapper
     */
    def map(key: Text,
            value: Text,
            output: OutputCollector[Text, Text],
            reporter: Reporter): Unit = {
      val tokens = new StringTokenizer(value.toString)

      // Iterate the tokenizer directly rather than through the deprecated
      // implicit JavaConversions Enumeration-to-Iterator conversion.
      while (tokens.hasMoreTokens()) {
        // Drop every character that is not a letter, digit or underscore.
        val cleaned = tokens.nextToken().replaceAll("[\\W]", "")

        // A token consisting solely of punctuation cleans to "", which would
        // otherwise be indexed under an empty key; skip it.
        if (cleaned.nonEmpty) {
          word.set(cleaned)
          output.collect(word, key)
        }
      }
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement