Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package test.invertedIndex
- import java.util.StringTokenizer;
- // Import the types used for input and output keys and values.
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapred.Mapper;
- import org.apache.hadoop.mapred.MapReduceBase;
- import org.apache.hadoop.mapred.OutputCollector;
- import org.apache.hadoop.mapred.Reporter;
- import scala.collection.JavaConversions._
/**
 * Mapper that emits one `(word, key)` pair for every whitespace-delimited
 * token found in the input value.
 *
 * Each token has all non-word characters (regex `\W`) stripped before it is
 * emitted. Tokens that are empty after stripping (for example pure
 * punctuation such as `--`) are skipped so that no empty-string key is
 * written to the output.
 *
 * NOTE(review): the input key is presumably a document identifier (the
 * package name suggests an inverted index) -- confirm against the job's
 * input format.
 */
class invertedMapper() extends MapReduceBase
  with Mapper[Text, Text, Text, Text] {

  // Single Text instance overwritten for every emitted word, presumably to
  // avoid allocating a new object per token.
  private val word = new Text()

  // Compiled once per mapper instead of once per token (String.replaceAll
  // recompiles its pattern on every call).
  private val nonWordChars = java.util.regex.Pattern.compile("[\\W]")

  /**
   * Tokenizes `value` and collects `(cleanedToken, key)` for each token that
   * still contains at least one word character.
   *
   * @param key      emitted unchanged as the output value of every pair
   * @param value    text to split with a default [[java.util.StringTokenizer]]
   * @param output   collector receiving the `(word, key)` pairs
   * @param reporter not used by this mapper
   */
  def map(key: Text,
          value: Text,
          output: OutputCollector[Text, Text],
          reporter: Reporter): Unit = {
    val tokens = new StringTokenizer(value.toString())
    // Explicit loop over the tokenizer: avoids depending on the deprecated
    // implicit JavaConversions view of it as a Scala iterator.
    while (tokens.hasMoreTokens()) {
      val cleaned = nonWordChars.matcher(tokens.nextToken()).replaceAll("")
      // A token made only of punctuation is empty here; emitting it would
      // create an empty key.
      if (cleaned.nonEmpty) {
        word.set(cleaned)
        output.collect(word, key)
      }
    }
  }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement