Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package info.moaikids.mapred.map;
- import info.moaikids.chunker.Chunker;
- import info.moaikids.chunker.KuromojiChunker;
- import java.io.IOException;
- import org.apache.hadoop.io.IntWritable;
- import org.apache.hadoop.io.LongWritable;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapreduce.Mapper;
- public class Figure38Mapper extends
- Mapper<LongWritable, Text, Text, IntWritable> {
- Chunker chunker = new KuromojiChunker();
- static final IntWritable ONE = new IntWritable(1);
- @Override
- protected void setup(Context context) throws IOException,
- InterruptedException {
- super.setup(context);
- }
- @Override
- protected void map(LongWritable key, Text value, Context context)
- throws IOException, InterruptedException {
- for (String line : value.toString().split("。|\n")) {
- line = line.replaceAll(" ", "").trim();
- String[] chunks = chunker.chunking(line);
- if (chunks.length > 1) {
- for (int i = 0; i < chunks.length; i++) {
- for (int j = 0; j < chunks.length; j++) {
- if (i >= j) {
- continue;
- }
- context.write(new Text(chunks[i] + " " + chunks[j]),
- ONE);
- }
- }
- }
- }
- }
- }
Add Comment
Please, Sign In to add comment