Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public class CSMRMapper extends Mapper<Text,VectorWritable,IntWritable,
- DocumentWritable >{
- @Override
- public void map(Text key, VectorWritable value, Context context)
- throws IOException, InterruptedException{
- DocumentWritable p = new DocumentWritable(new Text(key.toString())
- ,new VectorWritable(value.get()));
- context.write(new IntWritable(1),p);
- }
- public class CSMRReducer extends Reducer<IntWritable, DocumentWritable ,Text,
- VectorArrayWritable> {
- private ArrayList<DocumentWritable> al;
- private VectorWritable[] val;
- private VectorArrayWritable vaw;
- @Override
- public void reduce(IntWritable key, Iterable<DocumentWritable> values,
- Context context) throws IOException, InterruptedException{
- al = new ArrayList();
- vaw = new VectorArrayWritable();
- /* Storing each key-value pair (document) in a java.util.ArrayList */
- for (DocumentWritable v : values){
- al.add(new DocumentWritable(v.getKey(),v.getValue()));
- }
- /* Generating all the possible combinations of documents */
- if (al.size()>0){
- for (int i=0;i<al.size();++i){
- for (int j=i+1;j<al.size();++j){
- val = new VectorWritable[2];
- /* Generating the key for the current document pair with
- the format "doci_name@docj_name" */
- String k = al.get(i).getKey().toString()+
- "@"+al.get(j).getKey().toString();
- //First Document (doci)
- val[0] = new VectorWritable(al.get(i).getValue().get());
- //Second Document (docj)
- val[1] = new VectorWritable(al.get(j).getValue().get());
- vaw.set(val);
- context.write(new Text(k), vaw);
- }
- }
- }
- }
- }
- public class CosineSimilarityReducer
- extends Reducer<Text, VectorArrayWritable, Text, DoubleWritable>{
- @Override
- public void reduce(Text key, Iterable<VectorArrayWritable> value, Context context)
- throws IOException, InterruptedException{
- CosineDistanceMeasure cdm = new CosineDistanceMeasure();
- VectorWritable docX,docY;
- double cosine;
- for (VectorArrayWritable v : value){
- docX = (VectorWritable)v.get()[0];
- docY = (VectorWritable)v.get()[1];
- cosine = cdm.distance(docX.get(), docY.get());
- context.write(key, new DoubleWritable(cosine));
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement