Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package mapreduce;
- import java.io.*;
- import java.util.*;
- import java.io.BufferedReader;
- import java.util.concurrent.ExecutorService;
- import java.util.concurrent.Executors;
- public class Mapper
- {
- private int fragmentSize;
- private int threadNumber;
- private float similarityMargin;
- private String fileName;
- private ArrayList generatedHashes;
- public Mapper(String fileName, int fragmentSize, float similarityMargin, int threadNumber)
- {
- this.fileName = fileName;
- this.fragmentSize = fragmentSize;
- this.similarityMargin = similarityMargin;
- this.threadNumber = threadNumber;
- }
- public void MapFragments()
- {
- try
- {
- File file = new File(fileName);
- BufferedReader reader = new BufferedReader(new FileReader(fileName));
- long fileSize = file.length();
- System.out.println(fileSize + " " + fileName);
- ExecutorService executor = Executors.newFixedThreadPool(threadNumber);
- long fileOffset;
- for (int i = 0; i < fileSize/fragmentSize; i++)
- {
- fileOffset = i*fragmentSize;
- Runnable worker = new Map(fileName, fileOffset, fragmentSize);
- executor.execute(worker);
- }
- if(fileSize%fragmentSize != 0)
- {
- fileOffset = fileSize - fragmentSize;
- Runnable worker = new Map(fileName, fileOffset, fileSize%fragmentSize);
- executor.execute(worker);
- }
- executor.shutdown();
- while (!executor.isTerminated()) {}
- System.out.println("Finished all threads");
- }
- catch(Exception e)
- {
- System.out.println(e.getMessage());
- }
- }
- }
- package mapreduce;
- import java.io.RandomAccessFile;
- import java.util.HashMap;
- import java.util.Iterator;
- import java.util.StringTokenizer;
/**
 * Counts the words in one byte range ("fragment") of a document.
 *
 * A fragment rarely starts or ends exactly on a word boundary, so the range is
 * adjusted: a word cut by the end of the fragment is read to its end, and a
 * word that began before the start of the fragment is dropped. Bytes are
 * mapped one-to-one to characters (0-255).
 */
public class Map implements Runnable
{
    /** Path of the document to read. */
    String documentName;
    /** Byte offset at which this fragment starts. */
    long documentOffset;
    /** Nominal fragment length in bytes, before boundary adjustment. */
    long fragmentSize;
    /** Number of occurrences of each word; filled by {@link #EvaluateFragment()}. */
    HashMap<String, Integer> wordList = new HashMap<String, Integer>();
    /** Fragment text after boundary adjustment; {@code null} until a read succeeds. */
    String fragment;
    /** Characters that terminate a word. */
    String separators = " ;:/?~\\.,><~`[]{}()!@#$%^&-_+'=*\"| \n";

    /**
     * @param documentName   path of the document to read
     * @param documentOffset byte offset at which the fragment starts
     * @param fragmentSize   nominal fragment length in bytes
     */
    public Map(String documentName, long documentOffset, long fragmentSize)
    {
        this.documentName = documentName;
        this.documentOffset = documentOffset;
        this.fragmentSize = fragmentSize;
    }

    /**
     * Reads the fragment, prints it, and counts its words.
     *
     * A read failure is reported on standard error and leaves {@link #fragment}
     * and {@link #wordList} untouched; it is not rethrown.
     */
    @Override
    public void run()
    {
        if(documentName == null)
        {
            System.err.println("Cannot read fragment: no document name given");
            return;
        }

        String text;
        try
        {
            text = readFragment();
        }
        catch(java.io.IOException e)
        {
            System.err.println("Could not read fragment at offset " + documentOffset
                    + " of " + documentName + ": " + e);
            return;
        }

        fragment = text;
        System.out.println("\n"+fragment+"\n");
        EvaluateFragment();
    }

    /**
     * Tokenizes {@link #fragment} on {@link #separators}, adds each token to
     * {@link #wordList}, and prints every word with its count. Does nothing
     * when no fragment has been read.
     */
    void EvaluateFragment()
    {
        if(fragment == null)
        {
            return;
        }

        StringTokenizer fragmentTokens = new StringTokenizer(fragment, separators);
        while(fragmentTokens.hasMoreTokens())
        {
            String token = fragmentTokens.nextToken();
            Integer seen = wordList.get(token);
            wordList.put(token, seen == null ? 1 : seen + 1);
        }

        for(java.util.Map.Entry<String, Integer> entry : wordList.entrySet())
        {
            System.out.println(entry.getKey() +" :: "+ entry.getValue());
        }
    }

    /** Reads this task's fragment and adjusts both ends to word boundaries. */
    private String readFragment() throws java.io.IOException
    {
        // For every fragment but the first, also read the byte just before the
        // fragment: it tells whether the fragment starts in the middle of a word.
        boolean hasLookBehind = documentOffset > 0;
        long start = hasLookBehind ? documentOffset - 1 : documentOffset;
        long wanted = hasLookBehind ? fragmentSize + 1 : fragmentSize;

        RandomAccessFile file = new RandomAccessFile(documentName, "r");
        try
        {
            file.seek(start);
            StringBuilder text = new StringBuilder();
            appendBytes(file, text, wanted);

            // Finish a word cut by the end of the fragment; stop quietly at end of file.
            if(text.length() > 0)
            {
                char last = text.charAt(text.length() - 1);
                while(!isSeparator(last))
                {
                    int next = file.read();
                    if(next < 0)
                    {
                        break;
                    }
                    last = (char) next;
                    text.append(last);
                }
            }

            // The document's first fragment always starts on a word boundary;
            // any other fragment may start inside a word owned by its predecessor.
            return hasLookBehind ? dropLeadingPartialWord(text) : text.toString();
        }
        finally
        {
            file.close();
        }
    }

    /** Appends up to {@code count} bytes from the file's current position, stopping early at end of file. */
    private static void appendBytes(RandomAccessFile file, StringBuilder text, long count) throws java.io.IOException
    {
        byte[] buffer = new byte[8192];
        long remaining = count;
        while(remaining > 0)
        {
            int read = file.read(buffer, 0, (int) Math.min(buffer.length, remaining));
            if(read < 0)
            {
                break;
            }
            for(int i = 0; i < read; i++)
            {
                // Mask to 0-255 so bytes >= 0x80 are not sign-extended into 0xFFxx chars.
                text.append((char) (buffer[i] & 0xFF));
            }
            remaining -= read;
        }
    }

    /** Returns the text from its first separator onwards, or "" when it contains none. */
    private String dropLeadingPartialWord(CharSequence text)
    {
        for(int i = 0; i < text.length(); i++)
        {
            if(isSeparator(text.charAt(i)))
            {
                return text.subSequence(i, text.length()).toString();
            }
        }
        return "";
    }

    /** Tells whether {@code c} is one of the {@link #separators}. */
    private boolean isSeparator(char c)
    {
        return separators.indexOf(c) >= 0;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement