Advertisement
Guest User

Untitled

a guest
Nov 23rd, 2014
131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.79 KB | None | 0 0
  1. package mapreduce;
  2.  
  3. import java.io.*;
  4. import java.util.*;
  5. import java.io.BufferedReader;
  6. import java.util.concurrent.ExecutorService;
  7. import java.util.concurrent.Executors;
  8.  
  9. public class Mapper
  10. {
  11. private int fragmentSize;
  12. private int threadNumber;
  13. private float similarityMargin;
  14. private String fileName;
  15.  
  16. private ArrayList generatedHashes;
  17.  
  18. public Mapper(String fileName, int fragmentSize, float similarityMargin, int threadNumber)
  19. {
  20. this.fileName = fileName;
  21. this.fragmentSize = fragmentSize;
  22. this.similarityMargin = similarityMargin;
  23. this.threadNumber = threadNumber;
  24. }
  25.  
  26. public void MapFragments()
  27. {
  28. try
  29. {
  30. File file = new File(fileName);
  31. BufferedReader reader = new BufferedReader(new FileReader(fileName));
  32.  
  33. long fileSize = file.length();
  34. System.out.println(fileSize + " " + fileName);
  35.  
  36. ExecutorService executor = Executors.newFixedThreadPool(threadNumber);
  37.  
  38. long fileOffset;
  39.  
  40. for (int i = 0; i < fileSize/fragmentSize; i++)
  41. {
  42. fileOffset = i*fragmentSize;
  43.  
  44. Runnable worker = new Map(fileName, fileOffset, fragmentSize);
  45. executor.execute(worker);
  46. }
  47.  
  48. if(fileSize%fragmentSize != 0)
  49. {
  50. fileOffset = fileSize - fragmentSize;
  51.  
  52. Runnable worker = new Map(fileName, fileOffset, fileSize%fragmentSize);
  53. executor.execute(worker);
  54. }
  55.  
  56. executor.shutdown();
  57.  
  58. while (!executor.isTerminated()) {}
  59.  
  60. System.out.println("Finished all threads");
  61. }
  62. catch(Exception e)
  63. {
  64. System.out.println(e.getMessage());
  65. }
  66. }
  67. }
  68.  
  69.  
  70. package mapreduce;
  71.  
  72. import java.io.RandomAccessFile;
  73. import java.util.HashMap;
  74. import java.util.Iterator;
  75. import java.util.StringTokenizer;
  76.  
  77. public class Map implements Runnable
  78. {
  79. String documentName;
  80. long documentOffset;
  81. long fragmentSize;
  82.  
  83. HashMap<String, Integer> wordList= new HashMap<String, Integer>();
  84.  
  85. String fragment;
  86.  
  87. String separators = " ;:/?~\\.,><~`[]{}()!@#$%^&-_+'=*\"| \n";
  88.  
  89. public Map(String documentName, long documentOffset, long fragmentSize)
  90. {
  91. this.documentName = documentName;
  92. this.documentOffset = documentOffset;
  93. this.fragmentSize = fragmentSize;
  94. }
  95.  
  96. @Override
  97. public void run()
  98. {
  99. try
  100. {
  101. RandomAccessFile file = new RandomAccessFile(documentName, "r");
  102. StringBuffer currentFragment = new StringBuffer();
  103.  
  104. if(documentOffset==0) documentOffset++;
  105.  
  106. file.seek(documentOffset-1);
  107.  
  108. char myByte = ' ';
  109.  
  110. for(int i=1;i<=fragmentSize;i++)
  111. {
  112. myByte = (char) file.readByte();
  113.  
  114. currentFragment.append(myByte);
  115. }
  116.  
  117. try
  118. {
  119. while(!separators.contains(Character.toString(myByte)) )
  120. {
  121. myByte = (char) file.readByte();
  122. currentFragment.append(myByte);
  123. }
  124. }
  125. catch(Exception e)
  126. {
  127. System.out.println(e.getMessage());
  128. }
  129. fragment = currentFragment.toString();
  130.  
  131. while( !separators.contains(Character.toString(fragment.charAt(0))) )
  132. {
  133. fragment = fragment.substring(1);
  134. }
  135.  
  136. System.out.println("\n"+fragment+"\n");
  137. }
  138. catch(Exception e)
  139. {
  140. System.out.println(e.getMessage());
  141. }
  142.  
  143. if(fragment != null)
  144. {
  145. EvaluateFragment();
  146. }
  147. }
  148.  
  149. void EvaluateFragment()
  150. {
  151. StringTokenizer fragmentTokens = new StringTokenizer(fragment," ;:/?~\\.,><~`[]{}()!@#$%^&-_+'=*\"| \n");
  152. while(fragmentTokens.hasMoreTokens())
  153. {
  154. String token = fragmentTokens.nextToken();
  155.  
  156. if(!wordList.containsKey(token))
  157. {
  158. wordList.put(token, 1);
  159. }
  160. else
  161. {
  162. int nr = wordList.get(token);
  163.  
  164. wordList.put(token, nr+1);
  165. }
  166. }
  167.  
  168. for(String key: wordList.keySet())
  169. {
  170. System.out.println(key +" :: "+ wordList.get(key));
  171.  
  172. }
  173.  
  174. }
  175. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement