Advertisement
Guest User

Untitled

a guest
Sep 3rd, 2015
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.73 KB | None | 0 0
  1. import java.io.File;
  2. import java.lang.Integer;
  3. import java.lang.System;
  4. import java.lang.reflect.Array;
  5. import java.security.MessageDigest;
  6. import java.util.Comparator;
  7. import java.security.NoSuchAlgorithmException;
  8. import java.util.*;
  9. import java.io.BufferedReader;
  10. import java.io.FileReader;
  11. import java.util.InputMismatchException;
  12.  
  /**
   * Reports the most frequent non-stop-words found in a pseudo-random sample of
   * lines drawn from an input file.
   *
   * <p>The sample is determined by the user name: it seeds a {@link Random}
   * which picks {@code SAMPLE_SIZE} line indices (with repetition) in the range
   * {@code [0, LINE_COUNT)}. Every sampled line is tokenized, stop words are
   * dropped, and the remaining words are counted.
   */
  public class MP1 {
      /** Number of line indices drawn for one run. */
      private static final int SAMPLE_SIZE = 10000;
      /** Exclusive upper bound for a drawn line index. */
      private static final int LINE_COUNT = 50000;
      /** Length of the array returned by {@link #process()}. */
      private static final int TOP_COUNT = 20;

      Random generator;
      String userName;
      String inputFileName;

      static final String delimiters = " \t,;.?!-:@[](){}_*/";
      static final String[] stopWordsArray = {"i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours",
              "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its",
              "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that",
              "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having",
              "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while",
              "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before",
              "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again",
              "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each",
              "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than",
              "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"};

      static final Set<String> stopWordsSet = new HashSet<String>(Arrays.asList(stopWordsArray));

      /**
       * Creates a processor for the given user and input file.
       *
       * @param userName      seed text for the line sampler
       * @param inputFileName path of the text file to read
       */
      public MP1(String userName, String inputFileName) {
          this.userName = userName;
          this.inputFileName = inputFileName;
      }

      /**
       * (Re)creates {@link #generator} from a seed derived from the digest of the
       * lower-cased, trimmed seed text.
       *
       * <p>The seed arithmetic is deliberately left exactly as it was so that a
       * given seed text keeps producing the same sequence of indices.
       *
       * @param seed text to derive the random seed from
       * @throws NoSuchAlgorithmException if the "SHA" digest is unavailable
       */
      void initialRandomGenerator(String seed) throws NoSuchAlgorithmException {
          MessageDigest messageDigest = MessageDigest.getInstance("SHA");
          messageDigest.update(seed.toLowerCase().trim().getBytes());
          byte[] seedDigest = messageDigest.digest();

          long longSeed = 0;
          for (int i = 0; i < seedDigest.length; i++) {
              // For i >= 8 the shift distance exceeds 63 and Java only uses its low
              // six bits, so later bytes are folded back onto the low positions.
              longSeed += ((long) seedDigest[i] & 0xffL) << (8 * i);
          }

          this.generator = new Random(longSeed);
      }

      /**
       * Draws the line indices to sample for the current user name.
       *
       * @return {@code SAMPLE_SIZE} indices, each in {@code [0, LINE_COUNT)};
       *         duplicates are possible
       * @throws NoSuchAlgorithmException if the generator cannot be seeded
       */
      Integer[] getIndexes() throws NoSuchAlgorithmException {
          Integer[] ret = new Integer[SAMPLE_SIZE];
          this.initialRandomGenerator(this.userName);
          for (int i = 0; i < SAMPLE_SIZE; i++) {
              ret[i] = generator.nextInt(LINE_COUNT);
          }
          return ret;
      }

      /**
       * Counts the words of the sampled lines and returns the most frequent ones.
       *
       * @return an array of length 20 holding the words in descending order of
       *         frequency (ties broken alphabetically); trailing slots are
       *         {@code null} when fewer than 20 distinct words were seen
       * @throws IndexOutOfBoundsException if a sampled index is not a valid line
       *                                   number of the input file
       * @throws Exception                 if the file cannot be read or the
       *                                   generator cannot be seeded
       */
      public String[] process() throws Exception {
          Integer[] indices = this.getIndexes();
          List<String> lines = readLines(inputFileName);

          Map<String, Integer> counter = new HashMap<String, Integer>();
          // An index drawn k times contributes its line k times.
          for (int lineIndex : indices) {
              for (String word : splitWords(lines.get(lineIndex))) {
                  Integer seen = counter.get(word);
                  counter.put(word, seen == null ? 1 : seen + 1);
              }
          }

          return topWords(counter, TOP_COUNT);
      }

      /** Reads every line of the file, always closing the reader afterwards. */
      private static List<String> readLines(String fileName) throws java.io.IOException {
          BufferedReader br = new BufferedReader(new FileReader(fileName));
          try {
              List<String> lines = new ArrayList<String>();
              for (String line = br.readLine(); line != null; line = br.readLine()) {
                  lines.add(line);
              }
              return lines;
          } finally {
              br.close();
          }
      }

      /** Returns the {@code limit} keys with the highest counts, ties in alphabetical order. */
      private static String[] topWords(Map<String, Integer> counter, int limit) {
          List<Map.Entry<String, Integer>> entries =
                  new ArrayList<Map.Entry<String, Integer>>(counter.entrySet());
          Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
              @Override
              public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                  int byCount = right.getValue().compareTo(left.getValue());
                  return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
              }
          });

          String[] ret = new String[limit];
          for (int i = 0; i < limit && i < entries.size(); i++) {
              ret[i] = entries.get(i).getKey();
          }
          return ret;
      }

      /**
       * Splits a line into lower-cased words, dropping stop words.
       *
       * @param title the text to tokenize
       * @return the remaining words in order of appearance (possibly empty)
       */
      public ArrayList<String> splitWords(String title) {
          StringTokenizer st = new StringTokenizer(title, delimiters);
          ArrayList<String> words = new ArrayList<String>();
          while (st.hasMoreTokens()) {
              // Locale.ROOT keeps the casing independent of the machine's default locale.
              String token = st.nextToken().trim().toLowerCase(Locale.ROOT);
              // A token made only of control whitespace (e.g. form feed) trims to "".
              if (!token.isEmpty() && !stopWordsSet.contains(token)) {
                  words.add(token);
              }
          }
          return words;
      }

      /**
       * Command-line entry point: prints the top words for the given user id,
       * reading {@code ./input.txt}.
       *
       * @param args expects the user id as the first argument
       * @throws Exception if processing fails
       */
      public static void main(String[] args) throws Exception {
          if (args.length < 1) {
              System.out.println("MP1 <User ID>");
          } else {
              String userName = args[0];
              String inputFileName = "./input.txt";
              MP1 mp = new MP1(userName, inputFileName);
              String[] topItems = mp.process();
              for (String item : topItems) {
                  System.out.println(item);
              }
          }
      }
  }
  143.  
  /**
   * Orders string keys by the count stored for them in a backing map: larger
   * counts first, and keys with equal counts in natural string order.
   *
   * <p>Both keys must be present in the backing map; a missing key results in a
   * {@link NullPointerException}.
   */
  class ValueComparator implements Comparator<String> {
      Map<String, Integer> map;

      /**
       * @param base map supplying the count for every key that will be compared
       */
      public ValueComparator(Map<String, Integer> base) {
          this.map = base;
      }

      /**
       * Compares two keys by descending count, then alphabetically.
       *
       * @return a negative value if {@code a} sorts before {@code b}, a positive
       *         value if after, and zero only when the keys are equal strings
       */
      @Override
      public int compare(String a, String b) {
          // Reversed operands: the key with the higher count must come first.
          int byCountDescending = map.get(b).compareTo(map.get(a));
          if (byCountDescending != 0) {
              return byCountDescending;
          }
          // Equal counts: fall back to the keys themselves so distinct keys never tie.
          return a.compareTo(b);
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement