Advertisement
Guest User

Untitled

a guest
Nov 18th, 2018
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.00 KB | None | 0 0
  1. import java.io.BufferedReader;
  2. import java.io.FileReader;
  3. import java.io.IOException;
  4. import java.util.*;
  5.  
  6. public class HeavyHitters {
  7. /**
  8. * Do NOT change these constants
  9. */
  10. // Must use RAND to randomly choose e and g for the hash functions.
  11. // The order to choose $e_j$ and $g_j$ is fixed:
  12. // first choose $e_1$, then $g_1$, then choose $e_2$, then $g_2$, ..., and finally choose $e_4$ and $g_4$
  13. // Otherwise, you will NOT pass the tests.
  14. private static final Random RAND = new Random(33);
  15.  
  16. // Total number of elements in a stream provided.
  17. private static final int TOTAL_N = 87925;
  18.  
  19. // Threshold percentage to determine heavy hitters
  20. private static final double THRESH_PERCENT = 0.01;
  21.  
  22. // Fixed prime number p to be used in the hash functions
  23. private static final int HASH_PRIME = 10007;
  24.  
  25. // Number of hash tables used in each trail
  26. private static final int NUM_HASH_FUNCS = 4;
  27.  
  28. // Number of buckets in each hash table
  29. private static final int NUM_BUCKETS = 256;
  30.  
  31. // Number of trials to run
  32. private static final int NUM_TRIALS = 10;
  33.  
  34.  
  35. /**
  36. * Count-Min Sketch Data Structure
  37. */
  38. static class CountMinSketch {
  39. int[][] table;
  40. int[] e;
  41. int[] g;
  42.  
  43. CountMinSketch() {
  44. // Chose e and g for each hash table
  45. table = new int[NUM_HASH_FUNCS][NUM_BUCKETS];
  46. for(int i = 0; i < NUM_HASH_FUNCS; i++) {
  47. for(int j = 0; j < NUM_BUCKETS; j++) {
  48. table[i][j] = 0;
  49. }
  50. }
  51. e = new int[NUM_HASH_FUNCS];
  52. g = new int[NUM_HASH_FUNCS];
  53. for(int i = 0; i < NUM_HASH_FUNCS; i++) {
  54. e[i] = RAND.nextInt(HASH_PRIME-1) + 1;
  55. g[i] = RAND.nextInt(HASH_PRIME);
  56. }
  57. }
  58.  
  59. /**
  60. * Count-Min Sketch update
  61. * @param element input element
  62. */
  63. void inc(int element) {
  64. for(int i = 0; i < NUM_HASH_FUNCS; i++) {
  65. table[i][hash(element, e[i], g[i])] = table[i][hash(element, e[i], g[i])] + 1;
  66. }
  67. }
  68.  
  69. /**
  70. * Conservative update
  71. * @param element input element
  72. */
  73. void incConserv(int element) {
  74. int lowCount = Integer.MAX_VALUE;
  75. for(int i = 0; i < NUM_HASH_FUNCS; i++) {
  76. lowCount = Math.min(lowCount, table[i][hash(element, e[i], g[i])]);
  77. }
  78. for(int i = 0; i < NUM_HASH_FUNCS; i++) {
  79. if(table[i][hash(element, e[i], g[i])] == lowCount) {
  80. table[i][hash(element, e[i], g[i])] = table[i][hash(element, e[i], g[i])] + 1;
  81. }
  82. }
  83. }
  84.  
  85. /**
  86. * Count-Min Sketch count
  87. * @param element input element
  88. * @return current estimated count of element
  89. */
  90. int count(int element) {
  91. int minCount = Integer.MAX_VALUE;
  92. for(int i = 0; i < NUM_HASH_FUNCS; i++) {
  93. minCount = Math.min(minCount, table[i][hash(element, e[i], g[i])]);
  94. }
  95. return minCount;
  96. }
  97.  
  98. /**
  99. * Hashes an element to a hash value given e and f.
  100. * Remember, each hash table chooses their (e, g) pair independently.
  101. *
  102. * @param element element from a data stream to be hashed
  103. * @param e randomly chosen integer, 1 <= e <= p-1
  104. * @param f randomly chosen integer, 0 <= g <= p-1
  105. * @return hash value of element
  106. */
  107. private int hash(int element, int e, int g) {
  108. return (((e*element + g) % HASH_PRIME) % NUM_BUCKETS);
  109. }
  110. }
  111.  
  112.  
  113. public static void main(String[] args) {
  114. /** Do NOT modify filename and isConserv */
  115. String filename = args[0];
  116. // If isConserv is true, do conservative updates; otherwise, regular updates.
  117. boolean isConserv = Integer.parseInt(args[1]) == 1;
  118.  
  119. // estimate the number of heavy hitters and count of 9050
  120. /** The following skeleton code does mostly reading from a file line by line,
  121. * feel free to add your code anywhere down below */
  122. int totalHeavyHitters = 0;
  123. int totalFreq = 0;
  124. for (int trial = 1; trial <= NUM_TRIALS; trial++) {
  125. FileReader reader = null;
  126. try {
  127. reader = new FileReader(filename);
  128. BufferedReader bufferedReader = new BufferedReader(reader);
  129. CountMinSketch sketch = new CountMinSketch();
  130. Set<Integer> heavyHitters = new HashSet<Integer>();
  131. String line;
  132. while ((line = bufferedReader.readLine()) != null) {
  133. int curElement = Integer.parseInt(line);
  134. if(isConserv) {
  135. sketch.incConserv(curElement);
  136. } else {
  137. sketch.inc(curElement);
  138. }
  139. if(sketch.count(curElement) >= THRESH_PERCENT * TOTAL_N) {
  140. heavyHitters.add(curElement);
  141. }
  142. }
  143. int freq = sketch.count(9050);
  144. int heavyHittersCount = heavyHitters.size();
  145. totalFreq += freq;
  146. totalHeavyHitters += heavyHittersCount;
  147.  
  148.  
  149. System.out.println("Trial " + trial + ", Heavy Hitters Count: " + heavyHittersCount + ", 9050 Count: " + freq);
  150. } catch (IOException e) {
  151. e.printStackTrace();
  152. } finally {
  153. if (reader != null) {
  154. try {
  155. reader.close();
  156. } catch (IOException e) {
  157. e.printStackTrace();
  158. }
  159. }
  160. }
  161. }
  162.  
  163. double avgHeavyHitters = (totalHeavyHitters * 1.0) / NUM_TRIALS;
  164. double avgFreq = (totalFreq * 1.0) / NUM_TRIALS;
  165. System.out.println("Average" + ", Heavy Hitters Count: " + avgHeavyHitters + ", 9050 Count: " + avgFreq);
  166. }
  167. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement