Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.BufferedReader;
- import java.io.FileReader;
- import java.io.IOException;
- import java.util.*;
- public class HeavyHitters {
- /**
- * Do NOT change these constants
- */
- // Must use RAND to randomly choose e and g for the hash functions.
- // The order to choose $e_j$ and $g_j$ is fixed:
- // first choose $e_1$, then $g_1$, then choose $e_2$, then $g_2$, ..., and finally choose $e_4$ and $g_4$
- // Otherwise, you will NOT pass the tests.
- private static final Random RAND = new Random(33);
- // Total number of elements in a stream provided.
- private static final int TOTAL_N = 87925;
- // Threshold percentage to determine heavy hitters
- private static final double THRESH_PERCENT = 0.01;
- // Fixed prime number p to be used in the hash functions
- private static final int HASH_PRIME = 10007;
- // Number of hash tables used in each trail
- private static final int NUM_HASH_FUNCS = 4;
- // Number of buckets in each hash table
- private static final int NUM_BUCKETS = 256;
- // Number of trials to run
- private static final int NUM_TRIALS = 10;
- /**
- * Count-Min Sketch Data Structure
- */
- static class CountMinSketch {
- int[][] table;
- int[] e;
- int[] g;
- CountMinSketch() {
- // Chose e and g for each hash table
- table = new int[NUM_HASH_FUNCS][NUM_BUCKETS];
- for(int i = 0; i < NUM_HASH_FUNCS; i++) {
- for(int j = 0; j < NUM_BUCKETS; j++) {
- table[i][j] = 0;
- }
- }
- e = new int[NUM_HASH_FUNCS];
- g = new int[NUM_HASH_FUNCS];
- for(int i = 0; i < NUM_HASH_FUNCS; i++) {
- e[i] = RAND.nextInt(HASH_PRIME-1) + 1;
- g[i] = RAND.nextInt(HASH_PRIME);
- }
- }
- /**
- * Count-Min Sketch update
- * @param element input element
- */
- void inc(int element) {
- for(int i = 0; i < NUM_HASH_FUNCS; i++) {
- table[i][hash(element, e[i], g[i])] = table[i][hash(element, e[i], g[i])] + 1;
- }
- }
- /**
- * Conservative update
- * @param element input element
- */
- void incConserv(int element) {
- int lowCount = Integer.MAX_VALUE;
- for(int i = 0; i < NUM_HASH_FUNCS; i++) {
- lowCount = Math.min(lowCount, table[i][hash(element, e[i], g[i])]);
- }
- for(int i = 0; i < NUM_HASH_FUNCS; i++) {
- if(table[i][hash(element, e[i], g[i])] == lowCount) {
- table[i][hash(element, e[i], g[i])] = table[i][hash(element, e[i], g[i])] + 1;
- }
- }
- }
- /**
- * Count-Min Sketch count
- * @param element input element
- * @return current estimated count of element
- */
- int count(int element) {
- int minCount = Integer.MAX_VALUE;
- for(int i = 0; i < NUM_HASH_FUNCS; i++) {
- minCount = Math.min(minCount, table[i][hash(element, e[i], g[i])]);
- }
- return minCount;
- }
- /**
- * Hashes an element to a hash value given e and f.
- * Remember, each hash table chooses their (e, g) pair independently.
- *
- * @param element element from a data stream to be hashed
- * @param e randomly chosen integer, 1 <= e <= p-1
- * @param f randomly chosen integer, 0 <= g <= p-1
- * @return hash value of element
- */
- private int hash(int element, int e, int g) {
- return (((e*element + g) % HASH_PRIME) % NUM_BUCKETS);
- }
- }
- public static void main(String[] args) {
- /** Do NOT modify filename and isConserv */
- String filename = args[0];
- // If isConserv is true, do conservative updates; otherwise, regular updates.
- boolean isConserv = Integer.parseInt(args[1]) == 1;
- // estimate the number of heavy hitters and count of 9050
- /** The following skeleton code does mostly reading from a file line by line,
- * feel free to add your code anywhere down below */
- int totalHeavyHitters = 0;
- int totalFreq = 0;
- for (int trial = 1; trial <= NUM_TRIALS; trial++) {
- FileReader reader = null;
- try {
- reader = new FileReader(filename);
- BufferedReader bufferedReader = new BufferedReader(reader);
- CountMinSketch sketch = new CountMinSketch();
- Set<Integer> heavyHitters = new HashSet<Integer>();
- String line;
- while ((line = bufferedReader.readLine()) != null) {
- int curElement = Integer.parseInt(line);
- if(isConserv) {
- sketch.incConserv(curElement);
- } else {
- sketch.inc(curElement);
- }
- if(sketch.count(curElement) >= THRESH_PERCENT * TOTAL_N) {
- heavyHitters.add(curElement);
- }
- }
- int freq = sketch.count(9050);
- int heavyHittersCount = heavyHitters.size();
- totalFreq += freq;
- totalHeavyHitters += heavyHittersCount;
- System.out.println("Trial " + trial + ", Heavy Hitters Count: " + heavyHittersCount + ", 9050 Count: " + freq);
- } catch (IOException e) {
- e.printStackTrace();
- } finally {
- if (reader != null) {
- try {
- reader.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- }
- double avgHeavyHitters = (totalHeavyHitters * 1.0) / NUM_TRIALS;
- double avgFreq = (totalFreq * 1.0) / NUM_TRIALS;
- System.out.println("Average" + ", Heavy Hitters Count: " + avgHeavyHitters + ", 9050 Count: " + avgFreq);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement