Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*TopK.cpp: finds k highest weight datapoints in stream. Uses heapqueues to organize datapoints
- */
- #include "TopK.h"
- #include "Testing/TopKTests.h"
- #include "HeapPQueue.h"
- #include "stack.h"
- #include "vector.h"
- using namespace std;
- //Function: topK
- //Parameters: istream& stream : stream of Datapoints, int k : number of elements to acquire
- Vector<DataPoint> topK(istream& stream, int k) {
- HeapPQueue tempQ;
- if (k == 0){
- return {};
- }
- for (DataPoint pt; stream >> pt; ) { //not sure if this actually iterates over the stream MAKE sure to CHECK THIS
- if (tempQ.size() < k){
- tempQ.enqueue(pt);
- }
- else{
- if (tempQ.peek().weight < pt.weight){
- tempQ.dequeue();
- tempQ.enqueue(pt);
- }
- }
- }
- Stack<DataPoint> notreverse;
- int a = tempQ.size();
- for (int i = 0; i < a; i++){
- notreverse.push(tempQ.dequeue());
- }
- Vector<DataPoint> reverse;
- int b = notreverse.size();
- for (int j = 0; j < b; j++){
- reverse.add(notreverse.pop());
- }
- return reverse;
- }
- /* * * * * * Test Cases Below This Point * * * * * */
- /* Helper function that, given a list of data points, produces a stream from them. */
- stringstream asStream(const Vector<DataPoint>& dataPoints) {
- stringstream result;
- for (const auto& pt: dataPoints) {
- result << pt;
- }
- return result;
- }
- /* TODO: Add your own custom tests here! */
- ADD_TEST("Stream contains only duplicate elements") {
- Vector<DataPoint> vec = {
- { "" , 5 },
- { "" , 5 },
- { "" , 5 },
- { "" , 5 },
- { "" , 5 },
- { "" , 5 },
- { "" , 5 }
- };
- auto stream = asStream(vec);
- Vector<DataPoint> expected = { vec[0] };
- EXPECT_EQUAL(topK(stream, 1), expected);
- stream = asStream(vec);
- expected = { vec[0], vec[0] };
- EXPECT_EQUAL(topK(stream, 2), expected);
- }
- /* * * * * Provided Tests Below This Point * * * * */
- /* Constant used for sizing the tests below this point. */
- const int kMany = 100000;
- ADD_TEST("Provided Test: Stream 0 elements, ask for top 0") {
- auto stream = asStream({});
- Vector<DataPoint> expected = {};
- EXPECT_EQUAL(topK(stream, 0), expected);
- }
- ADD_TEST("Provided Test: Stream 0 elements, ask for top 1") {
- auto stream = asStream({});
- Vector<DataPoint> expected = {};
- EXPECT_EQUAL(topK(stream, 1), expected);
- }
- ADD_TEST("Provided Test: Stream 0 elements, ask for top many") {
- auto stream = asStream({});
- Vector<DataPoint> expected = {};
- EXPECT_EQUAL(topK(stream, kMany), expected);
- }
- ADD_TEST("Provided Test: Stream 1 element, ask for top 0") {
- auto stream = asStream({ { "A" , 1 } });
- Vector<DataPoint> expected = {};
- EXPECT_EQUAL(topK(stream, 0), expected);
- }
- ADD_TEST("Provided Test: Stream 1 element, ask for top 1") {
- auto stream = asStream({ { "A" , 1 } });
- Vector<DataPoint> expected = { { "A" , 1 } };
- EXPECT_EQUAL(topK(stream, 1), expected);
- }
- ADD_TEST("Provided Test: Stream 1 element, ask for top many") {
- auto stream = asStream({ { "A" , 1 } });
- Vector<DataPoint> expected = { { "A" , 1 } };
- EXPECT_EQUAL(topK(stream, kMany), expected);
- }
- ADD_TEST("Provided Test: Works in a simple case.") {
- /* Build a stream with some simple elements in it. */
- auto stream = asStream({
- { "A", 1 }, { "B", 2 }, { "C", 3 }, { "D", 4 }
- });
- /* What we should see. */
- Vector<DataPoint> expected = {
- { "D", 4 }, { "C", 3 }, { "B", 2 }
- };
- EXPECT(topK(stream, 3) == expected);
- }
- ADD_TEST("Provided Test: Stream contains duplicate elements") {
- Vector<DataPoint> vec = {
- { "" , 1 },
- { "" , 3 },
- { "" , 2 },
- { "" , 1 },
- { "" , 5 },
- { "" , 4 },
- { "" , 2 },
- { "" , 3 },
- { "" , 4 },
- { "" , 5 },
- };
- auto stream = asStream(vec);
- Vector<DataPoint> expected = { vec[4] };
- EXPECT_EQUAL(topK(stream, 1), expected);
- stream = asStream(vec);
- expected = { vec[4], vec[4] };
- EXPECT_EQUAL(topK(stream, 2), expected);
- stream = asStream(vec);
- expected = { vec[4], vec[4], vec[5] };
- EXPECT_EQUAL(topK(stream, 3), expected);
- stream = asStream(vec);
- expected = { vec[4], vec[4], vec[5], vec[5] };
- EXPECT_EQUAL(topK(stream, 4), expected);
- stream = asStream(vec);
- expected = { vec[4], vec[4], vec[5], vec[5], vec[1] };
- EXPECT_EQUAL(topK(stream, 5), expected);
- }
- ADD_TEST("Provided Test: Stream contains negative elements") {
- Vector<DataPoint> vec = {
- { "" , 1 },
- { "" , 3 },
- { "" , 2 },
- { "" , -1 },
- { "" , -5 },
- { "" , 4 },
- { "" , -2 },
- { "" , 3 },
- { "" , -4 },
- { "" , 5 },
- };
- auto stream = asStream(vec);
- Vector<DataPoint> expected = { vec[9] };
- EXPECT_EQUAL(topK(stream, 1), expected);
- stream = asStream(vec);
- expected = { vec[9], vec[5] };
- EXPECT_EQUAL(topK(stream, 2), expected);
- stream = asStream(vec);
- expected = { vec[9], vec[5], vec[1] };
- EXPECT_EQUAL(topK(stream, 3), expected);
- stream = asStream(vec);
- expected = { vec[9], vec[5], vec[1], vec[1] };
- EXPECT_EQUAL(topK(stream, 4), expected);
- stream = asStream(vec);
- expected = { vec[9], vec[5], vec[1], vec[1], vec[2] };
- EXPECT_EQUAL(topK(stream, 5), expected);
- }
- ADD_TEST("Provided Test: Stream many elements, ask for top 0") {
- Vector<DataPoint> vec;
- for (int i = 0; i < kMany; i++) vec.add({ "", i });
- auto stream = asStream(vec);
- Vector<DataPoint> expected = {};
- EXPECT_EQUAL(topK(stream, 0), expected);
- }
- ADD_TEST("Provided Test: Stream many elements, ask for top 1") {
- Vector<DataPoint> vec;
- for (int i = 0; i < kMany; i++) vec.add({ "", i });
- auto stream = asStream(vec);
- Vector<DataPoint> expected = { { "" , kMany - 1 } };
- EXPECT_EQUAL(topK(stream, 1), expected);
- }
- ADD_TEST("Provided Test: Stream many elements, ask for top 5") {
- Vector<DataPoint> vec;
- for (int i = 0; i < kMany; i++) vec.add({ "", i });
- auto stream = asStream(vec);
- Vector<DataPoint> expected = {
- { "" , kMany - 1 },
- { "" , kMany - 2 },
- { "" , kMany - 3 },
- { "" , kMany - 4 },
- { "" , kMany - 5 }
- };
- EXPECT_EQUAL(topK(stream, 5), expected);
- }
- ADD_TEST("Provided Test: Stress test") {
- Vector<int> sorted;
- Vector<DataPoint> points;
- for (int i = 0; i < 10000; i++) {
- int weight = randomInteger(0, 100000);
- sorted.add(weight);
- points.add({ "" , weight});
- }
- auto stream = asStream(points);
- sort(sorted.begin(), sorted.end(), greater<int>());
- auto result = topK(stream, 10);
- EXPECT_EQUAL(result.size(), 10);
- for (int i = 0; i < 10; i++) {
- EXPECT_EQUAL(result[i].weight, sorted[i]);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement