Advertisement
Guest User

topk

a guest
Feb 21st, 2020
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.02 KB | None | 0 0
  1. /*TopK.cpp: finds k highest weight datapoints in stream. Uses heapqueues to organize datapoints
  2. */
  3.  
  4. #include "TopK.h"
  5. #include "Testing/TopKTests.h"
  6. #include "HeapPQueue.h"
  7. #include "stack.h"
  8. #include "vector.h"
  9. using namespace std;
  10.  
  11. //Function: topK
  12. //Parameters: istream& stream : stream of Datapoints, int k : number of elements to acquire
  13.  
  14. Vector<DataPoint> topK(istream& stream, int k) {
  15. HeapPQueue tempQ;
  16. if (k == 0){
  17. return {};
  18. }
  19. for (DataPoint pt; stream >> pt; ) { //not sure if this actually iterates over the stream MAKE sure to CHECK THIS
  20. if (tempQ.size() < k){
  21. tempQ.enqueue(pt);
  22. }
  23. else{
  24. if (tempQ.peek().weight < pt.weight){
  25. tempQ.dequeue();
  26. tempQ.enqueue(pt);
  27. }
  28. }
  29. }
  30. Stack<DataPoint> notreverse;
  31. int a = tempQ.size();
  32. for (int i = 0; i < a; i++){
  33. notreverse.push(tempQ.dequeue());
  34. }
  35. Vector<DataPoint> reverse;
  36. int b = notreverse.size();
  37. for (int j = 0; j < b; j++){
  38. reverse.add(notreverse.pop());
  39. }
  40. return reverse;
  41. }
  42.  
  43.  
  44. /* * * * * * Test Cases Below This Point * * * * * */
  45.  
  46. /* Helper function that, given a list of data points, produces a stream from them. */
  47. stringstream asStream(const Vector<DataPoint>& dataPoints) {
  48. stringstream result;
  49. for (const auto& pt: dataPoints) {
  50. result << pt;
  51. }
  52. return result;
  53. }
  54.  
  55. /* TODO: Add your own custom tests here! */
  56.  
  57. ADD_TEST("Stream contains only duplicate elements") {
  58. Vector<DataPoint> vec = {
  59. { "" , 5 },
  60. { "" , 5 },
  61. { "" , 5 },
  62. { "" , 5 },
  63. { "" , 5 },
  64. { "" , 5 },
  65. { "" , 5 }
  66. };
  67. auto stream = asStream(vec);
  68. Vector<DataPoint> expected = { vec[0] };
  69. EXPECT_EQUAL(topK(stream, 1), expected);
  70.  
  71. stream = asStream(vec);
  72. expected = { vec[0], vec[0] };
  73. EXPECT_EQUAL(topK(stream, 2), expected);
  74. }
  75.  
  76.  
  77. /* * * * * Provided Tests Below This Point * * * * */
  78.  
  79. /* Constant used for sizing the tests below this point. */
  80. const int kMany = 100000;
  81.  
  82. ADD_TEST("Provided Test: Stream 0 elements, ask for top 0") {
  83. auto stream = asStream({});
  84. Vector<DataPoint> expected = {};
  85. EXPECT_EQUAL(topK(stream, 0), expected);
  86. }
  87.  
  88. ADD_TEST("Provided Test: Stream 0 elements, ask for top 1") {
  89. auto stream = asStream({});
  90. Vector<DataPoint> expected = {};
  91. EXPECT_EQUAL(topK(stream, 1), expected);
  92. }
  93.  
  94. ADD_TEST("Provided Test: Stream 0 elements, ask for top many") {
  95. auto stream = asStream({});
  96. Vector<DataPoint> expected = {};
  97. EXPECT_EQUAL(topK(stream, kMany), expected);
  98. }
  99.  
  100. ADD_TEST("Provided Test: Stream 1 element, ask for top 0") {
  101. auto stream = asStream({ { "A" , 1 } });
  102. Vector<DataPoint> expected = {};
  103. EXPECT_EQUAL(topK(stream, 0), expected);
  104. }
  105.  
  106. ADD_TEST("Provided Test: Stream 1 element, ask for top 1") {
  107. auto stream = asStream({ { "A" , 1 } });
  108. Vector<DataPoint> expected = { { "A" , 1 } };
  109. EXPECT_EQUAL(topK(stream, 1), expected);
  110. }
  111.  
  112. ADD_TEST("Provided Test: Stream 1 element, ask for top many") {
  113. auto stream = asStream({ { "A" , 1 } });
  114. Vector<DataPoint> expected = { { "A" , 1 } };
  115. EXPECT_EQUAL(topK(stream, kMany), expected);
  116. }
  117.  
  118. ADD_TEST("Provided Test: Works in a simple case.") {
  119. /* Build a stream with some simple elements in it. */
  120. auto stream = asStream({
  121. { "A", 1 }, { "B", 2 }, { "C", 3 }, { "D", 4 }
  122. });
  123.  
  124. /* What we should see. */
  125. Vector<DataPoint> expected = {
  126. { "D", 4 }, { "C", 3 }, { "B", 2 }
  127. };
  128.  
  129. EXPECT(topK(stream, 3) == expected);
  130. }
  131.  
  132. ADD_TEST("Provided Test: Stream contains duplicate elements") {
  133. Vector<DataPoint> vec = {
  134. { "" , 1 },
  135. { "" , 3 },
  136. { "" , 2 },
  137. { "" , 1 },
  138. { "" , 5 },
  139. { "" , 4 },
  140. { "" , 2 },
  141. { "" , 3 },
  142. { "" , 4 },
  143. { "" , 5 },
  144. };
  145. auto stream = asStream(vec);
  146. Vector<DataPoint> expected = { vec[4] };
  147. EXPECT_EQUAL(topK(stream, 1), expected);
  148.  
  149. stream = asStream(vec);
  150. expected = { vec[4], vec[4] };
  151. EXPECT_EQUAL(topK(stream, 2), expected);
  152.  
  153. stream = asStream(vec);
  154. expected = { vec[4], vec[4], vec[5] };
  155. EXPECT_EQUAL(topK(stream, 3), expected);
  156.  
  157. stream = asStream(vec);
  158. expected = { vec[4], vec[4], vec[5], vec[5] };
  159. EXPECT_EQUAL(topK(stream, 4), expected);
  160.  
  161. stream = asStream(vec);
  162. expected = { vec[4], vec[4], vec[5], vec[5], vec[1] };
  163. EXPECT_EQUAL(topK(stream, 5), expected);
  164. }
  165.  
  166. ADD_TEST("Provided Test: Stream contains negative elements") {
  167. Vector<DataPoint> vec = {
  168. { "" , 1 },
  169. { "" , 3 },
  170. { "" , 2 },
  171. { "" , -1 },
  172. { "" , -5 },
  173. { "" , 4 },
  174. { "" , -2 },
  175. { "" , 3 },
  176. { "" , -4 },
  177. { "" , 5 },
  178. };
  179. auto stream = asStream(vec);
  180. Vector<DataPoint> expected = { vec[9] };
  181. EXPECT_EQUAL(topK(stream, 1), expected);
  182.  
  183. stream = asStream(vec);
  184. expected = { vec[9], vec[5] };
  185. EXPECT_EQUAL(topK(stream, 2), expected);
  186.  
  187. stream = asStream(vec);
  188. expected = { vec[9], vec[5], vec[1] };
  189. EXPECT_EQUAL(topK(stream, 3), expected);
  190.  
  191. stream = asStream(vec);
  192. expected = { vec[9], vec[5], vec[1], vec[1] };
  193. EXPECT_EQUAL(topK(stream, 4), expected);
  194.  
  195. stream = asStream(vec);
  196. expected = { vec[9], vec[5], vec[1], vec[1], vec[2] };
  197. EXPECT_EQUAL(topK(stream, 5), expected);
  198. }
  199.  
  200. ADD_TEST("Provided Test: Stream many elements, ask for top 0") {
  201. Vector<DataPoint> vec;
  202. for (int i = 0; i < kMany; i++) vec.add({ "", i });
  203. auto stream = asStream(vec);
  204. Vector<DataPoint> expected = {};
  205. EXPECT_EQUAL(topK(stream, 0), expected);
  206. }
  207.  
  208. ADD_TEST("Provided Test: Stream many elements, ask for top 1") {
  209. Vector<DataPoint> vec;
  210. for (int i = 0; i < kMany; i++) vec.add({ "", i });
  211. auto stream = asStream(vec);
  212. Vector<DataPoint> expected = { { "" , kMany - 1 } };
  213. EXPECT_EQUAL(topK(stream, 1), expected);
  214. }
  215.  
  216. ADD_TEST("Provided Test: Stream many elements, ask for top 5") {
  217. Vector<DataPoint> vec;
  218. for (int i = 0; i < kMany; i++) vec.add({ "", i });
  219. auto stream = asStream(vec);
  220. Vector<DataPoint> expected = {
  221. { "" , kMany - 1 },
  222. { "" , kMany - 2 },
  223. { "" , kMany - 3 },
  224. { "" , kMany - 4 },
  225. { "" , kMany - 5 }
  226. };
  227. EXPECT_EQUAL(topK(stream, 5), expected);
  228. }
  229.  
  230. ADD_TEST("Provided Test: Stress test") {
  231. Vector<int> sorted;
  232. Vector<DataPoint> points;
  233. for (int i = 0; i < 10000; i++) {
  234. int weight = randomInteger(0, 100000);
  235. sorted.add(weight);
  236. points.add({ "" , weight});
  237. }
  238.  
  239. auto stream = asStream(points);
  240. sort(sorted.begin(), sorted.end(), greater<int>());
  241. auto result = topK(stream, 10);
  242.  
  243. EXPECT_EQUAL(result.size(), 10);
  244. for (int i = 0; i < 10; i++) {
  245. EXPECT_EQUAL(result[i].weight, sorted[i]);
  246. }
  247. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement