Advertisement
Guest User

Untitled

a guest
Feb 8th, 2016
47
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.93 KB | None | 0 0
  1. /*
  2.  
  3. PageRank Algorithm
  4. Sandeep Gupta
  5.  
  6. Every node must have incoming links.
  7. ------------------------------------
  8. However, you do not need to do this explicitly, as long as d < 1.0.
  9. The (1-d) term that describes the “teleport” quantity will implicitly add outgoing links from every node to every other node.
  10. The d term should be a command-line argument of your tool that we can change.
  11.  
  12. Every node must also have at least one outgoing link.
  13. -----------------------------------------------------
  14. In the event that a node has no outgoing links, create “virtual” outgoing links to every other node in the graph.
  15. For such nodes you will basically distribute all of its outgoing PageRank in the "teleport" mechanism.
  16. Make sure to remove all self edges before processing data.
  17.  
  18. You should implement two different stopping criteria: stop after K iterations,
  19. and stop after every node changes no more than X
  20. (X= (|current_round_value - previous_round_value| / previous_round_value).
  21. The values X and K should be command-line arguments of your tool that we can change.
  22.  
  23. */
  24.  
  25. #include <cassert>
  26. #include <cstdlib>
  27. #include <cstring>
  28. #include <fstream>
  29. #include <iostream>
  30. #include <pthread.h>
  31. #include <sstream>
  32. #include <unordered_map>
  33. #include <stack>
  34. #include <utility>
  35. #include <vector>
  36. #include <algorithm>
  37. #include <cmath>
  38. #include <queue>
  39.  
  40. using namespace std;
  41.  
  42. struct nodeOut {
  43. int PR;
  44. int Xvalue;
  45. vector<int> out;
  46. };
  47.  
  48. // Calculating each sink's contribution
  49. double calculatingSinkContribution(vector<int>& sinksVec, int& numberVertices, const unordered_map<int, double>& rankMap) {
  50. double sinkContribution = 0;
  51. for(unsigned int i = 0; i < sinksVec.size(); i++) {
  52. // Not including self-edges (N - 1)
  53. sinkContribution += ((rankMap[sinksVec[i]])/(numberVertices - 1));
  54.  
  55. }
  56. return sinkContribution
  57. }
  58.  
  59. void accountingForSinkNodes(unordered_map<int, double>& rankMap, unordered_map<int, vector<int>>& outLinksMap,
  60. unordered_map<int, double>& newRankMap, const int& numberVertices, const double& sinkContribution) {
  61.  
  62. for(auto it1 = rankMap.begin(); it1 != rankMap.end(); ++it1) {
  63. // Adding the sink contribution to the new rank
  64. newRankMap[it1->first] += sinkContribution;
  65.  
  66. if(outLinksMap[it1->first].size() == 0) {
  67. // If the current node is a sink node, we don't want to double count
  68. // So we subtract the rank of the sink node we calculated in sinkContribution
  69. newRankMap[it1->first] -= rankMap[it1->first] / (numberVertices - 1);
  70. }
  71. else { // If not in sinksVec
  72. for(unsigned int x = 0; x < outLinksMap[it1->first].size(); x++) {
  73. newRankMap[outLinksMap[it1->first][x]] += (it1->second/outLinksMap[it1->first].size());
  74. }
  75. }
  76.  
  77. }
  78. }
  79.  
  80. int main(int argc, char *argv[]) {
  81. // argv[0] eecs485pa5p
  82. // argv[1] <dvalue>
  83. string argv1(argv[1]);
  84. // argv[2] either -k flag or -converge flag
  85. string flag(argv[2]);
  86. // argv[3] either <numiterations> or <maxchange>
  87. string argv3(argv[3]);
  88. // argv[4] inputnetfile
  89. string inputFileString(argv[4]);
  90. // argv[5] outputnetfile
  91. string outputFileString(argv[5]);
  92. // Place-holder string for each line in file
  93. string fileLine;
  94.  
  95. double dvalue = stod(argv1);
  96. double stopCRIT = stod(argv3);
  97.  
  98. // Create a map of id to ranks
  99. unordered_map<int, double> rankMap;
  100. // Create a map that id to outlinks
  101. unordered_map<int, vector<int>> outLinksMap;
  102. // Create a map for the new rank
  103. unordered_map<int, double> newRankMap;
  104.  
  105. vector<int> sinksVec;
  106.  
  107. ifstream inputFile;
  108. inputFile.open(inputFileString);
  109. getline(inputFile, fileLine);
  110. istringstream iss(fileLine);
  111.  
  112. int numberVertices = 0;
  113.  
  114. // The first word is <eecs485_edges>, we don't care about that
  115. iss >> fileLine;
  116.  
  117. // The input is of the format below, we need to parse it
  118. // and get the nodes
  119. /*
  120. <eecs485_edges>
  121. <eecs485_edge>
  122. <eecs485_from>303</eecs485_from>
  123. <eecs485_to>4177</eecs485_to>
  124. </eecs485_edge>
  125. */
  126. string inString;
  127. string outString;
  128. char firstDelim = '>';
  129. char lastDelim = '<';
  130. int source;
  131. int dest;
  132. int first;
  133. int last;
  134. double initialPR;
  135.  
  136. while(getline(inputFile, fileLine)) {
  137. if(fileLine.substr(0, 14) == "<eecs485_from>") {
  138. first = fileLine.find_first_of(firstDelim);
  139. last = fileLine.find_last_of(lastDelim);
  140. inString = fileLine.substr(first + 1, last - first);
  141. source = stoi(inString);
  142.  
  143. if(newRankMap.find(source) == newRankMap.end()) {
  144. numberVertices++;
  145. newRankMap[source] = 0;
  146. }
  147. }
  148. else if(fileLine.substr(0, 12) == "<eecs485_to>") {
  149. first = fileLine.find_first_of(firstDelim);
  150. last = fileLine.find_last_of(lastDelim);
  151. outString = fileLine.substr(first + 1, last - first);
  152. dest = stoi(outString);
  153.  
  154. if(newRankMap.find(source) == newRankMap.end()) {
  155. numberVertices++;
  156. newRankMap[source] = 0;
  157. }
  158. if(source != dest) {
  159. outLinksMap[source].push_back(dest);
  160. }
  161. }
  162. }
  163. // End of parsing
  164.  
  165. // Calculating initial PageRank value
  166. initialPR = 1/static_cast<double>(numberVertices);
  167. for(auto it = newRankMap.begin(); it != newRankMap.end(); ++it) {
  168. rankMap[it->first] = initialPR;
  169. }
  170.  
  171. // Finding sinks (nodes that do not point to other nodes)
  172. for(auto it = rankMap.begin(); it != rankMap.end(); ++it) {
  173. if(outLinksMap[it->first].size() == 0) {
  174. sinksVec.push_back(it->first);
  175. }
  176. }
  177.  
  178. // -k flag indicates number of iterations (stopCRIT)
  179. if(flag == "-k") {
  180. for(int i = 0; i < stopCRIT; i++) {
  181. // Calculating each sink's contribution
  182. double sinkContribution = calculatingSinkContribution(sinksVec, numberVertices, rankMap);
  183.  
  184. accountingForSinkNodes(rankMap, outLinksMap, newRankMap, numberVertices, sinkContribution);
  185.  
  186. // Final (1 - d)/N + d * NewRank
  187. for(auto it2 = newRankMap.begin(); it2 != newRankMap.end(); ++it2) {
  188. it2->second *= dvalue;
  189. it2->second += ((1 - dvalue)/numberVertices);
  190. rankMap[it2->first] = 0;
  191. }
  192. // We have an updated rank to the nodes and we need to store it somewhere
  193. rankMap.swap(newRankMap);
  194. }
  195. }
  196. // Converge flag indicates calculating PageRank until ONE PR value
  197. // falls below a certain value (stopCRIT)
  198. else if(flag == "-converge") {
  199. double Xval;
  200. bool xBreak = false;
  201. while(!xBreak) {
  202. xBreak = true;
  203. // Calculating each sink's contribution
  204. double sinkContribution = calculatingSinkContribution(sinksVec, numberVertices, rankMap);
  205.  
  206. accountingForSinkNodes(rankMap, outLinksMap, newRankMap, numberVertices, sinkContribution);
  207.  
  208. // Final (1 - d)/N + d * NewRank calculation and X comparison
  209. for(auto it2 = newRankMap.begin(); it2 != newRankMap.end(); ++it2) {
  210.  
  211. it2->second *= dvalue;
  212. it2->second += ((1 - dvalue)/numberVertices);
  213.  
  214. Xval = abs(it2->second - rankMap[it2->first])/rankMap[it2->first];
  215.  
  216. // If a PR value falls below a the critical value
  217. if(Xval > stopCRIT) {
  218. xBreak = false;
  219. }
  220.  
  221. rankMap[it2->first] = 0;
  222. }
  223.  
  224. rankMap.swap(newRankMap);
  225. }
  226. }
  227.  
  228. // Print PageRank values to a file
  229. inputFile.close();
  230. ofstream outFile;
  231. outFile.open(outputFileString);
  232. for(auto it = rankMap.begin(); it != rankMap.end(); ++it) {
  233. outFile << it->first << ", " << it->second << '\n';
  234. }
  235. outFile.close();
  236.  
  237. return 0;
  238. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement