Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- PageRank Algorithm
- Sandeep Gupta
- Every node must have incoming links.
- ------------------------------------
- However, you do not need to do this explicitly, as long as d < 1.0.
- The (1-d) term that describes the “teleport” quantity will implicitly add outgoing links from every node to every other node.
- The d term should be a command-line argument of your tool that we can change.
- Every node must also have at least one outgoing link.
- -----------------------------------------------------
- In the event that a node has no outgoing links, create “virtual” outgoing links to every other node in the graph.
- For such nodes you will basically distribute all of its outgoing PageRank in the "teleport" mechanism.
- Make sure to remove all self edges before processing data.
- You should implement two different stopping criteria: stop after K iterations,
- and stop after every node changes no more than X
- (X= (|current_round_value - previous_round_value| / previous_round_value).
- The values X and K should be command-line arguments of your tool that we can change.
- */
- #include <cassert>
- #include <cstdlib>
- #include <cstring>
- #include <fstream>
- #include <iostream>
- #include <pthread.h>
- #include <sstream>
- #include <unordered_map>
- #include <stack>
- #include <utility>
- #include <vector>
- #include <algorithm>
- #include <cmath>
- #include <queue>
- using namespace std;
- struct nodeOut {
- int PR;
- int Xvalue;
- vector<int> out;
- };
- // Calculating each sink's contribution
- double calculatingSinkContribution(vector<int>& sinksVec, int& numberVertices, const unordered_map<int, double>& rankMap) {
- double sinkContribution = 0;
- for(unsigned int i = 0; i < sinksVec.size(); i++) {
- // Not including self-edges (N - 1)
- sinkContribution += ((rankMap[sinksVec[i]])/(numberVertices - 1));
- }
- return sinkContribution
- }
- void accountingForSinkNodes(unordered_map<int, double>& rankMap, unordered_map<int, vector<int>>& outLinksMap,
- unordered_map<int, double>& newRankMap, const int& numberVertices, const double& sinkContribution) {
- for(auto it1 = rankMap.begin(); it1 != rankMap.end(); ++it1) {
- // Adding the sink contribution to the new rank
- newRankMap[it1->first] += sinkContribution;
- if(outLinksMap[it1->first].size() == 0) {
- // If the current node is a sink node, we don't want to double count
- // So we subtract the rank of the sink node we calculated in sinkContribution
- newRankMap[it1->first] -= rankMap[it1->first] / (numberVertices - 1);
- }
- else { // If not in sinksVec
- for(unsigned int x = 0; x < outLinksMap[it1->first].size(); x++) {
- newRankMap[outLinksMap[it1->first][x]] += (it1->second/outLinksMap[it1->first].size());
- }
- }
- }
- }
- int main(int argc, char *argv[]) {
- // argv[0] eecs485pa5p
- // argv[1] <dvalue>
- string argv1(argv[1]);
- // argv[2] either -k flag or -converge flag
- string flag(argv[2]);
- // argv[3] either <numiterations> or <maxchange>
- string argv3(argv[3]);
- // argv[4] inputnetfile
- string inputFileString(argv[4]);
- // argv[5] outputnetfile
- string outputFileString(argv[5]);
- // Place-holder string for each line in file
- string fileLine;
- double dvalue = stod(argv1);
- double stopCRIT = stod(argv3);
- // Create a map of id to ranks
- unordered_map<int, double> rankMap;
- // Create a map that id to outlinks
- unordered_map<int, vector<int>> outLinksMap;
- // Create a map for the new rank
- unordered_map<int, double> newRankMap;
- vector<int> sinksVec;
- ifstream inputFile;
- inputFile.open(inputFileString);
- getline(inputFile, fileLine);
- istringstream iss(fileLine);
- int numberVertices = 0;
- // The first word is <eecs485_edges>, we don't care about that
- iss >> fileLine;
- // The input is of the format below, we need to parse it
- // and get the nodes
- /*
- <eecs485_edges>
- <eecs485_edge>
- <eecs485_from>303</eecs485_from>
- <eecs485_to>4177</eecs485_to>
- </eecs485_edge>
- */
- string inString;
- string outString;
- char firstDelim = '>';
- char lastDelim = '<';
- int source;
- int dest;
- int first;
- int last;
- double initialPR;
- while(getline(inputFile, fileLine)) {
- if(fileLine.substr(0, 14) == "<eecs485_from>") {
- first = fileLine.find_first_of(firstDelim);
- last = fileLine.find_last_of(lastDelim);
- inString = fileLine.substr(first + 1, last - first);
- source = stoi(inString);
- if(newRankMap.find(source) == newRankMap.end()) {
- numberVertices++;
- newRankMap[source] = 0;
- }
- }
- else if(fileLine.substr(0, 12) == "<eecs485_to>") {
- first = fileLine.find_first_of(firstDelim);
- last = fileLine.find_last_of(lastDelim);
- outString = fileLine.substr(first + 1, last - first);
- dest = stoi(outString);
- if(newRankMap.find(source) == newRankMap.end()) {
- numberVertices++;
- newRankMap[source] = 0;
- }
- if(source != dest) {
- outLinksMap[source].push_back(dest);
- }
- }
- }
- // End of parsing
- // Calculating initial PageRank value
- initialPR = 1/static_cast<double>(numberVertices);
- for(auto it = newRankMap.begin(); it != newRankMap.end(); ++it) {
- rankMap[it->first] = initialPR;
- }
- // Finding sinks (nodes that do not point to other nodes)
- for(auto it = rankMap.begin(); it != rankMap.end(); ++it) {
- if(outLinksMap[it->first].size() == 0) {
- sinksVec.push_back(it->first);
- }
- }
- // -k flag indicates number of iterations (stopCRIT)
- if(flag == "-k") {
- for(int i = 0; i < stopCRIT; i++) {
- // Calculating each sink's contribution
- double sinkContribution = calculatingSinkContribution(sinksVec, numberVertices, rankMap);
- accountingForSinkNodes(rankMap, outLinksMap, newRankMap, numberVertices, sinkContribution);
- // Final (1 - d)/N + d * NewRank
- for(auto it2 = newRankMap.begin(); it2 != newRankMap.end(); ++it2) {
- it2->second *= dvalue;
- it2->second += ((1 - dvalue)/numberVertices);
- rankMap[it2->first] = 0;
- }
- // We have an updated rank to the nodes and we need to store it somewhere
- rankMap.swap(newRankMap);
- }
- }
- // Converge flag indicates calculating PageRank until ONE PR value
- // falls below a certain value (stopCRIT)
- else if(flag == "-converge") {
- double Xval;
- bool xBreak = false;
- while(!xBreak) {
- xBreak = true;
- // Calculating each sink's contribution
- double sinkContribution = calculatingSinkContribution(sinksVec, numberVertices, rankMap);
- accountingForSinkNodes(rankMap, outLinksMap, newRankMap, numberVertices, sinkContribution);
- // Final (1 - d)/N + d * NewRank calculation and X comparison
- for(auto it2 = newRankMap.begin(); it2 != newRankMap.end(); ++it2) {
- it2->second *= dvalue;
- it2->second += ((1 - dvalue)/numberVertices);
- Xval = abs(it2->second - rankMap[it2->first])/rankMap[it2->first];
- // If a PR value falls below a the critical value
- if(Xval > stopCRIT) {
- xBreak = false;
- }
- rankMap[it2->first] = 0;
- }
- rankMap.swap(newRankMap);
- }
- }
- // Print PageRank values to a file
- inputFile.close();
- ofstream outFile;
- outFile.open(outputFileString);
- for(auto it = rankMap.begin(); it != rankMap.end(); ++it) {
- outFile << it->first << ", " << it->second << '\n';
- }
- outFile.close();
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement