Untitled

package search.analyzers;

import datastructures.concrete.dictionaries.ChainedHashDictionary;
import datastructures.interfaces.IDictionary;
import datastructures.interfaces.ISet;
import search.models.Webpage;
import datastructures.concrete.ChainedHashSet;
import datastructures.concrete.KVPair;

import java.net.URI;

/**
 * This class is responsible for computing the 'page rank' of all available webpages.
 * If a webpage has many different links to it, it should have a higher page rank.
 * See the spec for more details.
 */
public class PageRankAnalyzer {
    private IDictionary<URI, Double> pageRanks;

    /**
     * Computes a graph representing the internet and computes the page rank of all
     * available webpages.
     *
     * @param webpages  A set of all webpages we have parsed.
     * @param decay     Represents the "decay" factor when computing page rank (see spec).
     * @param epsilon   When the difference in page ranks is less than or equal to this number,
     *                  stop iterating.
     * @param limit     The maximum number of iterations we spend computing page rank. This value
     *                  is meant as a safety valve to prevent us from infinite looping in case our
     *                  page rank never converges.
     */
    public PageRankAnalyzer(ISet<Webpage> webpages, double decay, double epsilon, int limit) {
        // Implementation note: We have commented these method calls out so your
        // search engine doesn't immediately crash when you try running it for the
        // first time.
        //
        // You should uncomment these lines when you're ready to begin working
        // on this class.

        // Step 1: Make a graph representing the 'internet'
        IDictionary<URI, ISet<URI>> graph = this.makeGraph(webpages);

        // Step 2: Use this graph to compute the page rank for each webpage
        this.pageRanks = this.makePageRanks(graph, decay, limit, epsilon);

        // Note: we don't store the graph as a field: once we've computed the
        // page ranks, we no longer need it!
    }

    /**
     * This method converts a set of webpages into an unweighted, directed graph,
     * in adjacency list form.
     *
     * You may assume that each webpage can be uniquely identified by its URI.
     *
     * Note that a webpage may contain links to other webpages that are *not*
     * included within set of webpages you were given. You should omit these
     * links from your graph: we want the final graph we build to be
     * entirely "self-contained".
     */
    private IDictionary<URI, ISet<URI>> makeGraph(ISet<Webpage> webpages) {
        IDictionary<URI, ISet<URI>> auxDict = new ChainedHashDictionary<>();
        ISet<URI> validURIS = new ChainedHashSet<>();
        for (Webpage page : webpages) {
            validURIS.add(page.getUri());
        }
        for (Webpage page : webpages) {
            auxDict.put(page.getUri(), getAllLinks(page, validURIS));
        }
        return auxDict;
    }


    private ISet<URI> getAllLinks(Webpage page, ISet<URI> validURIS) {
        ISet<URI> auxSet = new ChainedHashSet<URI>();
        for (URI link : page.getLinks()) {
            if (validURIS.contains(link)) {
                auxSet.add(link);
            }
        }
        if (auxSet.contains(page.getUri())) {
            auxSet.remove(page.getUri());
        }
        return auxSet;
    }

    /**
     * Computes the page ranks for all webpages in the graph.
     *
     * Precondition: assumes 'this.graphs' has previously been initialized.
     *
     * @param decay     Represents the "decay" factor when computing page rank (see spec).
     * @param epsilon   When the difference in page ranks is less than or equal to this number,
     *                  stop iterating.
     * @param limit     The maximum number of iterations we spend computing page rank. This value
     *                  is meant as a safety valve to prevent us from infinite looping in case our
     *                  page rank never converges.
     */
    private IDictionary<URI, Double> makePageRanks(IDictionary<URI, ISet<URI>> graph,
                                                   double decay,
                                                   int limit,
                                                   double epsilon) {
        // Step 1: The initialize step should go here
        IDictionary<URI, Double> auxDict = new ChainedHashDictionary<>();
        IDictionary<URI, Double> newDict = new ChainedHashDictionary<>();
        int graphSize = graph.size();
        for (KVPair<URI, ISet<URI>> pair : graph) {
            auxDict.put(pair.getKey(), 1.0/ graphSize);
        }
        for (int i = 0; i < limit; i++) {
            // Step 2: The update step should go here
            for (KVPair<URI, ISet<URI>> temp : graph) {
                newDict.put(temp.getKey(), 0.0);
            }
            for (KVPair<URI, ISet<URI>> pair : graph) {
                URI uri = pair.getKey();
                double parentValue = auxDict.get(uri);
                int numLinks = graph.get(uri).size();
                ISet<URI> children = graph.get(uri);
                if (numLinks > 0) {
                    for (URI child : children) {
                        newDict.put(child, newDict.get(child) + decay*parentValue/numLinks);
                    }
                }
                else {
                    for (KVPair<URI, ISet<URI>> pair2 : graph) {
                        newDict.put(pair2.getKey(), newDict.get(pair2.getKey()) + decay*parentValue/graphSize);
                    }
                }
            }
            double constantTerm = (1 - decay) / graphSize;
            for (KVPair<URI, ISet<URI>> temp : graph) {
                newDict.put(temp.getKey(), newDict.get(temp.getKey()) + constantTerm);
            }
            // Step 3: the convergence step should go here.
            // Return early if we've converged.
            boolean toContinue = false;
            for (KVPair<URI, ISet<URI>> pair : graph) {
                if (Math.abs(auxDict.get(pair.getKey()) - newDict.get(pair.getKey())) > epsilon) {
                    toContinue = true;
                }
            }
            for (KVPair<URI, Double> pair : newDict) {
                auxDict.put(pair.getKey(), pair.getValue());
            }
            if (!toContinue) {
                break;
            }
        }
        return auxDict;
    }

    /**
     * Returns the page rank of the given URI.
     *
     * Precondition: the given uri must have been one of the uris within the list of
     *               webpages given to the constructor.
     */
    public double computePageRank(URI pageUri) {
        // Implementation note: this method should be very simple: just one line!
        return pageRanks.get(pageUri);
    }
}