Advertisement
Guest User

Untitled

a guest
May 22nd, 2019
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.87 KB | None | 0 0
  1. package search.analyzers;
  2.  
  3. import datastructures.concrete.dictionaries.ChainedHashDictionary;
  4. import datastructures.interfaces.IDictionary;
  5. import datastructures.interfaces.ISet;
  6. import search.models.Webpage;
  7. import datastructures.concrete.ChainedHashSet;
  8. import datastructures.concrete.KVPair;
  9.  
  10. import java.net.URI;
  11.  
  12. /**
  13. * This class is responsible for computing the 'page rank' of all available webpages.
  14. * If a webpage has many different links to it, it should have a higher page rank.
  15. * See the spec for more details.
  16. */
  17. public class PageRankAnalyzer {
  18. private IDictionary<URI, Double> pageRanks;
  19.  
  20. /**
  21. * Computes a graph representing the internet and computes the page rank of all
  22. * available webpages.
  23. *
  24. * @param webpages A set of all webpages we have parsed.
  25. * @param decay Represents the "decay" factor when computing page rank (see spec).
  26. * @param epsilon When the difference in page ranks is less than or equal to this number,
  27. * stop iterating.
  28. * @param limit The maximum number of iterations we spend computing page rank. This value
  29. * is meant as a safety valve to prevent us from infinite looping in case our
  30. * page rank never converges.
  31. */
  32. public PageRankAnalyzer(ISet<Webpage> webpages, double decay, double epsilon, int limit) {
  33. // Implementation note: We have commented these method calls out so your
  34. // search engine doesn't immediately crash when you try running it for the
  35. // first time.
  36. //
  37. // You should uncomment these lines when you're ready to begin working
  38. // on this class.
  39.  
  40. // Step 1: Make a graph representing the 'internet'
  41. IDictionary<URI, ISet<URI>> graph = this.makeGraph(webpages);
  42.  
  43. // Step 2: Use this graph to compute the page rank for each webpage
  44. this.pageRanks = this.makePageRanks(graph, decay, limit, epsilon);
  45.  
  46. // Note: we don't store the graph as a field: once we've computed the
  47. // page ranks, we no longer need it!
  48. }
  49.  
  50. /**
  51. * This method converts a set of webpages into an unweighted, directed graph,
  52. * in adjacency list form.
  53. *
  54. * You may assume that each webpage can be uniquely identified by its URI.
  55. *
  56. * Note that a webpage may contain links to other webpages that are *not*
  57. * included within set of webpages you were given. You should omit these
  58. * links from your graph: we want the final graph we build to be
  59. * entirely "self-contained".
  60. */
  61. private IDictionary<URI, ISet<URI>> makeGraph(ISet<Webpage> webpages) {
  62. IDictionary<URI, ISet<URI>> auxDict = new ChainedHashDictionary<>();
  63. ISet<URI> validURIS = new ChainedHashSet<>();
  64. for (Webpage page : webpages) {
  65. validURIS.add(page.getUri());
  66. }
  67. for (Webpage page : webpages) {
  68. auxDict.put(page.getUri(), getAllLinks(page, validURIS));
  69. }
  70. return auxDict;
  71. }
  72.  
  73.  
  74. private ISet<URI> getAllLinks(Webpage page, ISet<URI> validURIS) {
  75. ISet<URI> auxSet = new ChainedHashSet<URI>();
  76. for (URI link : page.getLinks()) {
  77. if (validURIS.contains(link)) {
  78. auxSet.add(link);
  79. }
  80. }
  81. if (auxSet.contains(page.getUri())) {
  82. auxSet.remove(page.getUri());
  83. }
  84. return auxSet;
  85. }
  86.  
  87. /**
  88. * Computes the page ranks for all webpages in the graph.
  89. *
  90. * Precondition: assumes 'this.graphs' has previously been initialized.
  91. *
  92. * @param decay Represents the "decay" factor when computing page rank (see spec).
  93. * @param epsilon When the difference in page ranks is less than or equal to this number,
  94. * stop iterating.
  95. * @param limit The maximum number of iterations we spend computing page rank. This value
  96. * is meant as a safety valve to prevent us from infinite looping in case our
  97. * page rank never converges.
  98. */
  99. private IDictionary<URI, Double> makePageRanks(IDictionary<URI, ISet<URI>> graph,
  100. double decay,
  101. int limit,
  102. double epsilon) {
  103. // Step 1: The initialize step should go here
  104. IDictionary<URI, Double> auxDict = new ChainedHashDictionary<>();
  105. IDictionary<URI, Double> newDict = new ChainedHashDictionary<>();
  106. int graphSize = graph.size();
  107. for (KVPair<URI, ISet<URI>> pair : graph) {
  108. auxDict.put(pair.getKey(), 1.0/ graphSize);
  109. }
  110. for (int i = 0; i < limit; i++) {
  111. // Step 2: The update step should go here
  112. for (KVPair<URI, ISet<URI>> temp : graph) {
  113. newDict.put(temp.getKey(), 0.0);
  114. }
  115. for (KVPair<URI, ISet<URI>> pair : graph) {
  116. URI uri = pair.getKey();
  117. double parentValue = auxDict.get(uri);
  118. int numLinks = graph.get(uri).size();
  119. ISet<URI> children = graph.get(uri);
  120. if (numLinks > 0) {
  121. for (URI child : children) {
  122. newDict.put(child, newDict.get(child) + decay*parentValue/numLinks);
  123. }
  124. }
  125. else {
  126. for (KVPair<URI, ISet<URI>> pair2 : graph) {
  127. newDict.put(pair2.getKey(), newDict.get(pair2.getKey()) + decay*parentValue/graphSize);
  128. }
  129. }
  130. }
  131. double constantTerm = (1 - decay) / graphSize;
  132. for (KVPair<URI, ISet<URI>> temp : graph) {
  133. newDict.put(temp.getKey(), newDict.get(temp.getKey()) + constantTerm);
  134. }
  135. // Step 3: the convergence step should go here.
  136. // Return early if we've converged.
  137. boolean toContinue = false;
  138. for (KVPair<URI, ISet<URI>> pair : graph) {
  139. if (Math.abs(auxDict.get(pair.getKey()) - newDict.get(pair.getKey())) > epsilon) {
  140. toContinue = true;
  141. }
  142. }
  143. for (KVPair<URI, Double> pair : newDict) {
  144. auxDict.put(pair.getKey(), pair.getValue());
  145. }
  146. if (!toContinue) {
  147. break;
  148. }
  149. }
  150. return auxDict;
  151. }
  152.  
  153. /**
  154. * Returns the page rank of the given URI.
  155. *
  156. * Precondition: the given uri must have been one of the uris within the list of
  157. * webpages given to the constructor.
  158. */
  159. public double computePageRank(URI pageUri) {
  160. // Implementation note: this method should be very simple: just one line!
  161. return pageRanks.get(pageUri);
  162. }
  163. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement