Advertisement
Guest User

Untitled

a guest
Feb 10th, 2016
121
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.00 KB | None | 0 0
  1. package webcrawler;
  2.  
  3. import java.util.ArrayList;
  4. import java.util.HashMap;
  5. import java.util.Iterator;
  6. import java.util.List;
  7. import java.util.Map;
  8. import Jama.Matrix;
  9. import java.util.Set;
  10.  
  11. public class Ranking {
  12. private final double DAMPING_FACTOR = 0.85;
  13. private Map<String, String[]> mapInbound = new HashMap();
  14. private Map<String, String[]> mapOutbound = new HashMap();
  15. private List params = new ArrayList();
  16.  
  17. DB db = new DB("jdbc:mysql://localhost:3306/webcrawlerdb","root",""); // set server address, username and password
  18.  
  19. public void start() {
  20. InboundLinks();
  21. OutboundLinks();
  22. Set urls = db.getLinks();
  23.  
  24. Iterator it = urls.iterator();
  25. double sum = 0;
  26. while(it.hasNext()){
  27. Object url = it.next();
  28. db.insertRank(url.toString(), rank(url.toString()));
  29. sum += (rank(url.toString()));
  30. }
  31. System.out.println("PageRank finnished");
  32. }
  33.  
  34. /*public static void main(String[] args) {
  35. Ranking ranking = new Ranking();
  36. }*/
  37.  
  38. private void InboundLinks() {
  39. mapInbound = db.generateInboundLinks();
  40.  
  41. /*for (Map.Entry entry : mapInbound.entrySet()) {
  42. String[] inboundsArray = (String[]) entry.getValue();
  43. System.out.println(entry.getKey() + "|||" + Arrays.toString(inboundsArray));
  44. }*/
  45. }
  46.  
  47. private void OutboundLinks() {
  48. mapOutbound = db.generateOutboundLinks();
  49. }
  50.  
  51. public double rank(String pageId) {
  52. generateParamList(pageId);
  53. Matrix a = new Matrix(generateMatrix());
  54. double[][] arrB = new double[params.size()][1];
  55. for (int i = 0; i < params.size(); i++) {
  56. arrB[i][0] = 1 - DAMPING_FACTOR;
  57. }
  58. Matrix b = new Matrix(arrB);
  59. // Solve the equation and get the page ranks
  60. Matrix x = a.solve(b);
  61. int ind = 0;
  62. int cnt = 0;
  63. for (Iterator it = params.iterator(); it.hasNext();) {
  64. String curPage = (String) it.next();
  65. if (curPage.equals(pageId))
  66. ind = cnt;
  67. cnt++;
  68. }
  69. return x.getArray()[ind][0];
  70. }
  71.  
  72. private double[][] generateMatrix() {
  73. double[][] arr = new double[params.size()][params.size()];
  74. for (int i = 0; i < params.size(); i++) {
  75. for (int j = 0; j < params.size(); j++) {
  76. arr[i][j] = getMultiFactor((String) params.get(i),
  77. (String) params.get(j));
  78. }
  79. }
  80. return arr;
  81. }
  82.  
  83.  
  84. private double getMultiFactor(String sourceId, String linkId) {
  85. if (sourceId.equals(linkId))
  86. return 1;
  87. else {
  88. String[] inc = getInboundLinks(sourceId);
  89. for (int i = 0; i < inc.length; i++) {
  90. if (inc[i].equals(linkId)) {
  91. return -1 * (DAMPING_FACTOR / getOutboundLinks(linkId).length);
  92. }
  93. }
  94. }
  95. return 0;
  96. }
  97.  
  98. private void generateParamList(String pageId) {
  99. // Add the starting page.
  100. if (!params.contains(pageId))
  101. params.add(pageId);
  102. // Get list of the inbound pages
  103. String[] inc = getInboundLinks(pageId);
  104. // Add the inbound links to the params list and do same for inbound
  105. // links
  106. for (int i = 0; i < inc.length; i++) {
  107. if (!params.contains(inc[i]))
  108. generateParamList(inc[i]);
  109. }
  110. }
  111.  
  112. private String[] getInboundLinks(String pageId) {
  113. return (String[]) mapInbound.get(pageId);
  114. }
  115.  
  116. private String[] getOutboundLinks(String pageId) {
  117. return (String[]) mapOutbound.get(pageId);
  118. }
  119. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement