daily pastebin goal
34%
SHARE
TWEET

Untitled

a guest Jul 11th, 2018 59 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import java.io.*;
  2. import java.net.URL;
  3.  
  4. public class WebCrawler {
  5.  
  6.     Pair[] Visit;
  7.  
  8.     private int count;  // number of elements used in the Visit array
  9.  
  10.     Queue<String> WorkList;
  11.  
  12.     // N is the maximum number of websites to visit before stopping
  13.     public WebCrawler(int N) {
  14.         Visit = new Pair[N];
  15.         //System.out.println(Visit[0]);
  16.         count = 0;
  17.         WorkList = new Queue<String>();
  18.     }
  19.  
  20.     public String getPage( String url ) {
  21.  
  22.             try {
  23.                 BufferedReader br = new BufferedReader(
  24.                 new InputStreamReader(new URL(url).openStream()));
  25.                 StringBuffer sb = new StringBuffer();
  26.                 for( ; ; ) {
  27.                     String line = br.readLine();
  28.                     if (line == null) break;
  29.                     sb.append(line);
  30.                     sb.append("\n");
  31.                 }
  32.                 return sb.toString();
  33.             } catch( Exception e ) {
  34.             return null;
  35.             }
  36.     }
  37.  
  38.     // Beginning with the starting URL, visit websites adding them to the
  39.     // Visit array if not there already.
  40.     // The visit strategy described in Assignment 4 MUST be followed,
  41.     // otherwise there is danger of non-terminating loops.
  42.     public void Crawl( String startingURL ) {
  43.  
  44.         Pair n = new Pair(startingURL);
  45.         WorkList.enqueue(startingURL);
  46.         count = 0;
  47.         Visit[count] = n;
  48.  
  49.         while (WorkList.isEmpty() != true) {
  50.  
  51.             // Begin crawling with the startingURL
  52.             //String s = WorkList.dequeue();
  53.  
  54.         }
  55.  
  56.  
  57.  
  58.         // Prevent program from revisiting previously visited websites (preventing infinite loops)
  59.         // "Crawl" through the returned HTML from previous getPage function and find all valid URLs
  60.         // Add to the Visit array if not there already
  61.  
  62.  
  63.  
  64.  
  65.     }
  66.  
  67.     // Outputs the web addresses (URLs) and counts for the m most popular
  68.     // websites in the Visit array.
  69.     public void PrintTopSites( int m ) {
  70.         // fix me
  71.     }
  72. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top