Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import java.io.*;
import java.net.URL;
- public class WebCrawler {
- Pair[] Visit;
- private int count; // number of elements used in the Visit array
- Queue<String> WorkList;
- // N is the maximum number of websites to visit before stopping
- public WebCrawler(int N) {
- Visit = new Pair[N];
- //System.out.println(Visit[0]);
- count = 0;
- WorkList = new Queue<String>();
- }
- public String getPage( String url ) {
- try {
- BufferedReader br = new BufferedReader(
- new InputStreamReader(new URL(url).openStream()));
- StringBuffer sb = new StringBuffer();
- for( ; ; ) {
- String line = br.readLine();
- if (line == null) break;
- sb.append(line);
- sb.append("\n");
- }
- return sb.toString();
- } catch( Exception e ) {
- return null;
- }
- }
- // Beginning with the starting URL, visit websites adding them to the
- // Visit array if not there already.
- // The visit strategy described in Assignment 4 MUST be followed,
- // otherwise there is danger of non-terminating loops.
- public void Crawl( String startingURL ) {
- Pair n = new Pair(startingURL);
- WorkList.enqueue(startingURL);
- count = 0;
- Visit[count] = n;
- while (WorkList.isEmpty() != true) {
- // Begin crawling with the startingURL
- //String s = WorkList.dequeue();
- }
- // Prevent program from revisiting previously visited websites (preventing infinite loops)
- // "Crawl" through the returned HTML from previous getPage function and find all valid URLs
- // Add to the Visit array if not there already
- }
- // Outputs the web addresses (URLs) and counts for the m most popular
- // websites in the Visit array.
- public void PrintTopSites( int m ) {
- // fix me
- }
- }
Add Comment
Please, Sign In to add comment