import java.io.*;
import java.net.*;
import java.util.*;
import java.util.concurrent.*;
import java.util.regex.*;

public class Crawler {
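    // Matches href="..." or href='...' attributes, case-insensitively,
    // skipping fragment-only, mailto:, location. and javascript: targets;
    // group 2 captures the link itself.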
    private static final Pattern pattern1 =
        Pattern.compile( "(?i)href\\s*=\\s*(\"|\\')/?((?!#.*|/\\B|mailto:|location\\.|javascript:)[^\"\']+)(\"|\')" );
    // De-duplicates URLs across fork/join worker threads; the key is the
    // external form of the URL.
    private final ConcurrentMap<String,URL> sitesVisited = new ConcurrentHashMap<String,URL>( );
    private final ForkJoinPool pool = new ForkJoinPool( 10 );
    // This member MUST be volatile: it is written by the crawler thread and
    // read by the printer thread.
    private volatile boolean done = false;
    // ( worker thread id, ( url, depth ) ) messages for the printer thread.
    private final BlockingQueue<Tuple<Long,Tuple<URL,Integer>>> printerQ =
        new ArrayBlockingQueue<Tuple<Long,Tuple<URL,Integer>>>( 20 );
    private static final int MAX_DEPTH = 3;
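    // Minimal immutable pair, used both for ( url, depth ) and for the
    // printer messages that wrap it with the crawling thread's id.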
    private static class Tuple<T1,T2> {
        Tuple( T1 first,T2 second ) {
            this.first = first;
            this.second = second;
        }
        final T1 first;
        final T2 second;
    }
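    // Downloads the document at url and returns its HTML as a single string,
    // or null if the response is not HTML or the fetch fails.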
    private String fetch( URL url ) throws IOException {
        HttpURLConnection urlc = ( HttpURLConnection ) url.openConnection( );
        urlc.setRequestMethod( "GET" );
        urlc.setConnectTimeout( 5000 );
        urlc.setReadTimeout( 5000 );
        // "Accept" is the correct request header here; "Content-type"
        // describes a request body, which a GET does not have.
        urlc.setRequestProperty( "Accept","text/html" );
        urlc.connect( );
        try {
            String contentType = urlc.getContentType( );
            if( contentType != null && contentType.contains( "html" ) ) {
                BufferedReader reader = new BufferedReader( new InputStreamReader( urlc.getInputStream( ) ) );
                try {
                    StringBuilder sb = new StringBuilder( );
                    String inputLine;
                    while( ( inputLine = reader.readLine( ) ) != null )
                        sb.append( inputLine );
                    return sb.toString( );
                }
                finally {
                    reader.close( );
                }
            }
            return null;
        }
        catch( Exception e ) {
            System.out.println( "Fetch of document " + url + " failed" );
            return null;
        }
        finally {
            urlc.disconnect( );
        }
    }
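    // Scans the HTML for href attributes and returns the absolute http(s)
    // links found; relative and malformed links are skipped.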
    private Collection<URL> extractLinks( String html ) {
        if( html == null ) return Collections.emptyList( );
        Collection<URL> urls = new ArrayList<URL>( );
        Matcher matcher = pattern1.matcher( html );
        while( matcher.find( ) ) {
            String link = matcher.group( 2 );
            if( ! link.startsWith( "http" ) )
                continue;
            try {
                urls.add( new URL( link ) );
            }
            catch( MalformedURLException e ) {
                // One bad link should not abort the rest of the page.
            }
        }
        return urls;
    }
    /**
     * For more information see http://gee.cs.oswego.edu/dl/papers/fj.pdf
     * and http://www.coopsoft.com/ar/ForkJoinArticle.html
     */
    @SuppressWarnings("serial")
    private class CrawlerTask extends RecursiveTask<Collection<URL>> {
        private final Tuple<URL,Integer> msg;
        CrawlerTask( Tuple<URL,Integer> msg ) {
            this.msg = msg;
        }
        @Override
        protected Collection<URL> compute( ) {
            Collection<URL> urls = new ArrayList<URL>( );
            try {
                // Report and collect only if not the root of the search.
                if( msg.second > 0 ) {
                    printerQ.put( new Tuple<Long,Tuple<URL,Integer>>( Thread.currentThread( ).getId( ),msg ) );
                    urls.add( msg.first );
                }
                Collection<CrawlerTask> forks = new ArrayList<CrawlerTask>( );
                String html = fetch( msg.first );
                for( URL url : extractLinks( html ) ) {
                    // putIfAbsent claims the URL atomically, so each URL is
                    // crawled by exactly one task.
                    if( sitesVisited.putIfAbsent( url.toExternalForm( ),url ) == null && msg.second < MAX_DEPTH ) {
                        CrawlerTask task = new CrawlerTask( new Tuple<URL,Integer>( url,msg.second + 1 ) );
                        forks.add( task );
                        task.fork( );
                    }
                }
                // Fork all children first, then join, so siblings run in parallel.
                for( CrawlerTask task : forks )
                    urls.addAll( task.join( ) );
                return urls;
            }
            catch( InterruptedException e ) {
                Thread.currentThread( ).interrupt( );
                return Collections.emptyList( );
            }
            catch( IOException e ) {
                return Collections.emptyList( );
            }
        }
    }
    public static void main( String ... args ) throws Exception {
        //new Crawler( ).crawl( "http://localhost:9080/Crawler_Files/test.html" );
        Collection<URL> urls = new Crawler( ).crawl( "http://news.google.com/" );
        System.out.println( "Crawling ended with " + urls.size( ) + " URLs" );
    }
    public Collection<URL> crawl( String strURL ) throws IOException, InterruptedException {
        ExecutorService es = Executors.newFixedThreadPool( 1 );
        es.submit(
            new Callable<Void>( ) {
                @Override
                public Void call( ) throws Exception {
                    // Poll with a timeout instead of take( ): a bare take( )
                    // would block forever once the crawl is done and the
                    // queue is empty, and the printer thread would never exit.
                    while( ! done || ! printerQ.isEmpty( ) ) {
                        Tuple<Long,Tuple<URL,Integer>> msg = printerQ.poll( 100,TimeUnit.MILLISECONDS );
                        if( msg == null )
                            continue;
                        System.out.println(
                            String.format( "URL %s crawled by agent %d. Depth is %d",
                                msg.second.first,msg.first,msg.second.second ) );
                    }
                    return null;
                }
            } );
        URL rootURL = new URL( strURL );
        // Mark the root as visited so links back to it are not re-crawled.
        sitesVisited.putIfAbsent( rootURL.toExternalForm( ),rootURL );
        CrawlerTask task = new CrawlerTask( new Tuple<URL,Integer>( rootURL,0 ) );
        pool.submit( task );
        Collection<URL> urls = task.join( );
        done = true;
        es.shutdown( );
        return urls;
    }
}
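For reference, here is a minimal sketch of driving the crawler from a separate class, assuming the Crawler source above is compiled on the same classpath; the seed URL is just a placeholder.

import java.net.URL;
import java.util.Collection;

public class CrawlerDemo {
    public static void main( String ... args ) throws Exception {
        // The seed is an arbitrary example; any absolute http(s) URL works.
        Collection<URL> urls = new Crawler( ).crawl( "http://example.com/" );
        System.out.println( urls.size( ) + " URLs found:" );
        for( URL url : urls )
            System.out.println( url );
    }
}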