Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public void scrape(String url) {
- String docUrl = url;
- try {
- Document document = Jsoup.connect(docUrl).get();
- Elements foundUrls = document.select("a[href]");
- System.out.printf("Found %d links. %n", foundUrls.size());
- for (Element foundUrl : foundUrls) {
- String nextUrl = foundUrl.attr("href");
- if (visitedLinks.contains(nextUrl)) {
- System.out.println("Link already visited. Skipping URL.");
- }
- else {
- System.out.println("Scraping: "+ nextUrl);
- visitedLinks.add(nextUrl);
- scrape(nextUrl);
- }
- }
- } catch (Exception ex) {
- System.out.printf("Could not read %s.%n", url);
- }
- }
Add Comment
Please sign in to add a comment.