Guest User

Untitled

a guest
Sep 19th, 2018
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.75 KB | None | 0 0
  1. public void scrape(String url) {
  2.  
  3. String docUrl = url;
  4.  
  5. try {
  6.  
  7. Document document = Jsoup.connect(docUrl).get();
  8. Elements foundUrls = document.select("a[href]");
  9.  
  10. System.out.printf("Found %d links. %n", foundUrls.size());
  11.  
  12. for (Element foundUrl : foundUrls) {
  13.  
  14.  
  15. String nextUrl = foundUrl.attr("href");
  16.  
  17. if (visitedLinks.contains(nextUrl)) {
  18. System.out.println("Link already visited. Skipping URL.");
  19. }
  20.  
  21. else {
  22.  
  23. System.out.println("Scraping: "+ nextUrl);
  24. visitedLinks.add(nextUrl);
  25. scrape(nextUrl);
  26. }
  27.  
  28. }
  29. } catch (Exception ex) {
  30.  
  31. System.out.printf("Could not read %s.%n", url);
  32. }
  33. }
Add Comment
Please, Sign In to add comment