Advertisement
Guest User

webscrape

a guest
Jul 13th, 2014
189
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 1.59 KB | None | 0 0
  1. import java.io.IOException;
  2. import org.jsoup.Jsoup;
  3. import org.jsoup.nodes.Document;
  4. import org.jsoup.nodes.Element;
  5. import org.jsoup.select.Elements;
  6.  
  7. public class WebScrape {
  8.      public static void main(String[] args) {
  9.      int counter = 0;
  10.      String currentDebt = "";
  11.             Document doc;
  12.            
  13.             try {
  14.                 doc = Jsoup.connect("http://www.brillig.com/debt_clock/").get();
  15.                
  16.                 //get the items in the img and paragraph tags
  17.                 Elements img = doc.getElementsByTag("img");
  18.                 Elements paragraphs = doc.select("p");
  19.                
  20.                 //look goes through imgs to find the alts for the images. (only need the first one which contains the debt)
  21.                 for (Element el : img) {
  22.                     //only need the first alt (added a counter) there must be a better way to do this)
  23.                     if(counter != 0) {
  24.                         break;
  25.                     }          
  26.                             currentDebt = el.attr("alt");  //assigns the current debt to use as a variable later on
  27.                             counter += 1;
  28.                 }
  29.                counter = 0; // reset counter back to 0 so we can reuse.
  30.                
  31.                //goes through the paragraphs and outputs them (placing the current debt right after the first paragraph.)
  32.                 for (Element e : paragraphs) {
  33.                     System.out.printf(e.text());
  34.                    
  35.                     if(counter == 0){
  36.                         System.out.printf(" " + currentDebt + "\n");
  37.                         counter += 1;
  38.                     }
  39.                 }  
  40.                
  41.             } catch (IOException e) {
  42.                 System.out.println("Error exception");
  43.             }
  44.      }
  45. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement