Advertisement
GeeckoDev

Scraper.java

Dec 7th, 2012
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 2.07 KB | None | 0 0
  1. package scraper;
  2.  
  3. import java.io.IOException;
  4.  
  5. import com.gargoylesoftware.htmlunit.BrowserVersion;
  6. import com.gargoylesoftware.htmlunit.WebClient;
  7. import com.gargoylesoftware.htmlunit.html.HtmlForm;
  8. import com.gargoylesoftware.htmlunit.html.HtmlPage;
  9. import com.gargoylesoftware.htmlunit.html.HtmlSelect;
  10.  
  11. import model.Lesson;
  12. import model.Week;
  13.  
  14. public class Scraper {
  15.     private WebClient client;
  16.    
  17.     public Scraper() {
  18.         this.client = new WebClient();
  19.     }
  20.    
  21.     private String fetchContent(int gid, boolean next) throws IOException {
  22.         HtmlPage page;
  23.         HtmlForm form;
  24.         HtmlSelect select;
  25.         String content;
  26.        
  27.         // Open the website the normal way
  28.         this.client.getPage("http://www.formadep.fr/infolarochelle");
  29.         // Then go to the inner webpage
  30.         page = this.client.getPage("http://www.formadep.fr/extretud/exte_edtgroupe.aspx");
  31.    
  32.         // Form selection
  33.         form = page.getFormByName("_ctl0");
  34.         // Select a group
  35.         select = form.getSelectByName("dd1");
  36.         select.setSelectedAttribute(select.getOptionByValue("" + gid), true);
  37.         // Check the radio button if we want the next week
  38.         if (next) {
  39.             form.getRadioButtonsByName("semaine").get(1).setChecked(true);
  40.         }
  41.         // Submit
  42.         page = form.getInputByName("b1").click();
  43.        
  44.         // Get the text and close
  45.         content = page.asText();
  46.         this.client.closeAllWindows();
  47.        
  48.         return content;
  49.     }
  50.    
  51.     private void findWeek(int gid, boolean next, Week week) {
  52.         try {
  53.             String content;
  54.             int day=0;
  55.            
  56.             content = fetchContent(gid, next);
  57.            
  58.             for (String i : content.split("\n")) {
  59.                 if (i.contains("Matin") || i.contains("Après-midi")) {
  60.                     day = 0;
  61.                 }
  62.                 else if (i.equals("\t")) {
  63.                     day++;
  64.                 }
  65.                 else if (i.equals("\t\t")) {
  66.                     day+=2;
  67.                 }
  68.                 else if (i.contains(":")) {
  69.                     String[] s = i.split("\t");
  70.                    
  71.                     week.getDay(day).addLesson(new Lesson(s[0], s[1], s[2]));
  72.                 }
  73.             }
  74.         } catch (IOException e) {
  75.             System.out.println("IO exception");
  76.         }
  77.     }
  78.    
  79.     public void find(int gid, Week week, Week nextWeek) {
  80.         findWeek(gid, false, week);
  81.         findWeek(gid, true, nextWeek);
  82.     }
  83. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement