Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.util.ArrayList;
- import java.util.StringTokenizer;
/**
 * Extracts announcement links from the textual form of an HTML page.
 *
 * <p>The page text is split on whitespace and scanned for the fixed token
 * sequence that opens a title row; the token that follows the sequence is
 * expected to carry the anchor's {@code href} attribute.
 */
public class Parser {

    /** Prefix prepended to every extracted href value. */
    private static final String BASE_URL = "http://web.dmi.unict.it";

    /** Whitespace-separated tokens that must appear, in this order, right before the href token. */
    private static final String[] ROW_MARKER = {
        "<div",
        "class=\"views-field",
        "views-field-title\">",
        "<h6",
        "class=\"field-content\"><a"
    };

    private String str; // string to parse (stored by the constructor; not read inside this class)

    /**
     * Creates a parser bound to the given text.
     *
     * @param str the text to parse
     */
    public Parser(String str) {
        this.str = str;
    }

    /**
     * Builds the list of absolute links found in the given page text.
     *
     * <p>Every occurrence of the title-row token sequence contributes the
     * token that follows it; each {@code href=} found inside that token yields
     * one link, formed by prepending the site base URL to the quoted value.
     * Input that ends in the middle of the sequence, or an {@code href=} with
     * no value after it, is ignored rather than raising
     * {@link java.util.NoSuchElementException}.
     *
     * @param str the page text; {@code null} is treated as empty
     * @return the links in order of appearance, possibly empty, never {@code null}
     */
    public static ArrayList<String> generaLink(String str) {
        ArrayList<String> links = new ArrayList<>();
        if (str == null) {
            return links;
        }

        // Materialize the tokens so that a failed partial match does not
        // consume tokens that could start a real match.
        ArrayList<String> tokens = new ArrayList<>();
        StringTokenizer tokenizer = new StringTokenizer(str);
        while (tokenizer.hasMoreTokens()) {
            tokens.add(tokenizer.nextToken());
        }

        int i = 0;
        // A full match needs the marker tokens plus one more token to capture.
        while (i + ROW_MARKER.length < tokens.size()) {
            if (markerStartsAt(tokens, i)) {
                collectLinks(tokens.get(i + ROW_MARKER.length), links);
                i += ROW_MARKER.length + 1; // resume after the captured token
            } else {
                i++;
            }
        }
        return links;
    }

    /** Tells whether the title-row token sequence begins at index {@code start}. */
    private static boolean markerStartsAt(ArrayList<String> tokens, int start) {
        for (int k = 0; k < ROW_MARKER.length; k++) {
            if (!ROW_MARKER[k].equals(tokens.get(start + k))) {
                return false;
            }
        }
        return true;
    }

    /** Appends one absolute link for each {@code href=} value found in {@code attribute}. */
    private static void collectLinks(String attribute, ArrayList<String> links) {
        StringTokenizer parts = new StringTokenizer(attribute, "\" ");
        while (parts.hasMoreTokens()) {
            // The value is the piece right after "href="; skip when it is missing (e.g. href="").
            if (parts.nextToken().equals("href=") && parts.hasMoreTokens()) {
                links.add(BASE_URL + parts.nextToken());
            }
        }
    }
}
- import java.io.File;
- import java.text.DateFormat;
- import java.util.StringTokenizer;
- import org.jsoup.*;
- import org.jsoup.nodes.Document;
- import java.util.ArrayList;
- import java.util.Calendar;
- import java.util.Date;
- import java.util.GregorianCalendar;
- import java.util.Locale;
- import java.util.TimeZone;
- public class ParserAvvisi_DMIUNICT {
- static String pathData = "..\\..\\data";
- // C:\Users\Pierpaolo\Documents\NetBeansProjects\ParserAvvisi_DMIUNICT\data
- public static void main(String[] args) throws InterruptedException {
- Document doc;
- Parser parser;
- File file;
- while (true) {
- // Gestione calendario + estrapolazione link
- Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("Europe/Rome"),Locale.ITALY);
- Date today = calendar.getTime();
- DateFormat dateFormat =DateFormat.getDateInstance(DateFormat.SHORT, Locale.ITALY);
- String data = dateFormat.format(today); // 16/10/17
- String mese = data.substring(3,5); // 10
- String anno = data.substring(6,8); // 17
- String inLink = "20"+anno+mese;
- // Gestione file
- if (true);
- // Gestione estrapolazione documento HTML
- doc = new Document ("");
- try {
- doc = Jsoup.connect("http://web.dmi.unict.it/Didattica/Laurea%20Triennale%20in%20Informatica%20L-31/Avvisi/Archivio%20Avvisi/"+inLink).get();
- } catch(Exception e) {
- e.printStackTrace();
- }
- String stringa = doc.toString();
- // Parsing + Estrapolazione link
- parser = new Parser(stringa);
- ArrayList<String> link = Parser.generaLink(stringa);
- System.out.println(link.toString());
- Thread.sleep(5000);
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement