Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
- package mhd;
- import java.io.IOException;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.util.ArrayList;
- import java.util.List;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- /**
- *
- * @author martrin100
- */
- public class Parser2 {
- public static void main(String[] args) throws IOException {
- String url19 = "http://imhd.zoznam.sk/ke/cestovny-poriadok/linka/19/smer/Nad-jazerom-Napajadla/zastavka/KVP-klastor/60436.html";
- Parser2 p = new Parser2(url19);
- List<String> columnTitles = p.getColumnTitles();
- System.out.println(p.parseColumn(1));
- System.out.println("titles:" + columnTitles);
- for (int i = 0; i < columnTitles.size(); i++) {
- System.out.printf("stlpec:%s casy:%s\n", columnTitles.get(i),p.parseColumn(i));
- }
- }
- private final String url;
- private Document doc;
- public Parser2(String url) {
- this.url = url;
- try {
- doc = Jsoup.parse(new URL(url), 1000 * 5);
- } catch (MalformedURLException ex) {
- throw new IllegalArgumentException(String.format("Mallformed URL:%s", url));
- } catch (IOException ex) {
- throw new IllegalArgumentException(String.format("IO excception occured while downloading:%s", url));
- }
- }
- public static class Zastavka {
- public final String meno;
- public final int cas;
- public Zastavka(String meno, int cas) {
- this.meno = meno;
- this.cas = cas;
- }
- }
- public static class Cas {
- public final int hodina;
- public final int minuta;
- public Cas(int hodina, int minuta) {
- this.hodina = hodina;
- this.minuta = minuta;
- }
- @Override
- public String toString() {
- return String.format("%02d:%02d",hodina, minuta);
- }
- }
- private Element getBase(int index) {
- return doc.getElementsByClass("cp_obsah").get(0).getElementsByTag("tr").get(index);
- }
- public List<String> getColumnTitles() {
- List<String> result = new ArrayList<>();
- for (Element el : getBase(0).getElementsByTag("td")) {
- result.add(el.text());
- }
- return result;
- }
- public List<Cas> parseColumn(int column) {
- Element base = getBase(1).getElementsByClass("cp_odchody_tabulka").get(column);
- List<Cas> result = new ArrayList<>();
- for (Element hodina : base.getElementsByClass("cp_odchody")) {
- result.addAll(parseHodina(hodina));
- }
- return result;
- }
- private List<Cas> parseHodina(Element cpOdchody) {
- Elements odchody = cpOdchody.getElementsByTag("td");
- Element odchod = cpOdchody.getElementsByClass("cp_hodina").get(0);
- odchody.remove(odchod);
- int hodina = Integer.parseInt(odchod.text());
- List<Cas> result = new ArrayList<>();
- for (Element element : filterMinuty(odchody)) {
- int minuta = Integer.parseInt(element.text());
- result.add(new Cas(hodina, minuta));
- }
- return result;
- }
- private List<Element> filterMinuty(List<Element> elements) {
- List<Element> result = new ArrayList<>();
- for (Element element : elements) {
- if (!isEmpty(element.text())) {
- result.add(element);
- }
- }
- return result;
- }
- private boolean isEmpty(String str) {
- if (str == null || str.isEmpty()) {
- return true;
- }
- boolean whitespacesOnly = true;
- for (Character ch : str.toCharArray()) {
- whitespacesOnly = whitespacesOnly && Character.isWhitespace(ch);
- }
- return whitespacesOnly;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement