Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package org.jsoup.examples;
- import java.io.*;
- import org.jsoup.*;
- import org.jsoup.nodes.*;
- import org.jsoup.select.Elements;
- import java.io.IOException;
- /**
- * Example program to list links from a URL.
- */
- public class parseEasy {
- static parseEasy parseIt = new parseEasy();
- String companyName = "Platzhalter";
- String jobTitle = "Platzhalter";
- String location = "Platzhalter";
- String timeAdded = "Platzhalter";
- public static void main(String[] args) throws IOException
- {
- Database connect = new Database();
- connect.OpenConnectionDB();
- parseIt.getData(connect);
- connect.closeConnectionDB();
- }
- //
- public void getData(Database c) throws IOException
- {
- Document document = Jsoup.parse(new File("C:/Talend/workspace/WEBCRAWLER/output/keywords_SOA.txt"), "utf-8");
- Elements elements = document.select(".joblisting");
- for (Element element : elements)
- {
- // Counter for Number of Elements returned
- // Parse Data into Elements
- Elements jobTitleElement = element.select(".job_title span");
- Elements companyNameElement = element.select(".company_name span[itemprop=name]");
- Elements locationElement = element.select(".locality span[itemprop=addressLocality]");
- Elements dateElement = element.select(".job_date_added [datetime]");
- // Strip Data from unnecessary tags
- String companyName = companyNameElement.text();
- String jobTitle = jobTitleElement.text();
- String location = locationElement.text();
- String timeAdded = dateElement.attr("datetime");
- c.insert(companyName, jobTitle, timeAdded, location);
- c.closeConnectionDB();
- // Test output
- //System.out.println("Firma:t"+ companyName + "t" + jobTitle + "t in:t" + location + " t Erstellt am t" + timeAdded + "t. Eintrag Nummer:t" + count);
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement