Advertisement
Guest User

Untitled

a guest
Jul 24th, 2014
193
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.99 KB | None | 0 0
  1. package org.jsoup.examples;
  2.  
  3. import java.io.*;
  4.  
  5. import org.jsoup.*;
  6. import org.jsoup.nodes.*;
  7. import org.jsoup.select.Elements;
  8.  
  9. import java.io.IOException;
  10.  
  11. /**
  12. * Example program to list links from a URL.
  13. */
  14. public class parseEasy {
  15.  
  16.  
  17.  
  18. static parseEasy parseIt = new parseEasy();
  19.  
  20. String companyName = "Platzhalter";
  21. String jobTitle = "Platzhalter";
  22. String location = "Platzhalter";
  23. String timeAdded = "Platzhalter";
  24.  
  25. public static void main(String[] args) throws IOException
  26. {
  27. Database connect = new Database();
  28. connect.OpenConnectionDB();
  29.  
  30. parseIt.getData(connect);
  31.  
  32. connect.closeConnectionDB();
  33.  
  34.  
  35. }
  36.  
  37. //
  38. public void getData(Database c) throws IOException
  39. {
  40.  
  41.  
  42. Document document = Jsoup.parse(new File("C:/Talend/workspace/WEBCRAWLER/output/keywords_SOA.txt"), "utf-8");
  43. Elements elements = document.select(".joblisting");
  44. for (Element element : elements)
  45. {
  46. // Counter for Number of Elements returned
  47.  
  48.  
  49. // Parse Data into Elements
  50. Elements jobTitleElement = element.select(".job_title span");
  51. Elements companyNameElement = element.select(".company_name span[itemprop=name]");
  52. Elements locationElement = element.select(".locality span[itemprop=addressLocality]");
  53. Elements dateElement = element.select(".job_date_added [datetime]");
  54.  
  55. // Strip Data from unnecessary tags
  56. String companyName = companyNameElement.text();
  57. String jobTitle = jobTitleElement.text();
  58. String location = locationElement.text();
  59. String timeAdded = dateElement.attr("datetime");
  60.  
  61.  
  62.  
  63. c.insert(companyName, jobTitle, timeAdded, location);
  64. c.closeConnectionDB();
  65.  
  66.  
  67.  
  68. // Test output
  69. //System.out.println("Firma:t"+ companyName + "t" + jobTitle + "t in:t" + location + " t Erstellt am t" + timeAdded + "t. Eintrag Nummer:t" + count);
  70. }
  71. }
  72. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement