Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package sk.tuke.tssu;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- //import org.openqa.selenium.WebElement;
- import java.io.IOException;
- import java.sql.*;
- import java.text.ParseException;
- import java.text.SimpleDateFormat;
- import java.util.ArrayList;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- public class Orsr {
- static final String JDBC_DRIVER = "com.mysql.jdbc.Driver"; // fully-qualified JDBC driver class; main() registers this same class via Class.forName
- private static final String DB_URL = "jdbc:mysql://localhost/"; // no schema in the URL; the query in main() qualifies the table as tssu.faktura
- // Database credentials passed to DriverManager.getConnection in main(). NOTE(review): hard-coded in source - consider external configuration.
- private static final String USER = "tssuuser";
- private static final String PASS = "tssuuserPW";
- public static void main(String[] args) throws IOException, ParseException, InterruptedException {
- Element row = null;
- String html;
- ArrayList<Long> icoList = new ArrayList<Long>();
- int i = 0;
- Connection conn = null;
- Statement stmt = null;
- //ORSR
- try {
- //STEP 2: Register JDBC driver
- Class.forName("com.mysql.jdbc.Driver");
- //STEP 3: Open a connection
- System.out.println("Connecting to a selected database...");
- conn = DriverManager.getConnection(DB_URL, USER, PASS);
- System.out.println("Connected database successfully...");
- //STEP 4: Execute a query
- System.out.println("Creating statement...");
- stmt = conn.createStatement();
- String sql = "SELECT ICO FROM tssu.faktura";
- ResultSet rs = stmt.executeQuery(sql);
- //STEP 5: Extract data from result set
- while (rs.next()) {
- //Retrieve by column na
- long ico = rs.getLong("ICO");
- icoList.add(ico);
- //System.out.println(i+ " ICO: " + ico);
- i++;
- }
- rs.close();
- } catch (Exception e) {
- //Handle errors for Class.forName
- e.printStackTrace();
- } finally {
- //finally block used to close resources
- try {
- if (stmt != null)
- conn.close();
- } catch (SQLException ignored) {
- }// do nothing
- try {
- if (conn != null)
- conn.close();
- } catch (SQLException se) {
- se.printStackTrace();
- }//end finally try
- }//end try
- // System.out.println(icoList.size());
- //--------------------------DOWNLOAD----------------------------------------
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- //1506,1888,2002
- for (int j = 1800; j < icoList.size(); j++) {
- System.out.println("***************************************************************************************");
- System.out.println("Cislo faktury v cykle: " + j);
- if (j % 100 == 0) {
- System.out.println("Ideme spinkať...");
- Thread.sleep(2500);
- }
- ////////////////////////////////////////////////
- ////////////tabulka dodavatel////////////////////
- boolean isOZPZO = false;
- String sidlo = null;
- String denVymazu = null;
- String denZapisu = null;
- String obchodnéMeno;
- String čisloDomu;
- String pravnaForma = null;
- String PSČ = null;
- String mesto;
- String ulica;
- String ico = String.valueOf(icoList.get(j));
- ///////////////////////////////////////////////////
- ////////////////////////////////////////////////
- ////////////tabulka statutarny organ////////////
- String statutarnyOrgan = null;
- ///////////////////////////////////////////////
- ico = ico.replaceAll(" ", "");
- String url = "http://orsr.sk/hladaj_ico.asp?ICO=+" + ico;
- System.out.println("Adresa pre URL PRE HLADANIE:" + url);
- Document document = Jsoup.connect(url).get();
- Elements links = document.select("a[href]");
- String[] urls = new String[links.size()];
- for (i = 0; i < links.size(); i++) {
- urls[i] = links.get(i).attr("href");
- }
- url = "http://orsr.sk/" + urls[8];
- Pattern p = Pattern.compile("&P=1");
- Matcher m = p.matcher(url);
- if (m.find()) {
- System.out.println("Adresa pre URL PRE DETAIL:" + url);
- System.out.println("---------------------");
- document = Jsoup.connect(url).get();
- int tableSize = document.select("table").size();
- Element table = null;
- Elements rows = null;
- int helpName = 0;
- for (int l = 0; l < tableSize - 1; l++) {
- table = document.select("table").get(l);
- rows = table.select("tr");
- p = Pattern.compile("Obchodné meno:");
- m = p.matcher(rows.toString());
- if (m.find()) {
- helpName = l;
- break;
- }
- }
- table = document.select("table").get(helpName);
- rows = table.select("tr");
- p = Pattern.compile("Obchodné meno:");
- m = p.matcher(rows.toString());
- if (m.find()) {
- row = rows.get(1);
- Elements cols = row.select("td");
- Element col = cols.get(0);
- obchodnéMeno = col.toString();
- obchodnéMeno = obchodnéMeno.replaceAll("<[^>]*>", "");
- System.out.println("Obchodné meno: " + obchodnéMeno);
- } else {
- isOZPZO = true;
- row = rows.get(1);
- Elements cols = row.select("td");
- Element col = cols.get(0);
- obchodnéMeno = col.toString();
- obchodnéMeno = obchodnéMeno.replaceAll("<[^>]*>", "");
- System.out.println("Obchodné meno: " + obchodnéMeno);
- }
- ///////////////////////vlozenie obchodneho mena do tabulky dodavatel
- int helpSidlo = 0;
- int helpDenZapisu = 0;
- int helpDenVymazu = 0;
- int helpPravnaForma = 0;
- int helpStatutarnyOrgan = 0;
- for (int l = 0; l < tableSize - 1; l++) {
- table = document.select("table").get(l);//7
- rows = table.select("tr");
- for (int n = 0; n < rows.size(); n++) {
- row = rows.get(n);
- p = Pattern.compile("Sídlo:");
- m = p.matcher(row.toString());
- if (m.find()) {
- helpSidlo = n;
- tableSize = 0;
- break;
- }
- p = Pattern.compile("Miesto podnikania:");
- m = p.matcher(row.toString());
- if (m.find()) {
- helpSidlo = n;
- tableSize = 0;
- break;
- }
- p = Pattern.compile("Sídlo organizačnej zložky:");
- m = p.matcher(row.toString());
- if (m.find()) {
- helpSidlo = n;
- tableSize = 0;
- break;
- }
- }
- }
- m = p.matcher(row.toString());
- row = rows.get(helpSidlo);
- if (m.find())
- row = rows.get(helpSidlo + 1);
- Elements cols = row.select("td");
- Element col = cols.get(0);
- sidlo = col.toString();
- sidlo = sidlo.replaceAll("<[^>]*>", "");
- ArrayList<String> rozdelenaAdresa = rozdelAdresu(sidlo);
- ulica = rozdelenaAdresa.get(0);
- čisloDomu = rozdelenaAdresa.get(1);
- mesto = rozdelenaAdresa.get(2);
- if (rozdelenaAdresa.get(3) != null) {
- PSČ = rozdelenaAdresa.get(3);
- }
- System.out.println("ULICA: " + ulica + ", CD: " + čisloDomu + ", Mesto: " + mesto + ", psc: " + PSČ);
- ///////////vlozenie adresy do tabulky dodavatel
- tableSize = document.select("table").size();
- for (int l = 7; l < tableSize - 1; l++) {
- table = document.select("table").get(l);//7
- rows = table.select("tr");
- for (int n = 0; n < rows.size(); n++) {
- row = rows.get(n);
- p = Pattern.compile("Deň zápisu:");
- m = p.matcher(row.toString());
- if (m.find()) {
- helpDenZapisu = n;
- tableSize = 0;
- break;
- }
- }
- }
- row = rows.get(helpDenZapisu);
- m = p.matcher(row.toString());
- if (m.find())
- row = rows.get(helpDenZapisu + 1);
- cols = row.select("td");
- col = cols.get(0);
- denZapisu = col.toString();
- denZapisu = denZapisu.replaceAll("<[^>]*>", "");
- // System.out.println("Den zápisu: " + denZapisu);
- ///////////vlozenie dna zapisu dodavatel
- // System.out.println("---------------------");
- tableSize = document.select("table").size();
- for (int l = 9; l < tableSize - 1; l++) {
- table = document.select("table").get(l);
- rows = table.select("tr");
- for (int n = 0; n < rows.size(); n++) {
- row = rows.get(n);
- p = Pattern.compile("Právna forma:");
- m = p.matcher(row.toString());
- if (m.find()) {
- helpPravnaForma = n;
- tableSize = 0;
- break;
- }
- }
- }
- row = rows.get(helpPravnaForma);
- m = p.matcher(row.toString());
- if (m.find())
- row = rows.get(helpPravnaForma + 1);
- cols = row.select("td");
- col = cols.get(0);
- pravnaForma = col.toString();
- pravnaForma = pravnaForma.replaceAll("<[^>]*>", "");
- System.out.println("Právna forma: " + pravnaForma);
- ////////////////vlozenie pravnej formy
- // System.out.println("=================================================================================================");
- tableSize = document.select("table").size();
- int tableNum = 0;
- for (int l = 10; l < tableSize - 1; l++) {
- table = document.select("table").get(l);
- rows = table.select("tr");
- for (int n = 0; n < rows.size(); n++) {
- row = rows.get(n);
- p = Pattern.compile("Štatutárny orgán:");
- m = p.matcher(row.toString());
- Pattern p2 = Pattern.compile("Vedúci organizačnej zložky:");
- Matcher m2 = p2.matcher(row.toString());
- if (m.find()) {
- helpStatutarnyOrgan = n;
- tableNum = l;
- tableSize = 0;
- break;
- } else if (m2.find()) {
- helpStatutarnyOrgan = n;
- tableNum = l;
- tableSize = 0;
- break;
- }
- }
- }
- row = rows.get(helpStatutarnyOrgan);
- m = p.matcher(row.toString());
- if (m.find())
- row = rows.get(helpStatutarnyOrgan + 1);
- cols = row.select("td");
- col = cols.get(0);
- statutarnyOrgan = col.toString();
- statutarnyOrgan = statutarnyOrgan.replaceAll("<[^>]*>", "");
- if (pravnaForma.equals("Organizačná zložka podniku zahraničnej osoby."))
- statutarnyOrgan = "Organizačná zložka podniku zahraničnej osoby";
- System.out.println("Štatutárny orgán: " + statutarnyOrgan);
- ///////////////////vlozenie statutarny organ
- System.out.println("=================================================================================================");
- p = Pattern.compile("Samostatne podnikajúca fyzická osoba");
- m = p.matcher(pravnaForma);
- if (m.find() || isOZPZO) {
- statutarnyOrgan = null;
- }
- //-----------ČLENOVIA------------------------
- table = document.select("table").get(tableNum);
- rows = table.select("tr");
- p = Pattern.compile("Samostatne podnikajúca fyzická osoba");
- m = p.matcher(pravnaForma);
- if (!m.find()) {
- int count = 0;
- ArrayList<ArrayList<String>> vlastnici = new ArrayList<ArrayList<String>>();
- ArrayList<String> poziciaVlastnika = new ArrayList<String>();
- ArrayList<String> odDatum = new ArrayList<String>();
- ArrayList<String> doDatum = new ArrayList<String>();
- for (int n = 1; n < rows.size(); n++) {
- if (isPredstavenstvo(rows)) {
- row = rows.get(n);
- p = Pattern.compile("predstavenstvo");
- m = p.matcher(row.toString());
- p = Pattern.compile("konatelia");
- Matcher matcher = p.matcher(row.toString());
- if (!m.find()) {
- if (!matcher.find()) {
- cols = row.select("td");
- col = cols.get(0);
- String vlastnik = col.toString();
- p = Pattern.compile("-");
- m = p.matcher(row.toString());
- if (m.find()) {
- // rozdelMenoATituly(oddelPredstavitelov(vlastnik).get(0)); //vracia meno rozdelene na tituly a mena
- vlastnici.add(rozdelMenoATituly(oddelPredstavitelov(vlastnik).get(0)));
- Pattern p1 = Pattern.compile("člen");
- Pattern p2 = Pattern.compile("predseda");
- Matcher m1 = p1.matcher(vlastnik);
- Matcher m2 = p2.matcher(vlastnik);
- if (!m.find() && !m2.find())
- poziciaVlastnika.add("člen");
- else
- poziciaVlastnika.add(oddelPredstavitelov(vlastnik).get(1).replaceAll("<[^>]*>", "").replaceAll("-", ""));
- } else {
- // rozdelMenoATituly(oddelVlastnikov(vlastnik).get(0)); //vracia meno rozdelene na tituly a mena
- // vlastnici.add(oddelVlastnikov(vlastnik).get(0).replaceAll("<[^>]*>", ""));
- vlastnici.add(rozdelMenoATituly(oddelPredstavitelov(vlastnik).get(0)));
- poziciaVlastnika.add("člen");
- }
- col = cols.get(1);
- String datumy = col.toString();
- datumy = datumy.replaceAll("<[^>]*>", "");
- datumy = datumy.replaceAll(" ", "");
- datumy = datumy.replaceAll("od:", "");
- p = Pattern.compile("do");
- m = p.matcher(datumy);
- if (m.find()) {
- odDatum.add(oddeldatum(datumy).get(0));
- doDatum.add(oddeldatum(datumy).get(1));
- } else {
- odDatum.add(datumy);
- doDatum.add(null);
- }
- // System.out.println("Vlastnik: " + vlastnici.get(count));
- System.out.println("Vlstníci: ||TitP:" + vlastnici.get(count).get(0).trim() + ", M: " + vlastnici.get(count).get(1).trim() + " P: " + vlastnici.get(count).get(2).trim() + ",|| TitZa:" + vlastnici.get(count).get(3).trim());
- System.out.println("Pozicia vlastnika: " + poziciaVlastnika.get(count));
- System.out.println("Datum od: " + editDate(odDatum.get(count).replaceAll("[(\\|)]", "")));
- if (doDatum.get(count) == null) {
- System.out.println("Dátum do: Este aktualne");
- } else {
- System.out.println("Dátum do: " + editDate(doDatum.get(count).replaceAll("[(\\|)]", "")));
- }
- count++;
- }
- }
- } else {
- row = rows.get(n);
- cols = row.select("td");
- col = cols.get(0);
- String vlastnik = col.toString();
- // vlastnici.add(oddelVlastnikov(vlastnik).get(0).replaceAll("<[^>]*>", ""));
- vlastnici.add(rozdelMenoATituly(oddelVlastnikov(vlastnik).get(0)));
- col = cols.get(1);
- String datumy = col.toString();
- datumy = datumy.replaceAll("<[^>]*>", "");
- datumy = datumy.replaceAll(" ", "");
- datumy = datumy.replaceAll("od:", "");
- datumy = datumy.replaceAll("\\(:", "");
- p = Pattern.compile("do");
- m = p.matcher(datumy);
- if (m.find()) {
- odDatum.add(oddeldatum(datumy).get(0));
- doDatum.add(oddeldatum(datumy).get(1));
- } else {
- odDatum.add(datumy);
- doDatum.add(null);
- }
- System.out.println("Datum od:" + editDate(odDatum.get(count).replaceAll("[(\\|)]", "")));
- if (doDatum.get(count) == null) {
- System.out.println(" Este aktualne");
- } else {
- System.out.println("Dátum do: " + editDate(doDatum.get(count).replaceAll("[(\\|)]", "")));
- }
- // pri konateloch prvy riadok naprazdno
- // System.out.println("Vlaaastníci: " + vlastnici.get(count));
- System.out.println("Vlstník: TitilPred:" + vlastnici.get(count).get(0) + ", Meno: " + vlastnici.get(count).get(1) + " Priezvisko: " + vlastnici.get(count).get(2) + ", TitilZa:" + vlastnici.get(count).get(3));
- count++;
- }
- System.out.println("------------");
- }
- }
- tableSize = document.select("table").size();
- for (int l = 10; l < tableSize - 1; l++) {
- table = document.select("table").get(l);
- rows = table.select("tr");
- for (int n = 0; n < rows.size(); n++) {
- row = rows.get(n);
- p = Pattern.compile("Deň výmazu:");
- m = p.matcher(row.toString());
- if (m.find()) {
- helpDenVymazu = n;
- tableSize = 0;
- row = rows.get(helpDenVymazu);
- m = p.matcher(row.toString());
- if (m.find())
- row = rows.get(helpDenVymazu + 1);
- cols = row.select("td");
- col = cols.get(0);
- denVymazu = col.toString();
- denVymazu = denVymazu.replaceAll("<[^>]*>", "");
- break;
- }
- }
- if (l == tableSize)
- denVymazu = null;
- }
- if (denVymazu == null) {
- // System.out.println("Den výmazu: FRMA ESTE EXISTUJE");
- } else {
- // System.out.println("Den výmazu: " + editDate(denVymazu.replaceAll("[(\\|)]", "")));
- }
- } else {
- System.out.println("Nenašlo sa IČO....");
- }
- }
- }
- private static ArrayList<String> rozdelAdresu(String adresa) {
- ArrayList<String> rozdelenaAdresa = new ArrayList<String>();
- if (adresa != null) {
- adresa = adresa.replaceAll(",", "");
- char[] buff = adresa.toCharArray();
- String pom = "";
- int pocitadlomedzier = 0;
- for (int i = 1; i < buff.length + 1; i++) {
- if (buff[i] == ' ' && pocitadlomedzier == 1) {
- //pom = String.valueOf(buff).trim();
- //System.out.println(pom);
- rozdelenaAdresa.add(pom);
- pocitadlomedzier++;
- pom = "";
- }
- if (buff[i] == ' ' && pocitadlomedzier == 2 && i < buff.length && Character.isDigit(buff[i + 1]) ) {
- //pom = String.valueOf(buff).trim();
- //System.out.println(øpom);
- rozdelenaAdresa.add(pom);
- pom = "";
- pocitadlomedzier++;
- }
- if (buff[i] == ' ' && pocitadlomedzier == 3) { //pre psc
- pom = String.valueOf(buff).trim();
- // System.out.println(pom);
- rozdelenaAdresa.add(pom);
- break;
- }
- if (buff[i] == ' ' && Character.isDigit(buff[i + 1])) { //preto lebo ulica moze byt viac slovna
- rozdelenaAdresa.add(pom);
- //System.out.println(pom);
- pom = "";
- pocitadlomedzier++;
- } else {
- pom = pom + String.valueOf(buff[i]);
- buff[i] = ' ';
- }
- }
- }
- return rozdelenaAdresa;
- }
- private static ArrayList<String> oddelPredstavitelov(String predstavitel) {
- ArrayList<String> rozdeleniePredstavitelov = new ArrayList<String>();
- if (predstavitel != null) {
- char[] buff = predstavitel.toCharArray();
- String pom = "";
- for (int i = 1; i < buff.length + 1; i++) {
- if (buff[i] == '-' && buff[i + 1] == ' ') { //lebo moze v mene byt -
- pom = pom.replaceAll("td width=\"67%\">", "");
- pom = pom.replaceAll("- ", "");
- rozdeleniePredstavitelov.add(pom);
- pom = "";
- }
- if (buff[i] == '<' && buff[i + 1] == 'b' && buff[i + 2] == 'r' && buff[i + 3] == '>') {
- pom = pom.replaceAll("td width=\"67%\">", "");
- Pattern p = Pattern.compile("člen");
- Pattern p2 = Pattern.compile("predseda");
- Matcher m = p.matcher(pom);
- Matcher m2 = p2.matcher(pom);
- if (!m.find() && !m2.find())
- pom = "člen";
- rozdeleniePredstavitelov.add(pom);
- break;
- } else {
- pom = pom + String.valueOf(buff[i]);
- buff[i] = ' ';
- }
- }
- }
- return rozdeleniePredstavitelov;
- }
- private static ArrayList<String> oddelVlastnikov(String vlastnik) {
- ArrayList<String> rozdelenieVlastnikov = new ArrayList<String>();
- if (vlastnik != null) {
- char[] buff = vlastnik.toCharArray();
- String pom = "";
- int medzera = 0;
- for (int i = 1; i < buff.length + 1; i++) {
- if (buff[i] == '<' && buff[i + 1] == 'b' && buff[i + 2] == 'r' && buff[i + 3] == '>') {
- pom = pom.replaceAll("td width=\"67%\">", "");
- rozdelenieVlastnikov.add(pom);
- //System.out.println(pom);
- break;
- } else {
- pom = pom + String.valueOf(buff[i]);
- buff[i] = ' ';
- }
- }
- }
- return rozdelenieVlastnikov;
- }
- private static ArrayList<String> oddeldatum(String datumy) {
- ArrayList<String> rozdeleniedatumov = new ArrayList<String>();
- if (datumy != null) {
- char[] buff = datumy.toCharArray();
- String pom = "";
- int medzera = 0;
- for (int i = 1; i < buff.length + 1; i++) {
- if (buff[i] == 'd') {
- pom = pom.replaceAll("\\(", "");
- rozdeleniedatumov.add(pom);
- pom = "";
- medzera++;
- }
- if (medzera == 1 && i == buff.length - 1) {
- pom = pom.replaceAll("do:", "");
- pom = pom.replaceAll("\\(", "");
- pom = pom.replaceAll("\\)", "");
- rozdeleniedatumov.add(pom);
- break;
- } else {
- pom = pom + String.valueOf(buff[i]);
- buff[i] = ' ';
- }
- }
- }
- return rozdeleniedatumov;
- }
- private static boolean isPredstavenstvo(Elements rows) {
- Element row = rows.get(0);
- Pattern p = Pattern.compile("predstavenstvo");
- Matcher m = p.matcher(row.toString());
- return m.find();
- }
- private static Date editDate(String date) throws ParseException {
- SimpleDateFormat format = new SimpleDateFormat("dd.MM.yyyy");
- java.util.Date parsed = format.parse(date.trim());
- return new Date(parsed.getTime());
- }
- private static ArrayList<String> rozdelMenoATituly(String meno) {
- ArrayList<String> rozdelene = new ArrayList<>();
- //0 - pred
- //1 - meno
- //2 - priezvisko
- //3 - titulza
- char[] buff = meno.toCharArray();
- ArrayList<String> pomocnePole = new ArrayList<>(); //pole, kde si rozdelim data medzi '> <'
- int pos1 = 0;
- int pos2 = 0;
- for (int i = 0; i < buff.length; i++) {
- if (pos1 != 0 && pos2 != 0 && pos1 < pos2) {
- if (meno.substring(pos1 + 1, pos2).length() > 2) { //kvoli htmltagom kde su medzeri medzi nimi len
- pomocnePole.add(meno.substring(pos1 + 1, pos2));
- pos1 = 0;
- pos2 = 0;
- }
- }
- if (buff[i] == '>') {
- pos1 = i;
- }
- if (buff[i] == '<') {
- pos2 = i;
- }
- }
- switch (pomocnePole.size()) {
- case 2: { //meno a priezvisko
- rozdelene.add(" ");
- rozdelene.add(pomocnePole.get(0));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(" ");
- break;
- }
- case 3: { //meno a priezvisko a 1 titul bud pred alebo za
- if (pomocnePole.get(0).contains(".") || pomocnePole.get(0).contains(",")) {
- rozdelene.add(pomocnePole.get(0));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(pomocnePole.get(2));
- rozdelene.add(" ");
- } else {
- rozdelene.add(" ");
- rozdelene.add(pomocnePole.get(0));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(pomocnePole.get(2));
- }
- break;
- }
- case 4: { //meno a priezvisko a titul pred aj za
- rozdelene.add((pomocnePole.get(0)));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(pomocnePole.get(2));
- rozdelene.add((pomocnePole.get(3)));
- break;
- }
- default: {
- rozdelene.add(" ");
- rozdelene.add(" ");
- rozdelene.add(" ");
- rozdelene.add(" ");
- break;
- }
- }
- /*System.out.println("_____________________________________________________");
- System.out.println("Titil pred: " + rozdelene.get(0));
- System.out.println("Meno: " + rozdelene.get(1));
- System.out.println("Priezvisko: " + rozdelene.get(2));
- System.out.println("Titul za:" + rozdelene.get(3));
- System.out.println("_____________________________________________________");*/
- return rozdelene;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement