Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package sk.tuke.tssu;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- //import org.openqa.selenium.WebElement;
- import java.io.IOException;
- import java.sql.*;
- import java.text.ParseException;
- import java.text.SimpleDateFormat;
- import java.util.ArrayList;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- public class Orsr {
- private static final String JDBC_DRIVER = "com.mysql.jdbc.Driver"; // fully-qualified class name of the MySQL JDBC driver
- private static final String DB_URL = "jdbc:mysql://localhost/"; // JDBC URL without a schema; the query in main() qualifies the table as tssu.faktura
- // Database credentials -- NOTE(review): hard-coded in source; consider loading them from configuration instead
- private static final String USER = "tssuuser";
- private static final String PASS = "tssuuserPW";
- private static ArrayList<Long> pouziteICO = new ArrayList<Long>(); // name means "used ICOs"; no read or write of it is visible in this class
- private static MySQLAccess mySQLAccess; // assigned in main(); no other use is visible in this class
- private static int IdStatut = 0; // running ID printed with each record; main() increments it once per processed ICO
- public static void main(String[] args) throws IOException, ParseException, InterruptedException {
- ArrayList<Long> icoList = new ArrayList<Long>();
- Element row = null;
- String html;
- int i = 0;
- Connection conn = null;
- Statement stmt = null;
- mySQLAccess = new MySQLAccess();
- //ORSR
- try {
- //STEP 2: Register JDBC driver
- Class.forName("com.mysql.jdbc.Driver");
- //STEP 3: Open a connection
- System.out.println("Connecting to a selected database...");
- conn = DriverManager.getConnection(DB_URL, USER, PASS);
- System.out.println("Connected database successfully...");
- //STEP 4: Execute a query
- System.out.println("Creating statement...");
- stmt = conn.createStatement();
- String sql = "SELECT ICO FROM tssu.faktura";
- ResultSet rs = stmt.executeQuery(sql);
- //STEP 5: Extract data from result set
- while (rs.next()) {
- //Retrieve by column na
- long ico = rs.getLong("ICO");
- icoList.add(ico);
- //System.out.println(i+ " ICO: " + ico);
- i++;
- }
- rs.close();
- } catch (Exception e) {
- //Handle errors for Class.forName
- e.printStackTrace();
- } finally {
- //finally block used to close resources
- try {
- if (stmt != null)
- conn.close();
- } catch (SQLException se) {
- }// do nothing
- try {
- if (conn != null)
- conn.close();
- } catch (SQLException se) {
- se.printStackTrace();
- }//end finally try
- }//end try
- // System.out.println(icoList.size());
- //--------------------------DOWNLOAD----------------------------------------
- for (int j = 232; j < icoList.size(); j++) {
- System.out.println("***************************************************************************************");
- System.out.println("Cislo faktury v cykle: " + j);
- if (j % 100 == 0) {
- System.out.println("Ideme spinkať...");
- Thread.sleep(2500);
- }
- ////////////////////////////////////////////////
- ////////////tabulka dodavatel////////////////////
- boolean isOZPZO = false;
- String sidlo = null;
- String denVymazu = null;
- String denZapisu = null;
- String obchodnéMeno = null;
- String čisloDomu;
- String pravnaForma = null;
- String PSČ;
- String mesto;
- String ulica;
- String ico = String.valueOf(icoList.get(j));
- ///////////////////////////////////////////////////
- ////////////////////////////////////////////////
- ////////////tabulka statutarny organ////////////
- String statutarnyOrgan = null;
- ///////////////////////////////////////////////
- ico = ico.replaceAll(" ", "");
- String url = "http://orsr.sk/hladaj_ico.asp?ICO=+" + ico;
- // System.out.println("Adresa pre URL PRE HLADANIE:" + url);
- Document document = Jsoup.connect(url).get();
- Elements links = document.select("a[href]");
- String[] urls = new String[links.size()];
- for (i = 0; i < links.size(); i++) {
- urls[i] = links.get(i).attr("href");
- }
- url = "http://orsr.sk/" + urls[8];
- Pattern p = Pattern.compile("&P=1");
- Matcher m = p.matcher(url);
- if (m.find()) {
- System.out.println("Adresa pre URL PRE DETAIL:" + url);
- // System.out.println("=========================");
- document = Jsoup.connect(url).get();
- int tableSize = document.select("table").size();
- Element table = null;
- Elements rows = null;
- int helpName = 0;
- for (int l = 0; l < tableSize - 1; l++) {
- table = document.select("table").get(l);
- rows = table.select("tr");
- p = Pattern.compile("Obchodné meno"); ///menil som :
- m = p.matcher(rows.toString());
- if (m.find()) {
- helpName = l;
- break;
- }
- }
- table = document.select("table").get(helpName);
- rows = table.select("tr");
- p = Pattern.compile("Obchodné meno"); //menil som
- m = p.matcher(rows.toString());
- if (m.find()) {
- row = rows.get(1);
- Elements cols = row.select("td");
- Element col = cols.get(0);
- obchodnéMeno = col.toString();
- obchodnéMeno = obchodnéMeno.replaceAll("<[^>]*>", "");
- // System.out.println("Obchodné meno: " + obchodnéMeno);
- } else {
- isOZPZO = true;
- row = rows.get(1);
- Elements cols = row.select("td");
- Element col = cols.get(0);
- obchodnéMeno = col.toString();
- obchodnéMeno = obchodnéMeno.replaceAll("<[^>]*>", "");
- // System.out.println("Obchodné meno: " + obchodnéMeno);
- }
- ///////////////////////vlozenie obchodneho mena do tabulky dodavatel
- int helpSidlo = 0;
- int helpDenZapisu = 0;
- int helpDenVymazu = 0;
- int helpPravnaForma = 0;
- int helpStatutarnyOrgan = 0;
- for (int l = 0; l < tableSize - 1; l++) {
- table = document.select("table").get(l);//7
- rows = table.select("tr");
- for (int n = 0; n < rows.size(); n++) {
- row = rows.get(n);
- p = Pattern.compile("Sídlo:");
- m = p.matcher(row.toString());
- if (m.find()) {
- helpSidlo = n;
- tableSize = 0;
- break;
- }
- p = Pattern.compile("Miesto podnikania:");
- m = p.matcher(row.toString());
- if (m.find()) {
- helpSidlo = n;
- tableSize = 0;
- break;
- }
- p = Pattern.compile("Sídlo organizačnej zložky:");
- m = p.matcher(row.toString());
- if (m.find()) {
- helpSidlo = n;
- tableSize = 0;
- break;
- }
- }
- }
- m = p.matcher(row.toString());
- row = rows.get(helpSidlo);
- if (m.find())
- row = rows.get(helpSidlo + 1);
- Elements cols = row.select("td");
- Element col = cols.get(0);
- sidlo = col.toString();
- //sidlo = sidlo.replaceAll("<[^>]*>", "");
- ArrayList<String> rozdelenaAdresa = rozdelAdresuVlastnika(sidlo);
- ulica = rozdelenaAdresa.get(0).trim();
- čisloDomu = rozdelenaAdresa.get(1).trim();
- mesto = rozdelenaAdresa.get(2).trim();
- PSČ = rozdelenaAdresa.get(3).trim();
- /////////////////////////vlozenie adresy do tabulky dodavatel
- tableSize = document.select("table").size();
- for (int l = 7; l < tableSize - 1; l++) {
- table = document.select("table").get(l);//7
- rows = table.select("tr");
- for (int n = 0; n < rows.size(); n++) {
- row = rows.get(n);
- p = Pattern.compile("Deň zápisu:");
- m = p.matcher(row.toString());
- if (m.find()) {
- helpDenZapisu = n;
- tableSize = 0;
- break;
- }
- }
- }
- row = rows.get(helpDenZapisu);
- m = p.matcher(row.toString());
- if (m.find())
- row = rows.get(helpDenZapisu + 1);
- cols = row.select("td");
- col = cols.get(0);
- denZapisu = col.toString();
- denZapisu = denZapisu.replaceAll("<[^>]*>", "");
- // System.out.println("Den zápisu: " + denZapisu);
- ///////////vlozenie dna zapisu dodavatel
- // System.out.println("---------------------");
- tableSize = document.select("table").size();
- for (int l = 9; l < tableSize - 1; l++) {
- table = document.select("table").get(l);
- rows = table.select("tr");
- for (int n = 0; n < rows.size(); n++) {
- row = rows.get(n);
- p = Pattern.compile("Právna forma:");
- m = p.matcher(row.toString());
- if (m.find()) {
- helpPravnaForma = n;
- tableSize = 0;
- break;
- }
- }
- }
- row = rows.get(helpPravnaForma);
- m = p.matcher(row.toString());
- if (m.find())
- row = rows.get(helpPravnaForma + 1);
- cols = row.select("td");
- col = cols.get(0);
- pravnaForma = col.toString();
- pravnaForma = pravnaForma.replaceAll("<[^>]*>", "");
- // System.out.println("Právna forma: " + pravnaForma);
- ////////////////vlozenie pravnej formy
- // System.out.println("=================================================================================================");
- tableSize = document.select("table").size();
- int tableNum = 0;
- for (int l = 10; l < tableSize - 1; l++) {
- table = document.select("table").get(l);
- rows = table.select("tr");
- for (int n = 0; n < rows.size(); n++) {
- row = rows.get(n);
- p = Pattern.compile("Štatutárny orgán:");
- m = p.matcher(row.toString());
- Pattern p2 = Pattern.compile("Vedúci organizačnej zložky:");
- Matcher m2 = p2.matcher(row.toString());
- if (m.find()) {
- helpStatutarnyOrgan = n;
- tableNum = l;
- tableSize = 0;
- break;
- } else if (m2.find()) {
- helpStatutarnyOrgan = n;
- tableNum = l;
- tableSize = 0;
- break;
- }
- }
- }
- row = rows.get(helpStatutarnyOrgan);
- m = p.matcher(row.toString());
- if (m.find())
- row = rows.get(helpStatutarnyOrgan + 1);
- cols = row.select("td");
- col = cols.get(0);
- statutarnyOrgan = col.toString();
- statutarnyOrgan = statutarnyOrgan.replaceAll("<[^>]*>", "");
- // System.out.println("Štatutárny orgán: " + statutarnyOrgan);
- ///////////////////vlozenie statutarny organ
- // System.out.println("=================================================================================================");
- p = Pattern.compile("Samostatne podnikajúca fyzická osoba");
- m = p.matcher(pravnaForma);
- if (m.find() || isOZPZO == true) {
- statutarnyOrgan = null;
- }
- //-----------ČLENOVIA------------------------
- table = document.select("table").get(tableNum);
- rows = table.select("tr");
- p = Pattern.compile("Samostatne podnikajúca fyzická osoba");
- m = p.matcher(pravnaForma);
- if (!m.find()) {
- int count = 0;
- ArrayList<String> vlastnici = new ArrayList<String>();
- ArrayList<String> ulicaVlastnika = new ArrayList<String>();
- ArrayList<String> cisloDomuVlastnika = new ArrayList<String>();
- ArrayList<String> mestoVlastnika = new ArrayList<String>();
- ArrayList<String> pscVlastnika = new ArrayList<String>();
- ArrayList<String> poziciaVlastnika = new ArrayList<String>();
- ArrayList<String> odDatum = new ArrayList<String>();
- ArrayList<String> doDatum = new ArrayList<String>();
- for (int n = 1; n < rows.size(); n++) {
- if (isPredstavenstvo(rows)) {
- row = rows.get(n);
- p = Pattern.compile("predstavenstvo");
- m = p.matcher(row.toString());
- p = Pattern.compile("konatelia");
- Matcher matcher = p.matcher(row.toString());
- if (!m.find()) {
- if (!matcher.find()) {
- cols = row.select("td");
- col = cols.get(0);
- String vlastnik = col.toString();
- p = Pattern.compile("-");
- m = p.matcher(row.toString());
- if (m.find()) {
- Pattern p1 = Pattern.compile("predstavenstvo");
- Matcher m1 = p1.matcher(vlastnik);
- //Pattern p2 = Pattern.compile("konatelia");
- //Matcher m2 = p2.matcher(vlastnik);
- if (!m1.find()) {
- System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
- rozdelMenoATituly(oddelPredstavitelov(vlastnik).get(0)); //vracia meno rozdelene na tituly a mena
- vlastnici.add(oddelPredstavitelov(vlastnik).get(0).replaceAll("<[^>]*>", ""));
- //spúšťa rozdelenie adresy vlastníka, ktorá vracia rozdelenú adresu
- p1 = Pattern.compile("člen");
- Pattern p2 = Pattern.compile("predseda");
- m1 = p1.matcher(vlastnik);
- Matcher m2 = p2.matcher(vlastnik);
- if (!m.find() && !m2.find())
- poziciaVlastnika.add("člen");
- else {
- poziciaVlastnika.add(oddelPredstavitelov(vlastnik).get(1).replaceAll("<[^>]*>", "").replaceAll("-", ""));
- }
- } else {
- rozdelMenoATituly(oddelVlastnikov(vlastnik).get(0));
- vlastnici.add(oddelVlastnikov(vlastnik).get(0).replaceAll("<[^>]*>", ""));
- poziciaVlastnika.add("člen");
- }
- }
- col = cols.get(1);
- String datumy = col.toString();
- datumy = datumy.replaceAll("<[^>]*>", "");
- datumy = datumy.replaceAll(" ", "");
- datumy = datumy.replaceAll("od:", "");
- p = Pattern.compile("do");
- m = p.matcher(datumy);
- if (m.find()) {
- odDatum.add(oddeldatum(datumy).get(0));
- doDatum.add(oddeldatum(datumy).get(1));
- } else {
- odDatum.add(datumy);
- doDatum.add(null);
- }
- System.out.println("Pozicia vlastnika: " + poziciaVlastnika.get(count));
- System.out.println("Datum od: " + editDate(odDatum.get(count).replaceAll("[(\\|)]", "").trim()));
- if (doDatum.get(count) == null) {
- System.out.println("Dátum do: Este aktualne");
- } else {
- System.out.println("Dátum do: " + editDate(doDatum.get(count).replaceAll("[(\\|)]", "")));
- }
- System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
- count++;
- }
- }
- } else {
- row = rows.get(n);
- cols = row.select("td");
- col = cols.get(0);
- String vlastnik = col.toString();
- Pattern p1 = Pattern.compile("konateľ");
- Matcher m1 = p1.matcher(vlastnik);
- Pattern p2 = Pattern.compile("konatelia");
- Matcher m2 = p2.matcher(vlastnik);
- if (!m1.find() && !m2.find()) {
- vlastnici.add(oddelVlastnikov(vlastnik).get(0).replaceAll("<[^>]*>", ""));
- ulicaVlastnika.add(oddelVlastnikov(vlastnik).get(1));
- cisloDomuVlastnika.add(oddelVlastnikov(vlastnik).get(2));
- mestoVlastnika.add(oddelVlastnikov(vlastnik).get(3));
- pscVlastnika.add(oddelVlastnikov(vlastnik).get(4));
- }
- col = cols.get(1);
- String datumy = col.toString();
- datumy = datumy.replaceAll("<[^>]*>", "");
- datumy = datumy.replaceAll(" ", "");
- datumy = datumy.replaceAll("od:", "");
- datumy = datumy.replaceAll("\\(:", "");
- p = Pattern.compile("do");
- m = p.matcher(datumy);
- if (m.find()) {
- odDatum.add(oddeldatum(datumy).get(0));
- doDatum.add(oddeldatum(datumy).get(1));
- } else {
- odDatum.add(datumy);
- doDatum.add(null);
- }
- // System.out.println("Datum od:" + editDate(odDatum.get(count).replaceAll("[(\\|)]", "")));
- if (doDatum.get(count) == null) {
- // System.out.println(" Este aktualne");
- } else {
- // System.out.println("Dátum do: " + editDate(doDatum.get(count).replaceAll("[(\\|)]", "")));
- }
- //System.out.println("Vlstníci: " + vlastnici.get(count));
- count++;
- }
- }
- }
- tableSize = document.select("table").size();
- for (int l = 10; l < tableSize - 1; l++) {
- table = document.select("table").get(l);
- rows = table.select("tr");
- for (int n = 0; n < rows.size(); n++) {
- row = rows.get(n);
- p = Pattern.compile("Deň výmazu:");
- m = p.matcher(row.toString());
- if (m.find()) {
- helpDenVymazu = n;
- tableSize = 0;
- row = rows.get(helpDenVymazu);
- m = p.matcher(row.toString());
- if (m.find())
- row = rows.get(helpDenVymazu + 1);
- cols = row.select("td");
- col = cols.get(0);
- denVymazu = col.toString();
- denVymazu = denVymazu.replaceAll("<[^>]*>", "");
- break;
- }
- }
- if (l == tableSize)
- denVymazu = null;
- }
- ///////////////////////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////tu sa bude vkladat//////////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////////////////////////
- System.out.println("__________________________________________Tabulka Dodavatel___________________________________________");
- if (denVymazu == null) {
- System.out.println("Obchodne meno: " + obchodnéMeno + " |ICO: " + ico + " |Zapis: " + editDate(denZapisu.replaceAll("[(\\|)]", "")) + " |Vymaz:*******|");
- } else {
- System.out.println("Obchodne meno: " + obchodnéMeno + " |ICO: " + ico + " |Zapis: " + editDate(denZapisu.replaceAll("[(\\|)]", "")) + " |Vymaz:" + editDate(denVymazu.replaceAll("[(\\|)]", "")));
- }
- System.out.println("Právna forma: " + pravnaForma.trim().replaceAll("\\.", "") + " |Ulica: " + ulica + " |CD: " + čisloDomu + " |PSC: " + PSČ + " |Mesto: " + mesto);
- System.out.println("______________________________________________________________________________________________________");
- System.out.println("__________________________________________Statutarny Ogan_____________________________________________");
- if (pravnaForma.trim().equals("Samostatne podnikajúca fyzická osoba")) {
- statutarnyOrgan = "Živnostník";
- } else if (pravnaForma.trim().equals("Organizačná zložka podniku zahraničnej osoby.")) {
- statutarnyOrgan = "Zahraničná osoba";
- }
- System.out.println("ICO: " + ico + " |Typ: " + statutarnyOrgan.trim() + " |ID: " + IdStatut);
- System.out.println("______________________________________________________________________________________________________");
- if (statutarnyOrgan.equals("Živnostník")) {
- System.out.println("__________________________________________Zivnostnik______________________________________________");
- System.out.println("Meno: " + "|Priezvisko: " + "|TitulPred: " + "|TitulZa: " + "|Ulica: " + "|CD: " + "|PSC: " + "|Mesto: " + "|DatumPriradenia: " + "|DatumUkoncenia: " + "|IDStatutOrganu: " + IdStatut);
- System.out.println("__________________________________________________________________________________________________");
- } else if (pravnaForma.trim().equals("Akciová spoločnosť")) {
- System.out.println("__________________________________________Predstavenstvo__________________________________________");
- //kazdeho clena este
- System.out.println("__________________________________________________________________________________________________");
- } else if (pravnaForma.trim().equals("Spoločnosť s ručením obmedzeným")) {
- System.out.println("__________________________________________Konatel_________________________________________________");
- //kazdeho konatela
- System.out.println("__________________________________________________________________________________________________");
- } else if (statutarnyOrgan.trim().equals("Zahraničná osoba")) {
- System.out.println("__________________________________________Zahranicna osoba________________________________________");
- System.out.println("Meno: " + "|Priezvisko: " + "|TitulPred: " + "|TitulZa: " + "|Ulica: " + "|CD: " + "|PSC: " + "|Mesto: " + "|DatumPriradenia: " + "|DatumUkoncenia: " + "|IDStatutOrganu: " + IdStatut);
- System.out.println("__________________________________________________________________________________________________");
- } else {
- System.out.println("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!NOVY STATUTARNY ORGAN!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
- }
- } else {
- System.out.println("Nenašlo sa IČO...");
- }
- IdStatut++;
- }
- }
- private static ArrayList<String> rozdelAdresuVlastnika(String adresa) {
- ArrayList<String> rozdelene = new ArrayList<>();
- //0 - ulica
- //1 - cislo
- //2 - mesto
- //3 - psc
- char[] buff = adresa.toCharArray();
- ArrayList<String> pomocnePole = new ArrayList<>(); //pole, kde si rozdelim data medzi '> <'
- // System.out.println(adresa);
- int pos1 = 0;
- int pos2 = 0;
- for (int i = 0; i < buff.length; i++) {
- if (pos1 != 0 && pos2 != 0 && pos1 < pos2) {
- if (adresa.substring(pos1 + 1, pos2).length() > 2) { //kvoli htmltagom kde su medzeri medzi nimi len
- pomocnePole.add(adresa.substring(pos1 + 1, pos2));
- pos1 = 0;
- pos2 = 0;
- }
- }
- if (buff[i] == '>') {
- pos1 = i;
- }
- if (buff[i] == '<') {
- pos2 = i;
- }
- }
- switch (pomocnePole.size()) {
- case 3: { //ulica, cislo ulice, mesto
- rozdelene.add(pomocnePole.get(0));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(pomocnePole.get(2));
- rozdelene.add("XXXXX");
- break;
- }
- case 4: { //ulica, cislo ulice, mesto,psc
- rozdelene.add((pomocnePole.get(0)));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(pomocnePole.get(2));
- if (pomocnePole.get(3) != null) {
- char[] pom = pomocnePole.get(3).toCharArray();
- if (Character.isAlphabetic(pom[3])) {
- rozdelene.add("XXXXX");
- } else
- rozdelene.add((pomocnePole.get(3)));
- } else
- rozdelene.add("XXXXX");
- break;
- }
- case 5: { //ulica, cislo ulice, mesto,psc
- rozdelene.add((pomocnePole.get(0)));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(pomocnePole.get(2));
- if (pomocnePole.get(3) != null) {
- char[] pom = pomocnePole.get(3).toCharArray();
- if (Character.isAlphabetic(pom[3])) {
- rozdelene.add("XXXXX");
- } else
- rozdelene.add((pomocnePole.get(3)));
- } else
- rozdelene.add("XXXXX");
- break;
- }
- case 6: { //ulica, cislo ulice, mesto,psc
- rozdelene.add((pomocnePole.get(0)));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(pomocnePole.get(2));
- if (pomocnePole.get(3) != null) {
- char[] pom = pomocnePole.get(3).toCharArray();
- if (Character.isAlphabetic(pom[3])) {
- rozdelene.add("XXXXX");
- } else
- rozdelene.add((pomocnePole.get(3)));
- } else
- rozdelene.add("XXXXX");
- break;
- }
- case 7: { //ulica, cislo ulice, mesto,psc
- rozdelene.add((pomocnePole.get(0)));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(pomocnePole.get(2));
- if (pomocnePole.get(3) != null) {
- char[] pom = pomocnePole.get(3).toCharArray();
- if (Character.isAlphabetic(pom[3])) {
- rozdelene.add("XXXXX");
- } else
- rozdelene.add((pomocnePole.get(3)));
- } else
- rozdelene.add("XXXXX");
- break;
- }
- case 8: { //ulica, cislo ulice, mesto,psc
- rozdelene.add((pomocnePole.get(0)));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(pomocnePole.get(2));
- if (pomocnePole.get(3) != null) {
- char[] pom = pomocnePole.get(3).toCharArray();
- if (Character.isAlphabetic(pom[3])) {
- rozdelene.add("XXXXX");
- } else
- rozdelene.add((pomocnePole.get(3)));
- } else
- rozdelene.add("XXXXX");
- break;
- }
- default: {
- // System.out.println(pomocnePole.size());
- rozdelene.add(" ");
- rozdelene.add(" ");
- rozdelene.add(" ");
- rozdelene.add(" ");
- }
- }
- /* System.out.println("_____________________________________________________");
- System.out.println("Ulica: " + rozdelene.get(0));
- System.out.println("Čislo: " + rozdelene.get(1));
- System.out.println("Mesto: " + rozdelene.get(2));
- System.out.println("PSČ: " + rozdelene.get(3));
- System.out.println("_____________________________________________________");*/
- return rozdelene;
- }
- private static ArrayList<String> oddelPredstavitelov(String predstavitel) {
- ArrayList<String> rozdeleniePredstavitelov = new ArrayList<String>();
- if (predstavitel != null) {
- char[] buff = predstavitel.toCharArray();
- int medzera = 0;
- String pom = "";
- for (int i = 0; i < buff.length -1; i++) {
- if (medzera == 0 && buff[i] == '<' && buff[i + 1] == 'b' && buff[i + 2] == 'r' && buff[i + 3] == '>') {
- pom = pom.replaceAll("td width=\"67%\">", "");
- Pattern p = Pattern.compile("člen");
- Pattern p2 = Pattern.compile("predseda");
- Matcher m = p.matcher(pom);
- Matcher m2 = p2.matcher(pom);
- if (!m.find() && !m2.find())
- pom = "člen";
- rozdeleniePredstavitelov.add(pom);
- // System.out.println(pom);
- pom = "";
- medzera++;
- }
- if (buff[i] == '-' && buff[i + 1] == ' ') { //lebo moze v mene byt -
- pom = pom.replaceAll("td width=\"67%\">", "");
- pom = pom.replaceAll("- ", "");
- rozdeleniePredstavitelov.add(pom);
- pom = "";
- }
- if (i == buff.length - 2) {
- rozdeleniePredstavitelov.add(rozdelAdresuVlastnika(pom).get(0));
- rozdeleniePredstavitelov.add(rozdelAdresuVlastnika(pom).get(1));
- rozdeleniePredstavitelov.add(rozdelAdresuVlastnika(pom).get(2));
- rozdeleniePredstavitelov.add(rozdelAdresuVlastnika(pom).get(3));
- pom = "";
- break;
- } else {
- pom = pom + String.valueOf(buff[i]);
- buff[i] = ' ';
- }
- }
- }
- return rozdeleniePredstavitelov;
- }
- private static ArrayList<String> oddelVlastnikov(String vlastnik) {
- ArrayList<String> rozdelenieVlastnikov = new ArrayList<String>();
- if (vlastnik != null) {
- char[] buff = vlastnik.toCharArray();
- String pom = "";
- int medzera = 0;
- for (int i = 1; i < buff.length + 1; i++) {
- if ((medzera == 0) && buff[i] == '<' && buff[i + 1] == 'b' && buff[i + 2] == 'r' && buff[i + 3] == '>') {
- pom = pom.replaceAll("td width=\"67%\">", "");
- rozdelenieVlastnikov.add(pom);
- pom = "";
- medzera++;
- // ;break;
- }
- if (i == buff.length) {
- rozdelenieVlastnikov.add(rozdelAdresuVlastnika(pom).get(0));
- rozdelenieVlastnikov.add(rozdelAdresuVlastnika(pom).get(1));
- rozdelenieVlastnikov.add(rozdelAdresuVlastnika(pom).get(2));
- rozdelenieVlastnikov.add(rozdelAdresuVlastnika(pom).get(3));
- break;
- } else {
- pom = pom + String.valueOf(buff[i]);
- buff[i] = ' ';
- }
- }
- }
- return rozdelenieVlastnikov;
- }
- private static ArrayList<String> oddeldatum(String datumy) {
- ArrayList<String> rozdeleniedatumov = new ArrayList<String>();
- if (datumy != null) {
- char[] buff = datumy.toCharArray();
- String pom = "";
- int medzera = 0;
- for (int i = 1; i < buff.length + 1; i++) {
- if (buff[i] == 'd') {
- pom = pom.replaceAll("\\(", "");
- rozdeleniedatumov.add(pom);
- pom = "";
- medzera++;
- }
- if (medzera == 1 && i == buff.length - 1) {
- pom = pom.replaceAll("do:", "");
- pom = pom.replaceAll("\\(", "");
- pom = pom.replaceAll("\\)", "");
- rozdeleniedatumov.add(pom);
- medzera++;
- break;
- } else {
- pom = pom + String.valueOf(buff[i]);
- buff[i] = ' ';
- }
- }
- }
- return rozdeleniedatumov;
- }
- private static boolean isPredstavenstvo(Elements rows) {
- Element row = rows.get(0);
- Pattern p = Pattern.compile("predstavenstvo");
- Matcher m = p.matcher(row.toString());
- return m.find();
- }
- private static Date editDate(String date) throws ParseException {
- SimpleDateFormat format = new SimpleDateFormat("dd.MM.yyyy");
- java.util.Date parsed = format.parse(date.trim());
- return new Date(parsed.getTime());
- }
- private static ArrayList<String> rozdelMenoATituly(String meno) {
- ArrayList<String> rozdelene = new ArrayList<>();
- //0 - pred
- //1 - meno
- //2 - priezvisko
- //3 - titulza
- char[] buff = meno.toCharArray();
- ArrayList<String> pomocnePole = new ArrayList<>(); //pole, kde si rozdelim data medzi '> <'
- int pos1 = 0;
- int pos2 = 0;
- for (int i = 0; i < buff.length; i++) {
- if (pos1 != 0 && pos2 != 0 && pos1 < pos2) {
- if (meno.substring(pos1 + 1, pos2).length() > 2) { //kvoli htmltagom kde su medzeri medzi nimi len
- pomocnePole.add(meno.substring(pos1 + 1, pos2));
- pos1 = 0;
- pos2 = 0;
- }
- }
- if (buff[i] == '>') {
- pos1 = i;
- }
- if (buff[i] == '<') {
- pos2 = i;
- }
- }
- switch (pomocnePole.size()) {
- case 2: { //meno a priezvisko
- rozdelene.add(" ");
- rozdelene.add(pomocnePole.get(0));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(" ");
- break;
- }
- case 3: { //meno a priezvisko a 1 titul bud pred alebo za
- if (pomocnePole.get(0).contains(".") || pomocnePole.get(0).contains(",")) {
- rozdelene.add(pomocnePole.get(0));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(pomocnePole.get(2));
- rozdelene.add(" ");
- } else {
- rozdelene.add(" ");
- rozdelene.add(pomocnePole.get(0));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(pomocnePole.get(2));
- }
- break;
- }
- case 4: { //meno a priezvisko a titul pred aj za
- rozdelene.add((pomocnePole.get(0)));
- rozdelene.add(pomocnePole.get(1));
- rozdelene.add(pomocnePole.get(2));
- rozdelene.add((pomocnePole.get(3)));
- break;
- }
- default: {
- rozdelene.add(" ");
- rozdelene.add(" ");
- rozdelene.add(" ");
- rozdelene.add(" ");
- }
- }
- // System.out.println("_____________________________________________________");
- System.out.println("Titil pred: " + rozdelene.get(0));
- System.out.println("Meno: " + rozdelene.get(1));
- System.out.println("Priezvisko: " + rozdelene.get(2));
- System.out.println("Titul za:" + rozdelene.get(3));
- // System.out.println("_____________________________________________________");
- return rozdelene;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement