Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package core;
- import java.io.BufferedReader;
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileReader;
- import java.io.FileWriter;
- import java.io.IOException;
/**
 * Turns saved invoice detail pages (HTML) into one SQL script.
 *
 * <p>{@link #main(String[])} appends a {@code CREATE TABLE} / {@code INSERT} header to the
 * output script and then one value tuple per page that contains enough data.
 *
 * <p>Things a maintainer should know:
 * <ul>
 *   <li>The output file is opened in append mode, so running the program twice appends a
 *       second header and a second set of rows to the same script.</li>
 *   <li>Every row ends with a comma, including the last one; the final comma has to be
 *       replaced by a semicolon before the script is a complete statement.</li>
 *   <li>Non-ASCII label text is written with Unicode escapes so that matching does not
 *       depend on the encoding the compiler assumes for this source file.</li>
 * </ul>
 */
public class ParserDargovskychHrdinov {

    /** Script that every generated line is appended to (relative to the working directory). */
    private static final String OUTPUT_FILE = "83612-63612.sql";

    /** Path prefix of the saved pages; a page is named after its id plus ".html". */
    private static final String INPUT_PREFIX = "DargovskychHrdinov/83612-63612/";

    /** Ids are processed from HIGHEST_ID down to LOWEST_ID_EXCLUSIVE + 1. */
    private static final int HIGHEST_ID = 83612;
    private static final int LOWEST_ID_EXCLUSIVE = 63612;

    /** A row is written only when the contracting-parties cell splits into exactly this many parts. */
    private static final int PARTY_PART_COUNT = 10;

    /** A row is written only when at least this many single-value cells were non-blank. */
    private static final int MIN_FILLED_FIELDS = 5;

    /** Matches anything that looks like an HTML tag on a single line (non-greedy). */
    private static final java.util.regex.Pattern TAG_PATTERN =
            java.util.regex.Pattern.compile("\\<.*?\\>");

    /** Text columns of the target table, in the order the values are emitted (after "id"). */
    private static final String[] TEXT_COLUMNS = {
        "typ_faktury", "splatnost", "datum_vystavenia", "datum_evidencie", "datum_zdan_plnenia",
        "viaze_sa_k", "sposob_platby",
        "odberatel_nazov", "odberatel_ico", "odberatel_adresa", "odberatel_mesto", "odberatel_psc",
        "dodavatel_nazov", "dodavatel_ico", "dodavatel_adresa", "dodavatel_mesto", "dodavatel_psc",
        "predmet", "suma_s_DPH", "mena", "vystavena", "poznamka"
    };

    /** Label cells whose value is on the NEXT line and is emitted before the parties. */
    private static final String[] CELLS_BEFORE_PARTIES = cells(
        "Typ fakt\u00fary",
        "Splatnos\u0165",
        "D\u00e1tum vystavenia",
        "D\u00e1tum evidencie",
        "D\u00e1tum zdan. plnenia",
        "Via\u017ee sa k",
        "Sp\u00f4sob platby");

    /** Label cell of the contracting parties; its value is TWO lines below the label. */
    private static final String PARTIES_CELL = cells("Zmluvn\u00e1 strana")[0];

    /** Label cells whose value is on the next line and is emitted after the parties. */
    private static final String[] CELLS_AFTER_PARTIES = cells(
        "Predmet",
        "Suma s DPH",
        "Mena",
        "Vystaven\u00e1");

    /** Label cell of the note; it maps to the last column, so no comma follows its value. */
    private static final String NOTE_CELL = cells("Pozn\u00e1mka")[0];

    /** Prefixes inside the parties cell: customer ("Odberatel:") and supplier ("Dodavatel:"). */
    private static final String CUSTOMER_PREFIX = "Odberate\u013e:";
    private static final String SUPPLIER_PREFIX = "Dod\u00e1vate\u013e:";

    /** Wraps each label in the exact table-cell markup the pages use for field labels. */
    private static String[] cells(String... labels) {
        String[] wrapped = new String[labels.length];
        for (int i = 0; i < labels.length; i++) {
            wrapped[i] = "<td><strong>" + labels[i] + "</strong></td>";
        }
        return wrapped;
    }

    /**
     * Trims the line and removes everything that looks like an HTML tag.
     *
     * <p>Trimming happens before the tags are removed, so whitespace that sat between two
     * tags survives (for example {@code "<td> </td>"} yields a single space).
     *
     * @param line one line of HTML; must not be {@code null}
     * @return the line without tags
     */
    public static String removeTags(String line) {
        return TAG_PATTERN.matcher(line.trim()).replaceAll("");
    }

    /**
     * Splits the text of the contracting-parties cell into its comma-separated parts.
     *
     * <p>The customer prefix is dropped and the supplier prefix is turned into a comma, so
     * customer and supplier parts end up in one flat array. Each part is trimmed.
     *
     * @param line tag-free text of the parties cell
     * @return the trimmed parts in their original order
     */
    public static String[] zmluvnaStranaParser(String line) {
        String flattened = line.replace(CUSTOMER_PREFIX, "").replace(SUPPLIER_PREFIX, ",");
        String[] parts = flattened.split(",");
        for (int i = 0; i < parts.length; i++) {
            parts[i] = parts[i].trim();
        }
        return parts;
    }

    /**
     * Builds the script header: the {@code CREATE TABLE IF NOT EXISTS} statement followed by
     * the opening of the {@code INSERT} statement up to and including {@code VALUES}.
     *
     * <p>Both column lists are generated from the same array so they cannot drift apart.
     *
     * @return the header text, without a trailing line break
     */
    public static String createHeader() {
        StringBuilder create = new StringBuilder("CREATE TABLE IF NOT EXISTS fakturyDargov (id int");
        StringBuilder insert = new StringBuilder("INSERT INTO fakturyDargov (id");
        for (String column : TEXT_COLUMNS) {
            create.append(',').append(column).append(" varchar(255)");
            insert.append(", ").append(column);
        }
        return create.append(");\n").append(insert).append(")\nVALUES").toString();
    }

    /**
     * Appends the given text plus a line feed to the output script, creating the file if needed.
     * I/O failures are reported on standard error and otherwise ignored.
     *
     * @param line text to append; must not be {@code null}
     */
    public static void appendLine(String line) {
        File target = new File(OUTPUT_FILE).getAbsoluteFile();
        // Append mode creates a missing file, and try-with-resources closes the writer on every path.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(target, true))) {
            writer.write(line);
            writer.write('\n');
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Wraps a value in single quotes for use as an SQL string literal, doubling any single
     * quote inside it so that scraped text cannot terminate the literal early.
     *
     * @param value raw text
     * @return the quoted literal
     */
    static String sqlQuote(String value) {
        return "'" + value.replace("'", "''") + "'";
    }

    /**
     * Appends the tag-free content of {@code htmlLine} to the row as a quoted literal.
     *
     * @return 1 when the value contains something other than whitespace, otherwise 0
     */
    private static int appendField(StringBuilder row, String htmlLine, boolean trailingComma) {
        String value = removeTags(htmlLine);
        row.append(sqlQuote(value));
        if (trailingComma) {
            row.append(',');
        }
        return value.trim().isEmpty() ? 0 : 1;
    }

    /**
     * Scans one page and assembles its value tuple.
     *
     * <p>Values are appended in the order in which their label cells are encountered, so the
     * tuple only lines up with the table columns when the page lists the fields in column
     * order and none is missing.
     *
     * @param reader source of the page's lines
     * @param id     invoice id, emitted as the first value
     * @return the tuple followed by a comma, or {@code null} when the page does not hold
     *         enough data to be written
     * @throws IOException if reading fails
     */
    static String buildRow(BufferedReader reader, Integer id) throws IOException {
        StringBuilder row = new StringBuilder("(");
        row.append(sqlQuote(String.valueOf(id))).append(',');

        int filledFields = 0;
        int partyParts = 0;
        String beforePrevious = "";
        String previous = "";
        String current;
        while ((current = reader.readLine()) != null) {
            for (String cell : CELLS_BEFORE_PARTIES) {
                if (previous.contains(cell)) {
                    filledFields += appendField(row, current, true);
                }
            }
            // The parties value sits two lines below its label, hence the older line is checked.
            if (beforePrevious.contains(PARTIES_CELL)) {
                String[] parts = zmluvnaStranaParser(removeTags(current));
                for (String part : parts) {
                    row.append(sqlQuote(part)).append(',');
                }
                partyParts = parts.length;
            }
            for (String cell : CELLS_AFTER_PARTIES) {
                if (previous.contains(cell)) {
                    filledFields += appendField(row, current, true);
                }
            }
            if (previous.contains(NOTE_CELL)) {
                filledFields += appendField(row, current, false);
            }
            beforePrevious = previous;
            previous = current;
        }
        row.append("),");

        boolean complete = filledFields >= MIN_FILLED_FIELDS && partyParts == PARTY_PART_COUNT;
        return complete ? row.toString() : null;
    }

    /**
     * Reads one saved page and, when it holds enough data, appends its value tuple to the
     * output script. I/O failures (including a missing page) are reported on standard error
     * and the page is skipped.
     *
     * @param name path of the HTML file
     * @param id   invoice id to store with the row
     */
    public static void read(String name, Integer id) {
        // NOTE(review): FileReader decodes with the platform default charset on older JDKs.
        // The labels contain diacritics, so matching depends on that charset agreeing with
        // the encoding of the saved pages - confirm and pin the charset explicitly.
        try (BufferedReader reader = new BufferedReader(new FileReader(name))) {
            String row = buildRow(reader, id);
            if (row != null) {
                appendLine(row);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Processes every page id from the highest down to the lowest, printing each id as progress. */
    public static void readAll() {
        for (int id = HIGHEST_ID; id > LOWEST_ID_EXCLUSIVE; id--) {
            read(INPUT_PREFIX + id + ".html", id);
            System.out.println(id);
        }
    }

    /**
     * Writes the header and then converts all pages.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        appendLine(createHeader());
        readAll();
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement