Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package core;
- import java.io.BufferedReader;
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileReader;
- import java.io.FileWriter;
- import java.io.IOException;
/**
 * Turns saved HTML detail pages into a SQL script.
 *
 * <p>{@link #main(String[])} appends a {@code CREATE TABLE} statement and the head of a
 * multi-row {@code INSERT} to {@code tahanovce.sql}, then scans the files
 * {@code Tahanovce/<id>.html} and appends one value tuple per page that qualifies.
 *
 * <p>A value is picked up from the line that directly follows a line containing
 * {@code <td class="label">LABEL: </td>}. Non-ASCII characters in the labels are written
 * as Unicode escapes so that matching does not depend on the encoding the compiler
 * assumes for this source file.
 *
 * <p>NOTE(review): input pages are read with the platform default charset (that is what
 * {@link FileReader} does); labels with diacritics only match when that charset agrees
 * with the encoding of the saved pages. Confirm before running on another machine.
 */
public class ParserTahanovce {

    /** Script that every append goes to; relative to the working directory. */
    private static final String OUTPUT_FILE = "tahanovce.sql";

    /** Directory prefix of the input pages scanned by {@link #readAll()}. */
    private static final String INPUT_DIR = "Tahanovce/";

    /** Highest page id scanned by {@link #readAll()}; ids are visited from here down to 1. */
    private static final int LAST_PAGE_ID = 2418;

    /** Table created and filled by the generated script. */
    private static final String TABLE_NAME = "fakturyTahanovce";

    /** A page is exported only when exactly this many recognised fields are non-empty. */
    private static final int REQUIRED_VALUES = 10;

    /** Text columns of the table, in INSERT order (the leading {@code id} column is implicit). */
    private static final String[] COLUMN_NAMES = {
        "typ_dokumentu", "predmet", "datum_zverejnenia", "datum_vystavenia", "datum_prijatia",
        "obchodne_meno", "ico", "adresa", "suma", "vratane_dph"
    };

    /** Page labels feeding {@link #COLUMN_NAMES}, index for index. */
    private static final String[] COLUMN_LABELS = {
        "Typ dokumentu", "Predmet", "D\u00e1tum zverejnenia", "D\u00e1tum vystavenia",
        "D\u00e1tum prijatia", "Obchodn\u00e9 meno", "I\u010cO", "Adresa", "Suma",
        "Vr\u00e1tane DPH"
    };

    /**
     * Label of the "order signed by" field. The table has no column for it, so its value
     * is never written, but a non-empty value still counts toward
     * {@link #REQUIRED_VALUES}, exactly as before.
     * NOTE(review): confirm that counting a field without a column is intended.
     */
    private static final String SIGNER_LABEL = "Objedn\u00e1vku podp\u00edsal";

    /** Builds the exact label cell markup that is searched for in a page line. */
    private static String labelCell(String label) {
        return "<td class=\"label\">" + label + ": </td>";
    }

    /** Wraps a value in single quotes, doubling embedded quotes so the literal stays valid SQL. */
    private static String sqlQuote(String value) {
        return "'" + value.replace("'", "''") + "'";
    }

    /**
     * Trims the line and then deletes every {@code <...>} span from it.
     *
     * <p>Whitespace that sat between a tag and the text is kept, e.g.
     * {@code "<td> a </td>"} becomes {@code " a "}.
     *
     * @param line one line of markup; must not be {@code null}
     * @return the line without tags
     */
    public static String removeTags(String line) {
        return line.trim().replaceAll("<.*?>", "");
    }

    /**
     * Splits a contracting-parties line into trimmed, comma-separated parts.
     *
     * <p>The customer prefix is removed and the supplier prefix is replaced by a comma
     * before splitting, so the supplier always starts a new part.
     *
     * @param line text containing the customer and supplier prefixes
     * @return the trimmed parts; trailing empty parts are dropped by {@link String#split(String)}
     */
    public static String[] zmluvnaStranaParser(String line) {
        String normalized = line.replace("Odberate\u013e:", "").replace("Dod\u00e1vate\u013e:", ",");
        String[] parts = normalized.split(",");
        for (int i = 0; i < parts.length; i++) {
            parts[i] = parts[i].trim();
        }
        return parts;
    }

    /**
     * Builds the head of the script: the {@code CREATE TABLE} statement, a newline, and the
     * {@code INSERT INTO ... VALUES} prefix without a trailing newline.
     *
     * @return the script head, ready to be followed by value tuples
     */
    public static String createHeader() {
        StringBuilder definitions = new StringBuilder("id int");
        StringBuilder names = new StringBuilder("id");
        for (String column : COLUMN_NAMES) {
            definitions.append(',').append(column).append(" varchar(255)");
            names.append(", ").append(column);
        }
        return "CREATE TABLE IF NOT EXISTS " + TABLE_NAME + " (" + definitions + ");" + "\n"
                + "INSERT INTO " + TABLE_NAME + " (" + names + ")" + "\n"
                + "VALUES";
    }

    /**
     * Appends {@code line} plus a newline to the output script, creating the file if needed.
     * I/O failures are reported on standard error and otherwise ignored.
     *
     * @param line text to append; must not be {@code null}
     */
    public static void appendLine(String line) {
        // FileWriter in append mode creates a missing file itself; try-with-resources
        // closes the writer on every path.
        try (BufferedWriter out = new BufferedWriter(new FileWriter(OUTPUT_FILE, true))) {
            out.write(line);
            out.write('\n');
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts one value tuple from a page.
     *
     * <p>Values are stored by column, so the tuple always has one value per table column
     * regardless of the order (or absence) of fields in the page. Each value is stripped
     * of tags and surrounding whitespace and quoted with {@code sqlQuote}.
     *
     * @param reader source of the page lines; not closed by this method
     * @param id     page id, written as the first (quoted) value
     * @return the tuple {@code ('id','v1',...,'v10')} without a trailing separator, or
     *         {@code null} when the number of non-empty recognised fields is not
     *         {@link #REQUIRED_VALUES}
     * @throws IOException if reading fails
     */
    static String buildRow(BufferedReader reader, Integer id) throws IOException {
        String[] cells = new String[COLUMN_LABELS.length];
        String[] values = new String[COLUMN_LABELS.length];
        for (int c = 0; c < COLUMN_LABELS.length; c++) {
            cells[c] = labelCell(COLUMN_LABELS[c]);
            values[c] = "";
        }
        String signerCell = labelCell(SIGNER_LABEL);

        int nonEmpty = 0;
        String previousLine = "";
        String currentLine;
        while ((currentLine = reader.readLine()) != null) {
            // The value of a field is on the line right after its label cell.
            for (int c = 0; c < cells.length; c++) {
                if (previousLine.contains(cells[c])) {
                    values[c] = removeTags(currentLine).trim();
                    if (!values[c].isEmpty()) {
                        nonEmpty++;
                    }
                }
            }
            if (previousLine.contains(signerCell) && !removeTags(currentLine).trim().isEmpty()) {
                nonEmpty++;
            }
            previousLine = currentLine;
        }

        if (nonEmpty != REQUIRED_VALUES) {
            return null;
        }
        StringBuilder row = new StringBuilder("(");
        row.append(sqlQuote(String.valueOf(id)));
        for (String value : values) {
            row.append(',').append(sqlQuote(value));
        }
        return row.append(')').toString();
    }

    /**
     * Reads the page file {@code name} and returns its tuple, logging the outcome.
     *
     * @return the tuple, or {@code null} when the page does not qualify or cannot be read
     */
    private static String rowFromFile(String name, Integer id) {
        try (BufferedReader reader = new BufferedReader(new FileReader(name))) {
            String row = buildRow(reader, id);
            if (row == null) {
                System.out.println("skipped " + name + ": needs exactly " + REQUIRED_VALUES
                        + " non-empty values");
            } else {
                System.out.println(row);
            }
            return row;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Parses one page and, when it qualifies, appends its tuple followed by a comma to the
     * output script. Unreadable files are reported on standard error and skipped.
     *
     * @param name path of the HTML page
     * @param id   id written as the first value of the tuple
     */
    public static void read(String name, Integer id) {
        String row = rowFromFile(name, id);
        if (row != null) {
            appendLine(row + ",");
        }
    }

    /**
     * Scans pages {@code 2418.html} down to {@code 1.html} in {@code Tahanovce/} and appends
     * their tuples. Tuples are separated by commas and the last one is closed with a
     * semicolon, so the INSERT statement started by {@link #createHeader()} is complete.
     */
    public static void readAll() {
        // One row is held back so that the final row can get ';' instead of ','.
        String pending = null;
        for (int i = LAST_PAGE_ID; i > 0; i--) {
            String row = rowFromFile(INPUT_DIR + i + ".html", i);
            if (row != null) {
                if (pending != null) {
                    appendLine(pending + ",");
                }
                pending = row;
            }
            System.out.println(i);
        }
        if (pending != null) {
            appendLine(pending + ";");
        } else {
            System.err.println("No page qualified; the INSERT statement in " + OUTPUT_FILE
                    + " has no rows and is incomplete.");
        }
    }

    /** Writes the script head and then the tuples of all pages. */
    public static void main(String[] args) {
        appendLine(createHeader());
        readAll();
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement