Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package test;
- /**
- *
- * @author User
- */
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.io.InputStreamReader;
- import java.io.OutputStream;
- import java.io.OutputStreamWriter;
- import java.io.PrintWriter;
- import java.io.Writer;
- import static java.lang.Thread.sleep;
- import java.net.HttpURLConnection;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.net.URLConnection;
- import java.util.Scanner;
/**
 * Small command-line scraper for Allegro listing pages.
 *
 * <p>{@link #main(String[])} reads page links from {@code input.txt}, downloads every page,
 * extracts offer links together with the "coins" text that follows them, and stores the
 * result in {@code gotowyArkusz.csv}. {@link #makeSomeMoreLinks(File)} expands the same input
 * into paginated, filtered listing links and stores them in {@code więcejLinków.csv}.
 *
 * <p>All files are read and written as UTF-8.
 */
public class AllegroCoinsSearcher {

    /** Charset name used for every file and for decoding downloaded pages. */
    private static final String CHARSET = "UTF-8";

    /** Browser-like User-Agent header sent with every page request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2";

    /** Text that starts an offer link inside the page source. */
    private static final String LINK_START = "https://allegro.pl/";

    /** Text that ends an offer link inside the page source. */
    private static final String LINK_END = ".html";

    /** Text that directly precedes the coin amount inside the page source. */
    private static final String COINS_START = "otrzymasz<!-- --> <!-- -->";

    /** Text that directly follows the coin amount inside the page source. */
    private static final String COINS_END = "</span>";

    /** Links of this length or longer are ignored. */
    private static final int MAX_LINK_LENGTH = 150;

    /** Coin texts of this length or longer are ignored. */
    private static final int MAX_COINS_LENGTH = 45;

    /** Pause between two consecutive page downloads, in milliseconds. */
    private static final long REQUEST_DELAY_MS = 2000;

    /**
     * Writes the given text to {@code nameOfOutputFile + ".csv"} using UTF-8.
     *
     * @param string           content to write
     * @param nameOfOutputFile output path without the {@code .csv} extension
     * @throws FileNotFoundException if the output file cannot be created or opened
     * @throws IOException           if writing fails
     */
    public static void saveStringBuilderToCSV(StringBuilder string, String nameOfOutputFile) throws FileNotFoundException, IOException
    {
        // try-with-resources closes the stream even when write() fails.
        try (OutputStream outputStream = new FileOutputStream(nameOfOutputFile + ".csv");
             Writer outputStreamWriter = new OutputStreamWriter(outputStream, CHARSET))
        {
            outputStreamWriter.write(string.toString());
        }
    }

    /**
     * Expands every non-blank line of {@code file} into paginated listing links
     * (pages 1 through 10, with a fixed filter query appended), prints them and saves them to
     * {@code więcejLinków.csv} in the working directory.
     *
     * @param file text file with one base link per line
     * @throws FileNotFoundException if {@code file} cannot be opened or the output cannot be created
     * @throws IOException           if writing the output fails
     */
    public static void makeSomeMoreLinks(File file) throws FileNotFoundException, IOException
    {
        StringBuilder collectedValue = new StringBuilder();
        String filters = "?order=p&allegro-smart-standard=1&stan=nowe&super-sprzedawca=1&monety=1&offerTypeBuyNow=1&price_to=250";
        String pagination = "&p=";
        int howManyPages = 10;

        try (Scanner read = new Scanner(file, CHARSET))
        {
            // hasNextLine() is the end-of-input test; nextLine() never returns null.
            while (read.hasNextLine())
            {
                String baseLink = read.nextLine().trim();
                if (baseLink.isEmpty())
                {
                    continue; // a blank line would only produce links consisting of the query string
                }
                // Inclusive upper bound so that exactly howManyPages links are produced.
                for (int page = 1; page <= howManyPages; page++)
                {
                    collectedValue.append(baseLink).append(filters).append(pagination).append(page).append('\n');
                }
            }
        }

        System.out.println("CollectedValue:");
        System.out.println(collectedValue);
        saveStringBuilderToCSV(collectedValue, "więcejLinków");
    }

    /**
     * Program entry point.
     *
     * <p>Generates the paginated link file from {@code input.txt}, then downloads every page
     * listed in {@code input.txt} itself (not the generated links), collects the offers found
     * on them and writes the result to {@code gotowyArkusz.csv}. A page that cannot be
     * downloaded is reported on standard error and skipped; the remaining pages are still
     * processed.
     *
     * @param args unused
     * @throws MalformedURLException declared for backward compatibility
     * @throws IOException           if {@code input.txt} cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws MalformedURLException, IOException
    {
        File plik = new File("input.txt");
        makeSomeMoreLinks(plik);

        StringBuilder savedValues = new StringBuilder();
        savedValues.append("Link;Ilość monet;Cena\n");

        try (Scanner read = new Scanner(plik, CHARSET))
        {
            boolean firstRequest = true;
            while (read.hasNextLine())
            {
                String pageLink = read.nextLine().trim();
                if (pageLink.isEmpty())
                {
                    continue;
                }
                // Pause only between downloads; waiting after the last page serves no purpose.
                if (!firstRequest)
                {
                    try
                    {
                        Thread.sleep(REQUEST_DELAY_MS);
                    }
                    catch (InterruptedException e)
                    {
                        Thread.currentThread().interrupt();
                        System.err.println("Interrupted, stopping early.");
                        break;
                    }
                }
                firstRequest = false;

                System.out.println("Wczytana linia tekstu to: " + pageLink);
                try
                {
                    collectOffersFromPage(pageLink, savedValues);
                }
                catch (IOException e)
                {
                    // One broken link or network failure must not discard the remaining pages.
                    System.err.println("Could not process " + pageLink + ": " + e);
                }
            }
        }
        System.out.println("Koniec wczytywania z pliku.");

        System.out.println("-----Wynik:-----");
        System.out.println(savedValues);
        saveStringBuilderToCSV(savedValues, "gotowyArkusz");
    }

    /** Downloads {@code pageLink} and appends the offers found on each line of its source to {@code savedValues}. */
    private static void collectOffersFromPage(String pageLink, StringBuilder savedValues) throws IOException
    {
        URLConnection con = new URL(pageLink).openConnection();
        con.setRequestProperty("User-Agent", USER_AGENT);
        try (BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), CHARSET)))
        {
            String inputLine;
            while ((inputLine = in.readLine()) != null)
            {
                collectOffersFromLine(inputLine, savedValues);
            }
        }
    }

    /** Scans one line of page source and appends a {@code link;coins;} row for every offer link followed by a coin text. */
    private static void collectOffersFromLine(String text, StringBuilder savedValues)
    {
        int cursor = 0; // position from which the next link is searched
        while (true)
        {
            int linkStart = text.indexOf(LINK_START, cursor);
            if (linkStart == -1)
            {
                System.out.println("W tej linii tekstu nie znaleziono HTTPS.");
                return;
            }
            System.out.println("Znaleziono HTTPS. Nr indeksu to: " + linkStart);

            int linkEnd = text.indexOf(LINK_END, linkStart);
            System.out.println("Szukam .html");
            if (linkEnd == -1)
            {
                return;
            }
            System.out.println("Znaleziono .html na miejscu: " + linkEnd);

            String link = text.substring(linkStart, linkEnd + LINK_END.length());
            System.out.println(link);
            cursor = linkEnd;
            if (link.length() >= MAX_LINK_LENGTH)
            {
                continue; // too long to be a single offer link
            }

            System.out.println("Szukam frazy: otrzymasz<!-- --> <!-- -->");
            int coinsMarker = text.indexOf(COINS_START, cursor);
            if (coinsMarker == -1)
            {
                continue;
            }
            System.out.println("Znaleziono: otrzymasz<!-- --> <!-- -->");
            System.out.println("Miejsce znalezionego indexu dla otrzymasz to: " + coinsMarker);

            int coinsEnd = text.indexOf(COINS_END, coinsMarker);
            System.out.println("Miejsce znalezionego indexu dla SPAN to: " + coinsEnd);
            if (coinsEnd == -1)
            {
                continue; // unterminated coin text: skip it and keep scanning after the link
            }

            String coins = text.substring(coinsMarker + COINS_START.length(), coinsEnd);
            System.out.println("Znaleziona fraza od otrzymasz -> SPAN to: " + coins);
            cursor = coinsEnd;
            // The length limit applies to the extracted coin text, not to the end marker.
            if (coins.length() < MAX_COINS_LENGTH)
            {
                savedValues.append(link);
                savedValues.append(";");
                savedValues.append(coins);
                savedValues.append(";\n");
            }
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement