Advertisement
Pytypaldy

Crawler - Tahanovce, Darg hrdinov

Dec 18th, 2017
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 1.74 KB | None | 0 0
  1. package core;
  2.  
  3. import java.io.BufferedReader;
  4. import java.io.BufferedWriter;
  5. import java.io.File;
  6. import java.io.FileOutputStream;
  7. import java.io.IOException;
  8. import java.io.InputStreamReader;
  9. import java.io.OutputStreamWriter;
  10. import java.net.HttpURLConnection;
  11. import java.net.URL;
  12. import java.util.ArrayList;
  13.  
  /**
   * Downloads a descending range of document pages from tahanovce.net and stores each one as
   * {@code <id>.html} inside the local {@code Tahanovce/} directory.
   *
   * <p>NOTE(review): pages are decoded and re-encoded with the platform default charset, exactly
   * as before. If the site's encoding differs from the local default, non-ASCII characters may be
   * damaged in the saved files -- confirm the site's charset and pass it explicitly to the
   * reader and writer.
   */
  public class Crawler {

      /** Directory (relative to the working directory) that receives the downloaded pages. */
      private static final String OUTPUT_DIR = "Tahanovce/";

      /** URL prefix; the numeric document id is appended to it. */
      private static final String BASE_URL = "http://www.tahanovce.net/documents/info/?id=";

      /** First document id to fetch; the crawl counts down from here to 1. */
      private static final int FIRST_ID = 2418;

      /** Connect and read timeout so that a stalled server cannot hang the crawl forever. */
      private static final int TIMEOUT_MS = 30_000;

      /**
       * Makes sure the output directory exists, creating it (and missing parents) if needed.
       *
       * @throws IOException if the directory is missing and cannot be created
       */
      private static void ensureOutputDir() throws IOException {
          File dir = new File(OUTPUT_DIR);
          // mkdirs() returns false when the directory already exists, hence the second check.
          if (!dir.mkdirs() && !dir.isDirectory()) {
              throw new IOException("Cannot create output directory: " + dir.getAbsolutePath());
          }
      }

      /**
       * Writes the given lines to {@code Tahanovce/<fileName>}, one per line, each terminated by
       * the platform line separator. An existing file is overwritten.
       *
       * @param page     lines of the page, in order
       * @param fileName name of the target file inside the output directory
       * @throws IOException if the output directory or the file cannot be created or written
       */
      private static void writeFile1(ArrayList<String> page, String fileName) throws IOException {
          ensureOutputDir();
          File fout = new File(OUTPUT_DIR + fileName);

          // try-with-resources closes the writer (and the underlying file stream) even when a
          // write fails part-way through.
          try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fout)))) {
              for (String s : page) {
                  bw.write(s);
                  bw.newLine();
              }
          }
      }

      /**
       * Fetches every document id from {@code FIRST_ID} down to 1. A page that fails to download
       * or save is reported on standard error and skipped, so one bad id does not end the crawl.
       *
       * @throws IOException if the output directory cannot be created before the crawl starts
       */
      private static void downloadPages() throws IOException {
          // Fail fast on an unusable output directory instead of fetching pages that cannot be saved.
          ensureOutputDir();
          for (int i = FIRST_ID; i > 0; i--) {
              try {
                  getHtml(BASE_URL, i);
              } catch (IOException e) {
                  System.err.println("Skipping id " + i + ": " + e);
              }
          }
      }

      /**
       * Sends a GET request to {@code url + index} and saves the response body as
       * {@code Tahanovce/<index>.html}. The URL and the response code are printed to standard
       * output.
       *
       * @param url   URL prefix to which {@code index} is appended
       * @param index document id; also used as the file name stem
       * @throws IOException if the request fails, times out, the body cannot be read (as
       *                     {@code HttpURLConnection.getInputStream()} reports for error
       *                     statuses), or the file cannot be written
       */
      public static void getHtml(String url, Integer index) throws IOException {
          URL obj = new URL(url + index);
          HttpURLConnection con = (HttpURLConnection) obj.openConnection();
          con.setRequestMethod("GET");
          con.setConnectTimeout(TIMEOUT_MS);
          con.setReadTimeout(TIMEOUT_MS);

          int responseCode = con.getResponseCode();
          System.out.println("Sending 'GET' request to URL : " + obj);
          System.out.println("Response Code : " + responseCode);

          // The page is buffered completely before the file is opened, so a download that
          // breaks off mid-way never leaves a truncated file behind.
          ArrayList<String> page = new ArrayList<>();
          try (BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()))) {
              String inputLine;
              while ((inputLine = in.readLine()) != null) {
                  page.add(inputLine);
              }
          }

          writeFile1(page, index.toString() + ".html");
      }

      /**
       * Entry point: runs the full crawl.
       *
       * @param args ignored
       * @throws IOException if the output directory cannot be created
       */
      public static void main(String[] args) throws IOException {
          downloadPages();
      }

  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement