Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package core;
- import java.io.BufferedReader;
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.io.InputStreamReader;
- import java.io.OutputStreamWriter;
- import java.net.HttpURLConnection;
- import java.net.URL;
- import java.util.ArrayList;
/**
 * Downloads a numbered series of pages from tahanovce.net and stores each one
 * as {@code <id>.html} inside the {@code Tahanovce} directory (relative to the
 * working directory).
 *
 * <p>Pages are read and written line by line using the platform default
 * charset; line terminators are normalised to the platform line separator.
 */
public class Crawler {

    /** Directory, relative to the working directory, that receives the saved pages. */
    private static final String OUTPUT_DIR = "Tahanovce";

    /** URL prefix; the numeric page id is appended to it. */
    private static final String BASE_URL = "http://www.tahanovce.net/documents/info/?id=";

    /** Highest page id to fetch; the crawl counts down from here to 1. */
    private static final int LAST_PAGE_ID = 2418;

    /**
     * Writes the given lines to {@code Tahanovce/<fileName>}, one per line,
     * creating the output directory first if it does not exist yet.
     *
     * @param page     lines of the page, in order
     * @param fileName name of the file to create inside the output directory
     * @throws IOException if the directory cannot be created or the file cannot be written
     */
    private static void writeFile1(ArrayList<String> page, String fileName) throws IOException {
        File dir = new File(OUTPUT_DIR);
        // mkdirs() returns false when the directory already exists, so re-check
        // isDirectory() before treating a false result as a failure.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create output directory: " + dir.getAbsolutePath());
        }

        File target = new File(dir, fileName);
        // try-with-resources closes (and flushes) the writer even if a write fails.
        try (BufferedWriter writer =
                new BufferedWriter(new OutputStreamWriter(new FileOutputStream(target)))) {
            for (String line : page) {
                writer.write(line);
                writer.newLine();
            }
        }
    }

    /**
     * Fetches every page id from {@link #LAST_PAGE_ID} down to 1.
     *
     * <p>A failure on one page (for example an HTTP error status, which makes
     * {@code getInputStream()} throw) is reported on {@code System.err} and the
     * crawl continues with the next id instead of aborting the whole run.
     */
    private static void downloadPages() {
        for (int id = LAST_PAGE_ID; id > 0; id--) {
            try {
                getHtml(BASE_URL, id);
            } catch (IOException e) {
                System.err.println("Skipping page " + id + ": " + e);
            }
        }
    }

    /**
     * Issues a GET request to {@code url + index}, prints the URL and response
     * code to standard output, and saves the response body as
     * {@code Tahanovce/<index>.html}.
     *
     * @param url   URL prefix to which {@code index} is appended
     * @param index page id; also used as the base name of the output file
     * @throws IOException if the request fails, the server answers with an error
     *                     status, or the page cannot be written to disk
     */
    public static void getHtml(String url, Integer index) throws IOException {
        URL pageUrl = new URL(url + index);
        HttpURLConnection con = (HttpURLConnection) pageUrl.openConnection();
        try {
            con.setRequestMethod("GET");
            int responseCode = con.getResponseCode();
            System.out.println("Sending 'GET' request to URL : " + pageUrl);
            System.out.println("Response Code : " + responseCode);

            ArrayList<String> page = new ArrayList<>();
            // Closing the reader closes the response stream, which releases the
            // connection on the success path.
            try (BufferedReader in =
                    new BufferedReader(new InputStreamReader(con.getInputStream()))) {
                String line;
                while ((line = in.readLine()) != null) {
                    page.add(line);
                }
            }

            writeFile1(page, index + ".html");
        } catch (IOException e) {
            // On failure the response stream may never have been opened or fully
            // read, so release the connection explicitly before propagating.
            con.disconnect();
            throw e;
        }
    }

    /**
     * Entry point: runs the full crawl.
     *
     * @param args ignored
     * @throws IOException kept in the signature for compatibility with existing callers
     */
    public static void main(String[] args) throws IOException {
        downloadPages();
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement