//JSoup SCP title fetcher

//This is a simple Java program that uses JSoup to fetch the list of article titles from the SCP Wiki website and save it to a local text file
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Scanner;

import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

public class JSoupSCP {

    //Create important global variables
    static int maxSCP = 5010; //This value determines the final SCP we write to our file at the end. Any titles that come after it are discarded
    static int listSize = (int) (1000 * (1 + Math.floor(maxSCP / 1000))); //Used to determine the length of our required arrays based on maxSCP
    static int bigSize = 0; //bigSize denotes the point at which the method getSCPs last left off so the individual results from each call
                            //can be combined without any gaps or overlap. Starts at 0 for the first execution
    static String[] bigList = new String[listSize]; //bigList is the combined list of all titles we fetch from each getSCPs method call
    static String[] oldbigList = new String[listSize]; //oldbigList ist the same as bigList but for the results from the latest saved txt file

    public static void main(String[] args) throws IOException {

        System.out.println("Number of articles to fetch: " + listSize);

        //Set the path for our resulting text-file
        String path = "E:\\User\\Downloads\\SCP titles.txt";

        //getSCPs is designed to fetch titles from only one page, so we call it once for every series of SCPs, passing the number of SCPs to fetch and
        //also the page number (empty for the first series)
        getSCPs("", 999);
        getSCPs("-2", 1000);
        getSCPs("-3", 1000);
        getSCPs("-4", 1000);
        getSCPs("-5", 1000);
        getSCPs("-6", 1000);

        System.out.println("");

        //Check if the file in our specified path already exists. If it does, read the file and compare its contents to our bigList
        File txtFile = new File(path);
        if(txtFile.exists()) {
            //readFromFile reads the contents of the existing txt file which will be overwritten at the end of the program
            readFromFile(path);
            //compareLists compares the data from the file to the newly fetched data and points out any changes
            compareLists();
        }

        //saveToFile saves the fetched titles into a local txt file for further use, the path of which is specified here
        saveToFile(path);

    }

    //getSCPs is the most important part of the program because it finds and organizes the data we want from the internet
    public static void getSCPs(String page, int length) throws IOException {

        System.out.println("Fetching data from http://www.scp-wiki.net/scp-series" + page + "...");

        //First fetch text from the SCP Wiki, the targeted page being determined by the value passed from the main when calling the method
        //This code is copied from an online post to avoid error code 500 which suddenly started appearing
        org.jsoup.Connection con = Jsoup.connect("http://www.scp-wiki.net/scp-series" + page).userAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21");
        con.timeout(180000).ignoreHttpErrors(true).followRedirects(true);
        Response resp = con.execute();
        Document doc = null;
        if (resp.statusCode() == 200) {
            doc = con.get();
        }

        //This is the way I used to do it. It stopped working consistently for some reason
        //Document doc = Jsoup.connect("http://www.scp-wiki.net/scp-series" + page).get();

        //Only use text from the html class "content-panel standalone series"
        Elements tags = doc.getElementsByClass("content-panel standalone series");
        //Only fetch elements with the tag "ul" (unordered list) within this class. These contain the individual SCPs as list items (tagged as "li")
        tags = tags.get(0).getElementsByTag("ul");

        //Create a new array with the length we need for our titles (passed over from the main)
        String[] list = new String[length];

        //Now pass over the "li" elements from the elements array "tags" to the new array "list"
        for(int i = 0, totalSize = 0; i + 1 < tags.size(); i++) {
            Elements tags2 = tags.get(i + 1).getElementsByTag("li");
            int size = tags2.size();
            //Each "ul" tag contains 100 SCPs (99 for the first batch of series 1) in the form of "li" elements. This is why we need two loops:
            //The main loop goes through each "ul" element from "tags" and the second loop saves all the "li" elements within into our new array
            for (int a = 0; a < size; a++) {
                list[a + totalSize] = tags2.get(a).text() + ";;"; //We also add ";;" to the end of each line for easier parsing later
            }

            //Update "totalSize" with the amount of list elements we extracted in the current loop so the next loop can continue where this one left off
            totalSize = totalSize + size;
        }

        System.out.println("Cleaning up...");

        //This loop here is used to remove excessive or problematic characters and lines so our end result is consistent and only contains what we want
        //It also saves any lines from the list array that aren't faulty into our final array bigList
        for(int i = 0; i < list.length; i++) {
            if(list[i] != null) {
                if(list[i].contains(" - ")) {
                    list[i] = list[i].substring(list[i].indexOf(" - ")+1);
                    if(list[i].startsWith("- ")) {
                        list[i] = list[i].substring(2);
                    }
                    list[i] = list[i].replace("\"", "'");
                }
                bigList[i + bigSize] = list[i];
            }

        }

        //Update the bigSize variable so the program knows which array index we stopped at
        //The next time we call getSCPs it will save new elements to indexes past this point
        bigSize = bigSize + list.length;

    }

    //For further use of the data we gathered we write it into a txt file
    public static void saveToFile(String path) throws IOException {

        //Initiate the FileWriter with the designated file path from our main
        FileWriter f = new FileWriter(path);
        BufferedWriter bW = new BufferedWriter(f);

        System.out.println("Saving data to file...");

        //Go through our bigList array and write down all lines until we hit the maxSCP number
        for(int i = 0; i < maxSCP; i++) {
            bW.write(bigList[i]);
            bW.newLine();
        }

        bW.close();

        System.out.println("File overwritten successfully!");

    }

    //Get the lines from the existing txt file and save them into the array oldBigList
    public static void readFromFile(String path) throws IOException {

        //Start the scanner with our specified path from the main
        File f = new File(path);
        Scanner s = new Scanner(f);

        System.out.println("Reading file content...");

        //Go through the lines and save them into the array
        for(int i = 0; s.hasNext(); i++) {
            oldbigList[i] = s.nextLine();
        }

        s.close();

    }

    //Compare bigList to oldBigList and point out any differences
    public static void compareLists() {

        System.out.println("Comparing new data to old file...");

        //Compare the elements of both arrays in order and give out an alert, if an entry's title has changed
        for(int i = 0; i < maxSCP; i++) {
            if(bigList[i].equals(oldbigList[i]) ) {}
            else {
                System.out.println("Changes to SCP " + (i + 1) + " detected!");
            }
        }

        System.out.println("");

    }

}