Jakebot code

Jakebot code below...
package jakebot;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * A bot to tag new unreferenced articles on the English Wikipedia.
 *
 * <p>The bot downloads Special:NewPages, extracts the listed article titles,
 * loads the edit form of every article to obtain its wiki markup and, when the
 * markup shows no sign of references, opens an API edit URL that appends
 * {@code {{Unreferenced}}} to the page.
 *
 * @author King jakob c 2
 */
public class Jakebot {

    /** Address of Special:NewPages listing the 500 newest pages. */
    private static final String NEW_PAGES_URL =
            "https://en.wikipedia.org/w/index.php?title=Special:NewPages&offset=&limit=500";

    /** Prefix of every index.php request; the page title is appended to it. */
    private static final String INDEX_URL_PREFIX =
            "https://en.wikipedia.org/w/index.php?title=";

    /** Tag that precedes every list entry of Special:NewPages. */
    private static final String LI_TAG = "<li>";

    /** The first five {@code <li>} tags of Special:NewPages do not precede an article. */
    private static final int LEADING_NON_ARTICLE_LI_TAGS = 5;

    /** Number of characters between a {@code <li>} and the start of the article title. */
    private static final int TITLE_OFFSET = 32;

    /** End of the opening tag of the edit box in the edit window HTML. */
    private static final String TEXTBOX_MARKER = "\"wpTextbox1\">";

    /** Closing tag that ends the edit box content. */
    private static final String TEXTAREA_END = "</textarea>";

    /**
     * Runs the bot once over the current content of Special:NewPages.
     *
     * @param args the command line arguments (unused)
     * @throws MalformedURLException if a page address cannot be parsed as a URL
     * @throws IOException if a page cannot be downloaded or the browser
     *         command cannot be started
     */
    public static void main(String[] args) throws MalformedURLException, IOException {
        String newPagesHtml = fetchPage(new URL(NEW_PAGES_URL));

        //Checking if each page is unreferenced and then tagging it if so.
        for (String title : extractNewPageTitles(newPagesHtml)) {
            //Loading up the edit window of a page to get the wiki markup.
            String editHtml = fetchPage(new URL(INDEX_URL_PREFIX + title + "&action=edit"));
            String markup = parseArticle(editHtml);

            //No edit box, or an empty one: there is nothing to judge, so do
            //not tag (an empty string would otherwise count as unreferenced).
            if (markup.trim().isEmpty()) {
                continue;
            }

            if (isEligibleForTagging(markup)) {
                //The title parameter must be the page title, not its markup.
                //The title is used exactly as it appeared in the link on
                //Special:NewPages.
                //NOTE(review): this relies on the Windows "cmd" shell, which may
                //treat the '&' characters of the URL specially - confirm that the
                //complete URL reaches the browser.
                Process p = Runtime.getRuntime().exec("cmd /c start "
                        + "https://en.wikipedia.org/w/api.php?action=edit&format="
                        + "json&title=" + title + "&summary=Tagging unref"
                        + "erenced article(%5B%5BWP%3ABOT%7CBot%20edit"
                        + "%5D%5D)&bot=&appendtext={{Unreferenced}}&assert=bot&"
                        + "prop=info");
                p.destroy(); //and close the window
            }
        }
    }

    /**
     * Downloads a page and returns its content as one string. Line terminators
     * are dropped, so the lines of the page are joined directly.
     *
     * @param url the address to download
     * @return the page content decoded as UTF-8
     * @throws IOException if the page cannot be read
     */
    private static String fetchPage(URL url) throws IOException {
        StringBuilder page = new StringBuilder();
        //try-with-resources closes the connection even when reading fails.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line);
            }
        }
        return page.toString();
    }

    /**
     * Collects the article titles listed in the HTML of Special:NewPages.
     *
     * @param html the HTML of Special:NewPages
     * @return the titles in page order; entries whose title could not be
     *         parsed are left out
     */
    private static List<String> extractNewPageTitles(String html) {
        List<String> titles = new ArrayList<>();
        int liTagsSeen = 0;
        int at = html.indexOf(LI_TAG);
        while (at >= 0) {
            liTagsSeen++;
            //Each <li> tag except for the first 5 <li> tags precedes an article.
            if (liTagsSeen > LEADING_NON_ARTICLE_LI_TAGS) {
                String title = parseFromNewPages(html.substring(at));
                if (!title.isEmpty()) {
                    titles.add(title);
                }
            }
            at = html.indexOf(LI_TAG, at + LI_TAG.length());
        }
        return titles;
    }

    /**
     * Parses out an article title from the HTML in Special:NewPages.
     *
     * <p>The title is the text between 32 characters after the start of
     * {@code s} (which is expected to begin with the {@code <li>} tag) and the
     * next {@code &} sign.
     *
     * @param s a piece of the HTML of Special:NewPages, starting at a {@code <li>}
     * @return the article name, or an empty string if {@code s} is
     *         {@code null}, shorter than 32 characters, or has no {@code &}
     *         after the title start
     */
    public static String parseFromNewPages(String s) {
        if (s == null) {
            return "";
        }
        //indexOf returns -1 when the start offset lies beyond the string.
        int end = s.indexOf('&', TITLE_OFFSET);
        if (end < 0) {
            return "";
        }
        return s.substring(TITLE_OFFSET, end);
    }

    /**
     * Gets the wiki markup content of an article from the HTML of the edit window.
     *
     * <p>The markup is the text between the {@code "wpTextbox1">} marker and the
     * following {@code </textarea>}; the closing tag itself is not part of the
     * result. The basic HTML entities are converted back to the characters
     * they stand for, so that for example {@code &lt;ref&gt;} is returned as
     * {@code <ref>}.
     *
     * @param article the HTML of the edit window of an article
     * @return wiki markup of the article, or an empty string if {@code article}
     *         is {@code null} or does not contain a complete edit box
     */
    public static String parseArticle(String article) {
        if (article == null) {
            return "";
        }
        int marker = article.indexOf(TEXTBOX_MARKER);
        if (marker < 0) {
            return "";
        }
        int start = marker + TEXTBOX_MARKER.length();
        int end = article.indexOf(TEXTAREA_END, start);
        if (end < 0) {
            return "";
        }
        return unescapeHtml(article.substring(start, end));
    }

    /**
     * Replaces the basic HTML entities with the characters they stand for.
     *
     * @param text HTML-escaped text
     * @return the unescaped text
     */
    private static String unescapeHtml(String text) {
        //&amp; goes last so that "&amp;lt;" ends up as "&lt;" and not "<".
        return text.replace("&lt;", "<")
                .replace("&gt;", ">")
                .replace("&quot;", "\"")
                .replace("&#039;", "'")
                .replace("&amp;", "&");
    }

    /**
     * Check if the bot should tag the page as unreferenced or not.
     *
     * <p>A page is eligible only when its markup shows no sign of references
     * (no reflist, no {@code <ref} tag, no references heading, no external
     * link, no further reading) and it is not already tagged as unreferenced,
     * not up for speedy deletion, not a disambiguation page and carries no
     * bots/nobots template. The check is case-insensitive and deliberately
     * errs on the side of not tagging.
     *
     * @param article the wiki markup of an article
     * @return true if the article should be tagged as unreferenced; false
     *         otherwise, including when {@code article} is {@code null}
     */
    public static boolean isEligibleForTagging(String article) {
        if (article == null) {
            return false;
        }
        //Locale.ROOT keeps the lower-casing independent of the default locale.
        String lower = article.toLowerCase(Locale.ROOT);
        //Headings may be written with or without spaces around the title.
        String compact = lower.replaceAll("\\s+", "");

        boolean hasReferences = lower.contains("reflist")
                || lower.contains("<ref") //also matches <ref name="...">
                || compact.contains("==references==")
                || lower.contains("http")
                || lower.contains("further reading");

        boolean excluded = lower.contains("{{db")
                || lower.contains("{{unreferenced")
                || lower.contains("{{bots")
                || lower.contains("{{nobots")
                || lower.contains("disambiguation");

        return !hasReferences && !excluded;
    }
}