package jakebot;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;

/**
 * A bot to tag new unreferenced articles on the English Wikipedia.
 *
 * @author King jakob c 2
 */
public class Jakebot {

    /**
     * Fetches the 500 newest mainspace articles from the recent-changes API,
     * then tags each one that is unreferenced with {{Unreferenced}}, or with
     * {{BLP unsourced}} when it is a biography of a living person.
     *
     * @param args the command line arguments (unused)
     * @throws IOException if reading any of the Wikipedia URLs fails
     */
    public static void main(String[] args) throws MalformedURLException, IOException {
        // API query listing the newest mainspace pages (rclimit=500).
        URL newpages = new URL("https://en.wikipedia.org/w/api.php?action=query"
                + "&list=recentchanges&rctype=new&rcprop=title|timestamp&"
                + "rcnamespace=0&rclimit=500");
        // Whole response as one string; the "title=" scan below runs over it.
        String wholepage = readAll(newpages);

        // The names of the 500 newest articles.
        String[] newpageslist = new String[500];
        int newpagesfilled = 0;
        // Scan for "title=" markers. Bound is i + 5 < length(): the original
        // used i < length() - 4 and then read charAt(i + 5), which overruns
        // the string by one when "title" sits at the very end. Also guard
        // against overfilling the 500-slot array.
        for (int i = 0; i + 5 < wholepage.length()
                && newpagesfilled < newpageslist.length; i++) {
            if (wholepage.startsWith("title=", i)) {
                newpageslist[newpagesfilled] = parseFromNewPages(wholepage.substring(i));
                newpagesfilled++;
            }
        }

        // Checking if each page is unreferenced and then tagging it if so.
        for (int i = 0; i < newpageslist.length; i++) {
            String title = newpageslist[i];
            // Skip unfilled/garbage slots. The null-safe comparison also
            // covers slots that are actual null references, which the
            // original newpageslist[i].equals("null") would NPE on.
            if (title == null || "null".equals(title)) {
                continue;
            }
            // Loading up the edit window of a page to get the wiki markup.
            URL anewpage = new URL("https://en.wikipedia.org/w/index.php?ti"
                    + "tle=" + title + "&action=edit");
            // cleanarticle = the page with the wiki markup, not HTML.
            String cleanarticle = parseArticle(readAll(anewpage));

            if (isEligibleForTagging(cleanarticle, title) && !alreadyedited(title)) {
                // {{BLP unsourced}} for biographies of living people,
                // {{Unreferenced}} otherwise. NOTE(review): the original BLP
                // branch skipped the alreadyedited() check; it is applied to
                // both branches here so no page can be tagged twice.
                tag(title, noblpcat(title) ? "{{Unreferenced}}" : "{{BLP unsourced}}");
            }
        }
    }

    /**
     * Reads the entire resource at {@code url} into one string. Line breaks
     * are dropped, matching the original line-concatenation behavior.
     *
     * @param url the resource to read
     * @return the concatenated lines of the response
     * @throws IOException if the stream cannot be opened or read
     */
    private static String readAll(URL url) throws IOException {
        StringBuilder content = new StringBuilder();
        // try-with-resources: the original never closed its readers.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        }
        return content.toString();
    }

    /**
     * Opens the edit-API URL that prepends {@code template} to the page named
     * {@code title}, via the Windows default-handler trick (rundll32).
     *
     * @param title the article title to tag; the original Unreferenced branch
     *        mistakenly passed the article's wiki markup here instead
     * @param template the maintenance template to prepend, e.g. {{Unreferenced}}
     * @throws IOException if the handler process cannot be started
     */
    private static void tag(String title, String template) throws IOException {
        String url = "https://en.wikipedia.org/w/api.php?action=edit&format="
                + "json&title=" + title + "&summary=Tagging%20short"
                + "%20article%20as%20stub%20(%5B%5BWP%3ABOT%7CBot%20edit"
                + "%5D%5D)&bot=&prependtext=" + template + "&assert=bot&"
                + "prop=info";
        // Array form keeps the URL as one argument. The original exec()
        // concatenated the URL directly onto "FileProtocolHandler" with no
        // separating space, producing a malformed command line.
        Runtime.getRuntime().exec(
                new String[] {"rundll32", "url.dll,FileProtocolHandler", url});
    }

    /**
     * Parses out an article title from the HTML in Special:NewPages
     *
     * @param s a piece of the HTML of Special:NewPages
     * @return A properly formatted article name
     */
    public static String parseFromNewPages(String s) {
        String cleanpagename = ""; //this will be returned
        //There are 32 characters between the