package jakebot;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;

/**
 * A bot to tag new unreferenced articles on the English Wikipedia.
 * @author King jakob c 2
 */
public class Jakebot {

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) throws MalformedURLException,
            IOException {
        //Special:NewPages
        //Lines 21-30 shamelessly lifted and adapted from
        //http://stackoverflow.com/questions/6188901/reading-the-content-of-web-page
        URL newpages = new URL("https://en.wikipedia.org/w/index.php?title=Speci"
                + "al:NewPages&offset=50&limit=500");
        BufferedReader newpagesreader = new BufferedReader(
                new InputStreamReader(newpages.openStream()));
        String inputLine;
        String wholepage = ""; //This will contain the HTML of Special:NewPages
        while ((inputLine = newpagesreader.readLine()) != null) {
            wholepage += inputLine;
        }

        //The names of the 50th to 550th newest articles
        String[] newpageslist = new String[500];
        //Each <li> tag, except for the first 5 <li> tags, precedes an article.
        int litags = 0;
        int newpagesfilled = 0;
        for (int i = 0; i < wholepage.length() - 4; i++) {
            if (wholepage.charAt(i) == '<' && wholepage.charAt(i + 1) == 'l'
                    && wholepage.charAt(i + 2) == 'i'
                    && wholepage.charAt(i + 3) == '>') {
                litags++;
                if (litags > 5) {
                    //The content between 32 characters after the <li> and the
                    //next & sign is the name of the article.
                    newpageslist[newpagesfilled] = parseFromNewPages(
                            wholepage.substring(i));
                    newpagesfilled++;
                }
            }
        }

        //Checking if each page is unreferenced and then tagging it if it is.
        for (int i = 0; i < newpageslist.length; i++) {
            //For some reason, there are a bunch of nulls in the newpageslist.
            //Want to avoid those.
            if (newpageslist[i] != null && !newpageslist[i].equals("null")) {
                //Loading up the edit window of a page to get the wiki markup.
                URL anewpage = new URL("https://en.wikipedia.org/w/index.php?ti"
                        + "tle=" + newpageslist[i] + "&action=edit");
                BufferedReader pagereader = new BufferedReader(
                        new InputStreamReader(anewpage.openStream()));
                String inputLine2;
                String article = "";
                while ((inputLine2 = pagereader.readLine()) != null) {
                    article += inputLine2;
                }

                //cleanarticle = the page with the wiki markup, not HTML.
                String cleanarticle = parseArticle(article);

                //Use the API to tag the article as unreferenced, assuming it
                //is one and is not a BLP.
                if (isEligibleForTagging(cleanarticle, newpageslist[i])
                        && !cleanarticle.contains("[[Category:Living people]]")) {
                    Process p = Runtime.getRuntime().exec("cmd /c start "
                            + "https://en.wikipedia.org/w/api.php?action=edit&format="
                            + "json&title=" + newpageslist[i] + "&summary=Tagging%20short"
                            + "%20article%20as%20stub%20(%5B%5BWP%3ABOT%7CBot%20edit"
                            + "%5D%5D)&bot=&appendtext={{Unreferenced}}&assert=bot&"
                            + "prop=info");
                    p.destroy(); //and close the window
                }

                //If it is a BLP, tag it with {{BLP Unsourced}} instead.
                if (isEligibleForTagging(cleanarticle, newpageslist[i])
                        && cleanarticle.contains("[[Category:Living people]]")) {
                    Process p = Runtime.getRuntime().exec("cmd /c start "
                            + "https://en.wikipedia.org/w/api.php?action=edit&format="
                            + "json&title=" + newpageslist[i] + "&summary=Tagging%20short"
                            + "%20article%20as%20stub%20(%5B%5BWP%3ABOT%7CBot%20edit"
                            + "%5D%5D)&bot=&appendtext={{BLP%20Unsourced}}&assert=bot&"
                            + "prop=info");
                    p.destroy(); //and close the window
                }
            }
        }
    }
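
    /**
     * A sketch of a helper for the repeated "read a URL into a String" loop
     * that main(), nodabs(), and noextlinks() each write out by hand. It keeps
     * the same behavior as those loops (default platform charset, lines
     * concatenated with no separator) and is shown only for clarity; it is not
     * called by the methods above and below.
     * @param url the page to download
     * @return the body of the page as a single String
     * @throws IOException if the page cannot be read
     */
    private static String readPage(URL url) throws IOException {
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(url.openStream()));
        StringBuilder page = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            page.append(line);
        }
        reader.close();
        return page.toString();
    }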

    /**
     * Parses out an article title from the HTML in Special:NewPages
     * @param s a piece of the HTML of Special:NewPages
     * @return A properly formatted article name
     */
    public static String parseFromNewPages(String s) {
        String cleanpagename = ""; //this will be returned
        //There are 32 characters between the <li> and the start of the
        //article title.
        for (int i = 32; i < s.length(); i++) {
            //Add characters to cleanpagename until we hit the & sign.
            if (s.charAt(i) == '&') {
                return cleanpagename;
            } else {
                cleanpagename += s.charAt(i);
            }
        }
        return ""; //this should not be reached
    }
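
    /*
     * A note on the HTML that parseArticle() relies on: the &action=edit page
     * fetched in main() carries the article's wikitext inside a
     * <textarea ... id="wpTextbox1"> element. parseArticle() looks for the
     * literal "wpTextbox1"> marker (so it assumes the id attribute is the last
     * attribute in that tag) and copies characters until the closing
     * </textarea> shows up in what it has collected.
     */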

    /**
     * Gets the wiki markup content of an article from the HTML of the edit
     * window.
     * @param article the HTML of the edit window of an article
     * @return wiki markup of an article
     */
    public static String parseArticle(String article) {
        String articlecontent = "";

        //Begin here.
        int beginpage = article.indexOf('"' + "wpTextbox1" + '"' + ">");

        //Adding the wiki markup, starting just past the "wpTextbox1"> marker.
        while (true) {
            articlecontent += article.charAt(beginpage + 13);
            beginpage++;
            if (articlecontent.contains("</textarea>")) {
                return articlecontent;
            }
        }
    }

    /**
     * Check if the bot should tag the page as unreferenced or not
     * @param article the wiki markup of an article
     * @param title the title of the article
     * @return true if the article should be tagged as unreferenced
     * @throws IOException
     */
    public static boolean isEligibleForTagging(String article, String title)
            throws IOException {
        //If the article lacks a reflist (or variants), <ref> tags (or
        //variants), an {{sfn}} template, external links, further reading, a
        //references section, or a notes section, it is considered
        //unreferenced. If it is a disambiguation page, already tagged as
        //unreferenced, or has {{nobots}}, it won't be tagged.
        article = article.toLowerCase();
        if (!article.contains("reflist") && !article.contains("<ref>")
                && !article.contains("<ref ") && !article.contains("<references")
                && noextlinks(title) && !article.contains("further reading")
                && nodabs(title) && !article.contains("{{nobots}}")
                && !article.contains("{{unreferenced")) {
            return true;
        }
        return false;
    }

    /**
     * Uses a Wikipedia API query to search for a dmbox template
     * @param title article title
     * @return true if the page is not a disambiguation page
     * @throws MalformedURLException
     * @throws IOException
     */
    public static boolean nodabs(String title) throws MalformedURLException,
            IOException {
        URL u = new URL("https://en.wikipedia.org/w/api.php?action=query&prop="
                + "templates&titles=" + title);
        BufferedReader dabsearch = new BufferedReader(
                new InputStreamReader(u.openStream()));
        String inputLine;
        String templates = "";
        while ((inputLine = dabsearch.readLine()) != null) {
            templates += inputLine;
        }
        return !templates.contains("Template:Dmbox");
    }

    /**
     * Uses a Wikipedia API query to search for external links in an article.
     * @param title article title
     * @return true if there are no external links
     * @throws MalformedURLException
     * @throws IOException
     */
    public static boolean noextlinks(String title) throws MalformedURLException,
            IOException {
        //Ask for XML so that external links come back as <el> elements.
        URL u = new URL("https://en.wikipedia.org/w/api.php?action=query&pr"
                + "op=extlinks&format=xml&titles=" + title);
        BufferedReader linksearch = new BufferedReader(
                new InputStreamReader(u.openStream()));
        String inputLine;
        String links = "";
        while ((inputLine = linksearch.readLine()) != null) {
            links += inputLine;
        }
        return !links.contains("<el");
    }
}