// Jakebot code below...
2 | package jakebot; | |
3 | ||
4 | import java.io.BufferedReader; | |
5 | import java.io.IOException; | |
6 | import java.io.InputStreamReader; | |
7 | import java.net.MalformedURLException; | |
8 | import java.net.URL; | |
9 | ||
/**
 * A bot that tags new unreferenced articles on the English Wikipedia.
 *
 * <p>It scrapes Special:NewPages for the newest article titles, fetches each
 * article's wiki markup through its edit window, and — when nothing in the
 * markup suggests the article is referenced — appends an {@code {{Unreferenced}}}
 * template through the MediaWiki API.
 *
 * @author King jakob c 2
 */
public class Jakebot {

    /** Number of entries requested from Special:NewPages. */
    private static final int MAX_NEW_PAGES = 500;

    /**
     * Entry point: scrape Special:NewPages, then check and tag each new page.
     *
     * @param args the command line arguments (unused)
     * @throws IOException if fetching Special:NewPages or an edit page fails
     */
    public static void main(String[] args) throws MalformedURLException, IOException {
        // Fetch the raw HTML of Special:NewPages (the 500 newest articles).
        URL newpages = new URL("https://en.wikipedia.org/w/index.php?title=Speci"
                + "al:NewPages&offset=&limit=500");
        String wholepage = readAll(newpages);

        // The names of the newest articles. Slots that never get filled
        // remain null and are skipped below.
        String[] newpageslist = new String[MAX_NEW_PAGES];

        // Each <li> tag except for the first 5 precedes an article title.
        int litags = 0;
        int newpagesfilled = 0;
        int idx = wholepage.indexOf("<li>");
        // Bound by the array length: the original code could overrun the
        // array if the page ever contained more than 505 <li> tags.
        while (idx >= 0 && newpagesfilled < newpageslist.length) {
            litags++;
            if (litags > 5) {
                // The content between 32 characters after the <li> and the
                // next '&' sign is the name of the article.
                newpageslist[newpagesfilled++] = parseFromNewPages(wholepage.substring(idx));
            }
            idx = wholepage.indexOf("<li>", idx + 1);
        }

        // Checking if each page is unreferenced and then tagging it if so.
        for (String title : newpageslist) {
            // Unfilled slots are Java null (the original .equals("null") check
            // threw a NullPointerException on them); the scrape can also yield
            // the literal string "null", which we skip as before.
            if (title == null || title.equals("null")) {
                continue;
            }

            // Loading up the edit window of a page to get the wiki markup.
            URL anewpage = new URL("https://en.wikipedia.org/w/index.php?ti"
                    + "tle=" + title + "&action=edit");
            String article = readAll(anewpage);

            // cleanarticle = the page's wiki markup, not HTML.
            String cleanarticle = parseArticle(article);

            // Use the APISandbox to tag as unreferenced, assuming it is.
            // NOTE(review): launching a browser via "cmd /c start" is
            // Windows-only and immediately destroying the process may race
            // the request — consider a proper HTTP POST instead.
            if (isEligibleForTagging(cleanarticle)) {
                Process p = Runtime.getRuntime().exec("cmd /c start "
                        + "https://en.wikipedia.org/w/api.php?action=edit&format="
                        + "json&title=" + cleanarticle + "&summary=Tagging unref"
                        + "erenced article(%5B%5BWP%3ABOT%7CBot%20edit"
                        + "%5D%5D)&bot=&appendtext={{Unreferenced}}&assert=bot&"
                        + "prop=info");
                p.destroy(); // and close the window
            }
        }
    }

    /**
     * Reads the entire contents of a URL into one string (lines concatenated
     * without separators, matching the original read loops).
     *
     * @param url the URL to fetch
     * @return the concatenated lines of the response body
     * @throws IOException if the stream cannot be opened or read
     */
    private static String readAll(URL url) throws IOException {
        StringBuilder content = new StringBuilder();
        // try-with-resources: the original never closed its readers.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(url.openStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        }
        return content.toString();
    }

    /**
     * Parses out an article title from the HTML in Special:NewPages.
     *
     * @param s a piece of the HTML of Special:NewPages, starting at "<li>"
     * @return the article name (the text between character 32 and the next
     *         '&'), or "" if no '&' is found
     */
    public static String parseFromNewPages(String s) {
        // There are 32 characters between the <li> and the start of the
        // article title; the title runs until the next '&' sign.
        StringBuilder cleanpagename = new StringBuilder();
        for (int i = 32; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c == '&') {
                return cleanpagename.toString();
            }
            cleanpagename.append(c);
        }
        return ""; // no terminating '&' — malformed input
    }

    /**
     * Gets the wiki markup content of an article from the HTML of the edit
     * window (the contents of the "wpTextbox1" textarea).
     *
     * @param article the HTML of the edit window of an article
     * @return the wiki markup, including the trailing "&lt;/textarea&gt;" tag
     *         (preserving the original behavior), or "" if the textarea
     *         marker is absent
     */
    public static String parseArticle(String article) {
        final String marker = "\"wpTextbox1\">";
        final String endTag = "</textarea>";

        int begin = article.indexOf(marker);
        if (begin < 0) {
            // Original code indexed from -1 here and crashed; treat a page
            // with no edit box as having no markup.
            return "";
        }
        int start = begin + marker.length(); // marker is 13 chars long

        int end = article.indexOf(endTag, start);
        if (end < 0) {
            // Unterminated textarea: original looped until it ran off the end
            // of the string. Return what we have instead.
            return article.substring(start);
        }
        // Original behavior: the closing tag is included in the result.
        return article.substring(start, end + endTag.length());
    }

    /**
     * Check if the bot should tag the page as unreferenced or not.
     *
     * <p>Returns true only when nothing suggests the article is referenced
     * (no reflist with &lt;ref&gt; tags, no ==references== heading, no
     * external links, no "further reading") and nothing forbids tagging
     * (a speedy-deletion tag, an existing {{unreferenced}} tag, a {{bots}}
     * opt-out, or a disambiguation page).
     *
     * <p>NOTE(review): matching is case-sensitive, so "{{Reflist}}" or
     * "==References==" are NOT detected — confirm whether the markup should
     * be lowercased before this check.
     *
     * @param article the wiki markup of an article
     * @return true if the article should be tagged as unreferenced
     */
    public static boolean isEligibleForTagging(String article) {
        return !(article.contains("reflist") && article.contains("<ref>"))
                && !article.contains("==references==") // was misspelled "referneces"
                && !article.contains("http")
                && !article.contains("further reading")
                && !article.contains("{{db")
                && !article.contains("{{unreferenced")
                && !article.contains("{{bots}}")
                && !article.contains("disambiguation");
    }
}