// Jakebot — source recovered from a pastebin diff view; the paste-site
// chrome and diff markers have been stripped and the newest (+) revision
// of each hunk reconstructed below.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * A bot to tag new unreferenced articles on the English Wikipedia.
 *
 * <p>Reads Special:NewPages, extracts the newest article titles, fetches each
 * article's wiki markup through the edit window, and — when the article
 * appears unreferenced — opens a Wikipedia API edit URL that appends
 * {{Unreferenced}} (or {{BLP Unsourced}} for biographies of living people).
 *
 * @author King jakob c 2
 */
public class Jakebot {

    /**
     * Entry point: scans Special:NewPages for the 500 newest articles and
     * tags those that appear to be unreferenced.
     *
     * @param args the command line arguments (unused)
     * @throws MalformedURLException if a constructed URL is malformed
     * @throws IOException if reading any page fails
     */
    public static void main(String[] args) throws MalformedURLException, IOException {
        // Special:NewPages. Fetching adapted from
        // http://stackoverflow.com/questions/6188901/reading-the-content-of-web-page
        URL newpages = new URL("https://en.wikipedia.org/w/index.php?title=Speci"
                + "al:NewPages&offset=&limit=500");
        BufferedReader newpagesreader = new BufferedReader(
                new InputStreamReader(newpages.openStream()));
        String inputLine;
        String wholepage = ""; // This will contain the HTML of Special:NewPages

        while ((inputLine = newpagesreader.readLine()) != null) {
            wholepage += inputLine;
        }
        newpagesreader.close(); // fix: don't leak the connection

        // The names of the 500 newest articles.
        String[] newpageslist = new String[500];

        // Each <li> tag except for the first 5 <li> tags precedes an article.
        int litags = 0;
        int newpagesfilled = 0;
        for (int i = 0; i < wholepage.length() - 4; i++) {
            if (wholepage.charAt(i) == '<' && wholepage.charAt(i + 1) == 'l'
                    && wholepage.charAt(i + 2) == 'i' && wholepage.charAt(i + 3) == '>') {
                litags++;
                // fix: bounds guard — extra <li> tags in the page chrome must
                // not overflow the 500-slot array.
                if (litags > 5 && newpagesfilled < newpageslist.length) {
                    // The content between 32 characters after the <li> and the
                    // next & sign is the name of the article.
                    newpageslist[newpagesfilled] =
                            parseFromNewPages(wholepage.substring(i));
                    newpagesfilled++;
                }
            }
        }

        // Check whether each page is unreferenced, and tag it if so.
        for (int i = 0; i < newpageslist.length; i++) {
            // fix: skip unfilled slots (null) as well as the literal "null"
            // entries that show up in the parsed list.
            if (newpageslist[i] == null || newpageslist[i].equals("null")) {
                continue;
            }

            // Load the edit window of the page to get the wiki markup.
            URL anewpage = new URL("https://en.wikipedia.org/w/index.php?ti"
                    + "tle=" + newpageslist[i] + "&action=edit");
            BufferedReader pagereader = new BufferedReader(
                    new InputStreamReader(anewpage.openStream()));
            String inputLine2;
            String article = "";

            while ((inputLine2 = pagereader.readLine()) != null) {
                article += inputLine2;
            }
            pagereader.close(); // fix: don't leak the connection

            // cleanarticle = the page as wiki markup, not HTML.
            String cleanarticle = parseArticle(article);

            boolean eligible = isEligibleForTagging(cleanarticle, newpageslist[i]);
            boolean blp = cleanarticle.contains("[[Category:Living people]]");

            // Use the API to tag the article as unreferenced, assuming it is.
            // NOTE(review): shelling out via "cmd /c start" is Windows-only
            // and fragile; an HTTP client POST would be more robust.
            if (eligible && !blp) {
                // fix: the edit URL's title parameter must be the article
                // title (newpageslist[i]), not the article's entire markup
                // (cleanarticle), matching the BLP branch below.
                Process p = Runtime.getRuntime().exec("cmd /c start " +
                        "https://en.wikipedia.org/w/api.php?action=edit&format="
                        + "json&title=" + newpageslist[i] + "&summary=Tagging%20short"
                        + "%20article%20as%20stub%20(%5B%5BWP%3ABOT%7CBot%20edit"
                        + "%5D%5D)&bot=&appendtext={{Unreferenced}}&assert=bot&"
                        + "prop=info");
                p.destroy(); // and close the window
            }
            // If it is a BLP, tag it with {{BLP Unsourced}} instead.
            if (eligible && blp) {
                Process p = Runtime.getRuntime().exec("cmd /c start " +
                        "https://en.wikipedia.org/w/api.php?action=edit&format="
                        + "json&title=" + newpageslist[i] + "&summary=Tagging%20short"
                        + "%20article%20as%20stub%20(%5B%5BWP%3ABOT%7CBot%20edit"
                        + "%5D%5D)&bot=&appendtext={{BLP Unsourced}}&assert=bot&"
                        + "prop=info");
                p.destroy(); // and close the window
            }
        }
    }

    /**
     * Parses out an article title from the HTML in Special:NewPages.
     *
     * @param s a piece of the HTML of Special:NewPages, starting at an &lt;li&gt; tag
     * @return the article name — the text between 32 characters after the
     *     start of {@code s} and the next '&amp;' — or "" if no '&amp;' follows
     */
    public static String parseFromNewPages(String s) {
        // There are 32 characters between the <li> and the start of the
        // article title; the title runs until the next '&'.
        final int start = 32;
        if (s.length() <= start) {
            return "";
        }
        int amp = s.indexOf('&', start);
        // fix: indexOf/substring replaces the O(n^2) char-by-char append;
        // identical result, including "" when no '&' is found.
        return (amp < 0) ? "" : s.substring(start, amp);
    }

    /**
     * Gets the wiki markup content of an article from the HTML of the edit window.
     *
     * @param article the HTML of the edit window of an article
     * @return the wiki markup of the article — the text between the
     *     wpTextbox1 textarea's opening tag and the closing &lt;/textarea&gt; —
     *     or "" if the textarea is not present
     */
    public static String parseArticle(String article) {
        // The markup begins immediately after: "wpTextbox1"> (13 characters).
        final String marker = "\"wpTextbox1\"" + ">";
        int beginpage = article.indexOf(marker);
        if (beginpage < 0) {
            // fix: the original looped forever / indexed out of bounds when
            // the marker was missing.
            return "";
        }
        int start = beginpage + marker.length();
        int end = article.indexOf("</textarea>", start);
        // If the closing tag is absent, return everything after the marker.
        return (end < 0) ? article.substring(start) : article.substring(start, end);
    }

    /**
     * Decides whether an article should be tagged as unreferenced.
     *
     * <p>If the article lacks a reflist (or variants), &lt;ref&gt; tags (or
     * variants), an {{sfn}} template, external links, further reading, a
     * references section, and a notes section, it is considered unreferenced.
     * If it is a disambiguation page, already tagged as unreferenced, or has
     * {{nobots}}, it won't be tagged.
     *
     * @param article the article's wiki markup
     * @param title the article's title, used for API queries
     * @return true if the article should be tagged as unreferenced
     * @throws IOException if an API query fails
     */
    public static boolean isEligibleForTagging(String article, String title) throws IOException {
        article = article.toLowerCase();
        return !article.contains("reflist")
                && !article.contains("<ref>") && !article.contains("<ref name")
                && !article.contains("{{sfn")
                // fix: "==referneces==" was a typo that could never match a
                // real "==References==" heading.
                && !article.contains("==references==") && !article.contains("== references ==")
                && !article.contains("==notes==") && !article.contains("== notes ==")
                && !article.contains("<references/>") && !article.contains("<references />")
                && noextlinks(title)
                && !article.contains("further reading")
                && nodabs(title)
                && !article.contains("{{nobots}}")
                && !article.contains("{{unreferenced");
    }

    /**
     * Uses a Wikipedia API query to search for a dmbox template.
     *
     * @param title article title
     * @return true if the page is not a disambiguation page
     * @throws MalformedURLException if the query URL is malformed
     * @throws IOException if the query fails
     */
    public static boolean nodabs(String title) throws MalformedURLException, IOException {
        // NOTE(review): title is not URL-encoded here; titles containing
        // spaces or reserved characters may break the query — confirm that
        // upstream titles are already encoded.
        // fix: the query API's parameter is "titles" (as in noextlinks),
        // not "title"; with "title" the parameter was ignored and every
        // page looked like a non-disambiguation.
        URL u = new URL("https://en.wikipedia.org/w/api.php?action=query&prop="
                + "templates&titles=" + title);
        BufferedReader dabsearch = new BufferedReader(
                new InputStreamReader(u.openStream()));
        String inputLine;
        String templates = "";

        while ((inputLine = dabsearch.readLine()) != null) {
            templates += inputLine;
        }
        dabsearch.close(); // fix: don't leak the connection
        return !templates.contains("Template:Dmbox");
    }

    /**
     * Uses a Wikipedia API query to search for external links in an article.
     *
     * @param title article title
     * @return true if there are no external links
     * @throws MalformedURLException if the query URL is malformed
     * @throws IOException if the query fails
     */
    public static boolean noextlinks(String title) throws MalformedURLException, IOException {
        URL u = new URL("https://en.wikipedia.org/w/api.php?action=query&pr"
                + "op=extlinks&titles=" + title);
        BufferedReader linksearch = new BufferedReader(
                new InputStreamReader(u.openStream()));
        String inputLine;
        String links = "";

        while ((inputLine = linksearch.readLine()) != null) {
            links += inputLine;
        }
        linksearch.close(); // fix: don't leak the connection
        return !links.contains("<el xml:space=\"preserve\">");
    }

}