import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
/**
* DailyProgrammer Challenge #110 Intermediate - Creepy Crawlies
*
* Gets the top 100 stories from /r/nosleep, prints them out and saves them to
* stories.txt
*
* @author Quentin/HolocaustHank
*
*/
public class DP110I {
    /** Scheme and host prepended to the relative story paths found in the listing. */
    private static final String SITE = "http://www.reddit.com";
    /** Listing URL; the arguments are the running item count and the id of the last item seen. */
    private static final String LISTING_URL_FORMAT = SITE + "/r/nosleep?count=%s&after=%s";
    /** Maximum number of links taken from one listing page. */
    private static final int PAGE_SIZE = 25;
    /** Number of listing pages requested. */
    private static final int PAGE_COUNT = 4;
    /** Marker identifying the anchor that carries a story title. */
    private static final String TITLE_ANCHOR = "a class=\"title";
    /** Marker identifying a listing line. */
    private static final String LISTING_MARKER = "<!--IE6sux-->";
    /** Markup on which a listing line is split into one chunk per entry. */
    private static final String ENTRY_SEPARATOR =
            "<div class=\"clearleft\"><!--IE6sux--></div></div><div class=\"clearleft\"><!--IE6sux--></div>";
    /** Name of the file the collected stories are written to. */
    private static final String OUTPUT_FILE = "stories.txt";

    public static void main(String[] args) {
        new DP110I();
    }

    /** Runs the whole scrape immediately by calling {@link #printStories()}. */
    public DP110I() {
        printStories();
    }

    /**
     * Downloads every story returned by {@link #getTopLinks()}, prints the collected text to
     * standard output and writes it to {@code stories.txt}.
     *
     * <p>Failures are reported with a stack trace and never propagate to the caller. A failure
     * while downloading aborts the run before anything is written.
     */
    void printStories() {
        try {
            List<String> stories = new ArrayList<>();
            int fetched = 0;
            for (String link : getTopLinks()) {
                collectStory(link, stories);
                stories.add(""); // blank separator line between stories
                fetched++;
                System.out.println("Got story " + fetched);
            }
            writeStories(stories);
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the text of the first {@code <a class="title ...>} anchor in {@code s} and decodes
     * the HTML entities in it.
     *
     * @param s a line of raw (not yet entity-decoded) HTML
     * @return the decoded anchor text; an empty string if {@code s} has no title anchor or the
     *         anchor's opening tag is never closed
     */
    static String getTitle(String s) {
        int anchor = s.indexOf(TITLE_ANCHOR);
        if (anchor < 0) {
            return "";
        }
        int open = s.indexOf('>', anchor);
        if (open < 0) {
            return "";
        }
        int close = s.indexOf('<', open + 1);
        String title = close < 0 ? s.substring(open + 1) : s.substring(open + 1, close);
        return decodeEntities(title);
    }

    /**
     * Collects the paths of the title links from up to {@code PAGE_COUNT} listing pages, taking at
     * most {@code PAGE_SIZE} links per page.
     *
     * <p>Each following page is requested with the id of the last link of the previous page.
     * Paging stops early when a page yields no links or when that id cannot be extracted.
     *
     * @return the story URLs in listing order, each prefixed with the site address
     * @throws IOException if a listing page cannot be opened or read
     */
    ArrayList<String> getTopLinks() throws IOException {
        ArrayList<String> links = new ArrayList<>();
        String lastLink = "";
        for (int page = 0; page < PAGE_COUNT; page++) {
            URL url = new URL(String.format(LISTING_URL_FORMAT, page * PAGE_SIZE, lastLink));
            List<String> paths = readListingPaths(url);
            if (paths.isEmpty()) {
                break; // nothing parsed: asking again would only repeat the same request
            }
            for (String path : paths) {
                links.add(SITE + path);
            }
            String id = extractPostId(paths.get(paths.size() - 1));
            if (id == null) {
                break; // no id to continue from
            }
            lastLink = "t3_" + id;
        }
        return links;
    }

    /** Reads one listing page and returns the href of each entry's title anchor, capped at PAGE_SIZE. */
    private static List<String> readListingPaths(URL url) throws IOException {
        List<String> paths = new ArrayList<>();
        HttpURLConnection con = (HttpURLConnection) url.openConnection();
        try (BufferedReader br = openReader(con)) {
            String line;
            while ((line = br.readLine()) != null) {
                if (!line.contains(LISTING_MARKER)) {
                    continue;
                }
                for (String entry : line.split(ENTRY_SEPARATOR)) {
                    if (paths.size() == PAGE_SIZE) {
                        break;
                    }
                    String path = extractHref(entry);
                    if (path != null) {
                        paths.add(path);
                    }
                }
            }
        } finally {
            con.disconnect();
        }
        return paths;
    }

    /** Returns the href value of the first title anchor in {@code entry}, or null if there is none. */
    private static String extractHref(String entry) {
        int anchor = entry.indexOf("<" + TITLE_ANCHOR);
        if (anchor < 0) {
            return null;
        }
        String attribute = "href=\"";
        int href = entry.indexOf(attribute, anchor);
        if (href < 0) {
            return null;
        }
        int start = href + attribute.length();
        int end = entry.indexOf('"', start);
        return end < 0 ? null : entry.substring(start, end);
    }

    /** Returns the path segment following {@code comments/}, or null if it is missing or empty. */
    private static String extractPostId(String path) {
        String marker = "comments/";
        int at = path.indexOf(marker);
        if (at < 0) {
            return null;
        }
        int start = at + marker.length();
        int end = path.indexOf('/', start);
        String id = end < 0 ? path.substring(start) : path.substring(start, end);
        return id.isEmpty() ? null : id;
    }

    /** Downloads one story page and appends its title line and paragraphs to {@code stories}. */
    private static void collectStory(String link, List<String> stories) throws IOException {
        HttpURLConnection con = (HttpURLConnection) new URL(link).openConnection();
        try (BufferedReader br = openReader(con)) {
            boolean started = false;
            boolean quote = false;
            String raw;
            while ((raw = br.readLine()) != null) {
                String line = cleanLine(raw);
                if (started) {
                    if (line.startsWith("<blockquote>")) {
                        quote = true;
                    }
                    if (line.startsWith("</blockquote>")) {
                        quote = false;
                    }
                    if (line.startsWith("<p>")) {
                        line = stripParagraphTags(line);
                        stories.add(quote ? ">" + line : line);
                    }
                    if (line.startsWith("</div>")) {
                        started = false;
                    }
                }
                // Test the raw line so that escaped text which merely decodes to the marker
                // is not mistaken for the title anchor.
                if (raw.contains(TITLE_ANCHOR)) {
                    started = true;
                    // The title is taken from the raw line so its entities are decoded only once.
                    stories.add("=== " + getTitle(raw) + " ===");
                    // The title line also carries the start of the body, right after the
                    // opening tag that holds class="md...; skip it when that tag is absent.
                    int md = line.indexOf("class=\"md");
                    int bodyStart = md < 0 ? -1 : line.indexOf('>', md);
                    if (bodyStart >= 0) {
                        stories.add(stripParagraphTags(line.substring(bodyStart + 1)));
                    }
                }
            }
        } finally {
            con.disconnect();
        }
    }

    /** Prints every collected line to standard output and writes it, UTF-8 encoded, to the output file. */
    private static void writeStories(List<String> stories) throws IOException {
        File file = new File(OUTPUT_FILE);
        try (BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))) {
            for (String story : stories) {
                System.out.println(story);
                bw.write(story);
                bw.write("\n");
            }
        }
    }

    /** Opens the response body of {@code con} as UTF-8 text. */
    private static BufferedReader openReader(HttpURLConnection con) throws IOException {
        return new BufferedReader(new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8));
    }

    /** Removes the emphasis tags from {@code line} and decodes its HTML entities. */
    private static String cleanLine(String line) {
        String withoutTags = line.replace("<strong>", "")
                .replace("</strong>", "")
                .replace("<em>", "")
                .replace("</em>", "");
        return decodeEntities(withoutTags);
    }

    /** Removes every {@code <p>} and {@code </p>} tag from {@code text}. */
    private static String stripParagraphTags(String text) {
        return text.replace("<p>", "").replace("</p>", "");
    }

    /**
     * Decodes the apostrophe, double-quote and ampersand entities and normalises the right single
     * quotation mark (entity or literal character) to a plain apostrophe.
     *
     * <p>The ampersand is decoded last, so that text such as {@code &amp;quot;} is unescaped
     * exactly once.
     */
    private static String decodeEntities(String text) {
        return text.replace("&#39;", "'")
                .replace("&quot;", "\"")
                .replace("&#8217;", "'")
                .replace("&rsquo;", "'")
                .replace("\u2019", "'")
                .replace("&amp;", "&");
    }
}