Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.File;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.util.Arrays;
- import org.apache.commons.io.FileUtils;
- import org.jsoup.*;
- import org.jsoup.Connection.Response;
- import org.jsoup.nodes.*;
- import org.jsoup.select.*;
- public class main {
- public static void main(String[] args) throws IOException {
- String username = System.getProperty("user.name");
- String outputFolder = "C:";
- //initial setup with user-editable variables
- if (System.getProperty("os.name").startsWith("Windows")) {
- // includes: Windows 2000, Windows 95, Windows 98, Windows NT, Windows Vista, Windows XP
- outputFolder = ("C:\\Users\\" + username + "\\Homestuck\\");
- } else {
- outputFolder = ("/home/" + username + "/Homestuck/");
- System.out.println("UNIX/Linux system.");
- }
- //Override system DNS setting with Google free DNS server
- System.setProperty("sun.net.spi.nameservice.nameservers", "8.8.8.8");
- System.setProperty("sun.net.spi.nameservice.provider.1", "dns,sun");
- boolean success = (new File(outputFolder)).mkdirs();
- if (!success) {
- System.out.println("Directory creation failed or directory already exists.");
- }
- //String current_id = JOptionPane.showInputDialog("Enter the current comic id"),
- //7415
- String current_id = "007432", imgurl = null, imgurl2 = null, flash_object;
- //location of the image link in array
- int pic_id = 8, img_count = 0, first, last = 0;
- boolean multimage = false, flash = false, extras_downloaded = false, scratch = false, scratch_notify = false, sbahj = false, cascade=false, dota=false, isInSwf=false;
- //the homepage
- String pageurl = "http://www.mspaintadventures.com/?s=6&p=", id = "001901";
- // The page CSS is awful, and uses only tables. This grabs the images used to format the page
- while (extras_downloaded == false) {
- File q = new File(outputFolder + "v2_blankstrip.gif");
- File w = new File(outputFolder + "v2_blanksquare2.gif");
- File e = new File(outputFolder + "v2_blanksquare3.gif");
- File r = new File(outputFolder + "spacer.gif");
- File t = new File(outputFolder + "header_cascade.gif");
- if (q.isFile()) {
- System.out.println("1/4 spacers");
- } else {
- Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/v2_blankstrip.gif")
- .ignoreContentType(true).execute();
- FileOutputStream out = (new FileOutputStream(q));
- out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
- out.close(); }
- if (w.isFile()) {
- System.out.println("2/4 spacers");
- } else {
- Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/v2_blanksquare2.gif")
- .ignoreContentType(true).execute();
- FileOutputStream out = (new FileOutputStream(w));
- out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
- out.close();
- }
- if (e.isFile()) {
- System.out.println("3/4 spacers");
- } else {
- Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/v2_blanksquare3.gif")
- .ignoreContentType(true).execute();
- FileOutputStream out = (new FileOutputStream(e));
- out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
- out.close(); }
- if (r.isFile()) {
- System.out.println("4/4 spacers");
- } else {
- Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/spacer.gif")
- .ignoreContentType(true).execute();
- FileOutputStream out = (new FileOutputStream(r));
- out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
- out.close(); }
- if (t.isFile()) {
- System.out.println("extra heading for Cascade");
- } else {
- Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/header_cascade.gif")
- .ignoreContentType(true).execute();
- FileOutputStream out = (new FileOutputStream(t));
- out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
- out.close(); }
- System.out.println("Finished checking/collecting extra files.");
- extras_downloaded = true;
- }
- /*
- *
- *
- * This is the main download loop.
- *
- *
- */
- while (Integer.parseInt(id) < Integer.parseInt(current_id)) {
- File f = new File(outputFolder + id + ".html");
- File l = new File(outputFolder + id + ".gif");
- File u = new File(outputFolder + id + "_1.gif");
- File x = new File(outputFolder + id + ".swf");
- // Some pages are randomly skipped, and need to be manually added to this array.
- String[] dead_ids = { "004299", "004938", "004988" };
- if (Arrays.asList(dead_ids).contains(id)) {
- id = "00" + (Integer.parseInt(id) + 1);
- }
- //Avoid re-downloading files that already exist
- //This skips everything
- if(!f.isFile()&&(!l.isFile()||!u.isFile()||!x.isFile())) {
- Document doc = null;
- try {
- doc = Jsoup.connect(pageurl+id).get();
- } catch (IOException e) {
- e.printStackTrace();
- }
- System.out.println(pageurl+id);
- Elements imports = doc.select("[src]");
- // MSPA doesn't use divs or any css identifiers, so I put all the src elements into an array
- Object[] linklist;
- linklist = imports.toArray();
- //right now it's the eighth element, but this may change.
- //System.out.println((linklist[9]));
- /*
- * The website changes styles and decoration during a certain bit. There's an additional image header, meaning the comic id is one more
- */
- System.out.println("Comic ID: " + Integer.parseInt(id));
- if (Integer.parseInt(id)>=5664 && Integer.parseInt(id)<=5981)
- {
- if (scratch_notify == false) {
- System.out.println("Post-scratch mode is activated.");
- }
- imgurl = (linklist[pic_id+1]).toString();
- scratch = true;
- scratch_notify = true;
- }
- else if (Integer.parseInt(id)==5982)
- {
- //After this, the site gets even more messed up and requires operations for individual pages.
- sbahj = true;
- System.out.println("sweet bro & hella jeff");
- imgurl = (linklist[pic_id-2]).toString();
- System.out.println(linklist[pic_id-2].toString());
- }
- else if (Integer.parseInt(id)==5985)
- {
- System.out.println("LINKS. IN .SWF FILES.");
- isInSwf=true;
- }
- else if (Integer.parseInt(id)==6009)
- {
- cascade = true;
- imgurl = (linklist[pic_id+1]).toString();
- }
- else if (Integer.parseInt(id)==6715) {
- dota=true;
- }
- else
- {
- imgurl = (linklist[pic_id]).toString();
- }
- int first_quotes = imgurl.indexOf("\"");
- int last_quotes = imgurl.lastIndexOf("\"");
- imgurl = imgurl.substring((first_quotes+1), (last_quotes-1));
- if (imgurl.contains("AC_RunActiveContent.js") && !cascade) {
- System.out.println("Downloading flash content.");
- if (scratch==true) {
- imgurl = linklist[pic_id+2].toString();
- } else {
- imgurl = linklist[pic_id+1].toString();
- }
- //System.out.println(imgurl + "\n" + img_count);
- imgurl = imgurl.replace("<embed src=\"", "");
- first = imgurl.indexOf("http://www.mspaintadventures.com/storyfiles/hs2/");
- last = imgurl.indexOf("\"");
- //System.out.println(imgurl);
- imgurl = imgurl.substring(first,last);
- flash = true;
- }
- if ((imgurl.contains("_") && imgurl == "http://www.mspaintadventures.com/storyfiles/hs2/00898_1.gif") || multimage == true) {
- System.out.println("Multimage!");
- imgurl2 = linklist[(pic_id+1)].toString();
- first_quotes = imgurl2.indexOf("\"");
- last_quotes = imgurl2.lastIndexOf("\"");
- imgurl2 = imgurl2.substring((first_quotes+1), (last_quotes-1));
- //Open a URL Stream
- Response resultImageResponse = Jsoup.connect(imgurl2).ignoreContentType(true).execute();
- // output here
- FileOutputStream out = (new FileOutputStream(new java.io.File(outputFolder + id + "_2" + ".gif")));
- out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
- out.close();
- multimage = true;
- }
- imgurl = imgurl.replace("http://www.mspaintadventures.com/?s=6&p=", pageurl);
- System.out.println(imgurl);
- //you need to download the image before the html becuase you need to edit the path in the html to the
- //local path of the image
- //Open a URL Stream
- if (!cascade) {
- Response resultImageResponse = Jsoup.connect(imgurl).ignoreContentType(true).execute();
- // write the image
- if (flash == false) {
- FileOutputStream out = (new FileOutputStream(new java.io.File(outputFolder + id + ".gif")));
- out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
- out.close();
- }
- else {
- FileOutputStream out = (new FileOutputStream(new java.io.File(outputFolder + id + ".swf")));
- out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
- out.close();
- }
- }
- else if (cascade) {
- }
- imgurl = imgurl.replace(pageurl, "http://www.mspaintadventures.com/");
- //now save just the html
- String html = doc.html();
- //replace web link for image to local link
- html = html.replace(imgurl, outputFolder + id + ".gif");
- if (flash==true) {
- first = html.indexOf("<script language=\"javascript\">AC_FL_RunContent = 0;</script>");
- last = html.indexOf("</object>");
- flash_object = html.substring(first, last);
- html = html.replace(flash_object, ("<a href=\"" + outputFolder + id + ".swf" + "\">Click here for flash content</a>" ));
- }
- // replace web link to local link for the next comic
- if (!cascade)
- {
- html = html.replace(("?s=6&p=" + "00" + (Integer.parseInt(id) + 1)), (outputFolder + ("00" + (Integer.parseInt(id) + 1)) + ".html"));
- }
- else if (cascade) {
- html = html.replace(html.substring(0, html.length()), ("<a href=\"" + outputFolder + ("00" + (Integer.parseInt(id) + 1)) + ".html\">Click here for the next page.</a>"));
- }
- if (multimage == true) {
- html = html.replace(imgurl2, outputFolder + id + "_2" + ".gif");
- }
- //remove junk html
- if (scratch && !sbahj) {
- first = html.indexOf("<!-- FULL LOGO HEADER --> ");
- last = html.indexOf("</html>");
- html = html.replace(html.substring(first, last), "");
- first = html.indexOf("<td width=\"100%\"> <span style=\"font-size: 9px; line-height: normal\"><b> ");
- last = html.indexOf("</font></font></b></span>");
- html = html.replace("-->", "");
- html = html.replace(html.substring(first, last), "");
- //System.out.println(html);
- } else if (!sbahj && !cascade)
- //this is used for most things
- {
- html = html.substring(0, html.indexOf("<td background=\"images/bannerframe.png\" width=\"950\" height=\"110\" valign=\"middle\">"));
- first = html.indexOf("<!-- begin nav -->");
- last = html.indexOf("<!-- end nav -->");
- html = html.replace(html.substring(first, last), "");
- }
- else if (sbahj) {
- // the *one* sweet bro & hella jeff page
- html = html.replace(html.substring(html.indexOf("<td width=\"100%\"> <span style=\"font-size: 9px; line-height: normal\"><b>"),
- html.indexOf("<font color=\"#bbbbbb\"> </font></font>")), "");
- html = html.substring(0, html.indexOf("<!-- FULL LOGO HEADER -->"));
- }
- //For this comic, the link to the next comic is inside the .swf object.
- if (Integer.parseInt(id) == 5984) {
- html = html.replace(("<td width=\"100%\" bgcolor=\"#EEEEEE\">"),
- ("<td width=\"100%\" bgcolor=\"#EEEEEE\">" + "<a href=\""+ outputFolder + "00" + (Integer.parseInt(id)+1) + ".html" + "\">[S] Attempt rare and highly dangerous 5x SHOWDOWN COMBO.</a>" ));
- }
- //fix pesterlogs
- //System.out.println(id);
- if (html.contains("<button")) {
- first = html.indexOf("<button type=\"button\" class=\"button\" onmouseover");
- if (html.contains("Hide Pesterlog")) {
- last = html.indexOf("Hide Pesterlog</button>");
- }
- else if (html.contains("Hide Spritelog")) {
- last = html.indexOf("Hide Spritelog</button>");
- }
- else if (html.contains("Hide Recap log")) {
- last = html.indexOf("Hide Recap log</button>");
- }
- else if (html.contains("Hide Journalog")) {
- last = html.indexOf("Hide Journalog</button>");
- }
- else if (html.contains("Hide Serious Business")) {
- last = html.indexOf("Hide Serious Business</button>");
- }
- else if (html.contains("Hide Dialoglog")) {
- last = html.indexOf("Hide Dialoglog</button>");
- }
- html = html.replace(html.substring(first, last+14), "");
- System.out.println("Button code detected and fixed.");
- }
- //fix links to image spacers
- html = html.replace("images/", "");
- FileUtils.writeStringToFile(f, html);
- id = "00" + (Integer.parseInt(id) + 1);
- img_count = (img_count+1);
- //avoid program crashes due to frequency of requests
- try {
- Thread.sleep(10);
- } catch (InterruptedException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- multimage = false;
- flash = false;
- sbahj = false;
- cascade = false;
- isInSwf=false;
- }
- else {
- id = "00" + (Integer.parseInt(id) + 1);
- }
- }
- System.out.println("Done. Hussie's webmaster breathes a sigh of relief.");
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement