Advertisement
zinc55

Homestuck

Jan 11th, 2013
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 13.48 KB | None | 0 0
  1. import java.io.File;
  2. import java.io.FileOutputStream;
  3. import java.io.IOException;
  4. import java.util.Arrays;
  5.  
  6. import org.apache.commons.io.FileUtils;
  7.  
  8. import org.jsoup.*;
  9. import org.jsoup.Connection.Response;
  10. import org.jsoup.nodes.*;
  11. import org.jsoup.select.*;
  12.  
  13. public class main {
  14.  
  15.     public static void main(String[] args) throws IOException {
  16.        
  17.     String username = System.getProperty("user.name");
  18.     String outputFolder = "C:";
  19.        
  20.     //initial setup with user-editable variables
  21.     if (System.getProperty("os.name").startsWith("Windows")) {
  22.         // includes: Windows 2000,  Windows 95, Windows 98, Windows NT, Windows Vista, Windows XP
  23.         outputFolder = ("C:\\Users\\" + username + "\\Homestuck\\");
  24.     } else {
  25.         outputFolder = ("/home/" + username + "/Homestuck/");
  26.         System.out.println("UNIX/Linux system.");
  27.     }
  28.    
  29.     //Override system DNS setting with Google free DNS server
  30.     System.setProperty("sun.net.spi.nameservice.nameservers", "8.8.8.8");
  31.     System.setProperty("sun.net.spi.nameservice.provider.1", "dns,sun");
  32.  
  33.     boolean success = (new File(outputFolder)).mkdirs();
  34.     if (!success) {
  35.         System.out.println("Directory creation failed or directory already exists.");
  36.     }
  37.    
  38.     //String current_id = JOptionPane.showInputDialog("Enter the current comic id"),
  39.     //7415
  40.     String current_id = "007432", imgurl = null, imgurl2 = null, flash_object;
  41.    
  42.     //location of the image link in array
  43.     int pic_id = 8, img_count = 0, first, last = 0;
  44.    
  45.     boolean multimage = false, flash = false, extras_downloaded = false, scratch = false, scratch_notify = false, sbahj = false, cascade=false, dota=false, isInSwf=false;
  46.    
  47.     //the homepage
  48.     String pageurl = "http://www.mspaintadventures.com/?s=6&p=", id = "001901";
  49.    
  50.     // The page CSS is awful, and uses only tables. This grabs the images used to format the page
  51.     while (extras_downloaded == false) {
  52.        
  53.         File q = new File(outputFolder + "v2_blankstrip.gif");
  54.         File w = new File(outputFolder + "v2_blanksquare2.gif");
  55.         File e = new File(outputFolder + "v2_blanksquare3.gif");
  56.         File r = new File(outputFolder + "spacer.gif");
  57.         File t = new File(outputFolder + "header_cascade.gif");
  58.        
  59.         if (q.isFile()) {
  60.             System.out.println("1/4 spacers");
  61.         } else {
  62.         Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/v2_blankstrip.gif")
  63.                 .ignoreContentType(true).execute();
  64.         FileOutputStream out = (new FileOutputStream(q));
  65.         out.write(resultImageResponse.bodyAsBytes());           // resultImageResponse.body() is where the image's contents are.
  66.         out.close(); }
  67.        
  68.         if (w.isFile()) {
  69.             System.out.println("2/4 spacers");
  70.         } else {
  71.         Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/v2_blanksquare2.gif")
  72.                 .ignoreContentType(true).execute();
  73.         FileOutputStream out = (new FileOutputStream(w));
  74.         out.write(resultImageResponse.bodyAsBytes());           // resultImageResponse.body() is where the image's contents are.
  75.         out.close();
  76.         }
  77.        
  78.         if (e.isFile()) {
  79.             System.out.println("3/4 spacers");
  80.         } else {
  81.         Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/v2_blanksquare3.gif")
  82.                 .ignoreContentType(true).execute();
  83.         FileOutputStream out = (new FileOutputStream(e));
  84.         out.write(resultImageResponse.bodyAsBytes());           // resultImageResponse.body() is where the image's contents are.
  85.         out.close(); }
  86.        
  87.         if (r.isFile()) {
  88.             System.out.println("4/4 spacers");
  89.         } else {
  90.         Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/spacer.gif")
  91.                 .ignoreContentType(true).execute();
  92.         FileOutputStream out = (new FileOutputStream(r));
  93.         out.write(resultImageResponse.bodyAsBytes());           // resultImageResponse.body() is where the image's contents are.
  94.         out.close(); }
  95.        
  96.         if (t.isFile()) {
  97.             System.out.println("extra heading for Cascade");
  98.         } else {
  99.         Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/header_cascade.gif")
  100.                 .ignoreContentType(true).execute();
  101.         FileOutputStream out = (new FileOutputStream(t));
  102.         out.write(resultImageResponse.bodyAsBytes());           // resultImageResponse.body() is where the image's contents are.
  103.         out.close(); }
  104.        
  105.         System.out.println("Finished checking/collecting extra files.");
  106.         extras_downloaded = true;
  107.     }
  108.    
  109.     /* 
  110.      *
  111.      *
  112.      * This is the main download loop.
  113.      *
  114.      *
  115.      */
  116.    
  117.     while (Integer.parseInt(id) < Integer.parseInt(current_id)) {
  118.        
  119.         File f = new File(outputFolder + id + ".html");
  120.         File l = new File(outputFolder + id + ".gif");
  121.         File u = new File(outputFolder + id + "_1.gif");
  122.         File x = new File(outputFolder + id + ".swf");
  123.        
  124.         // Some pages are randomly skipped, and need to be manually added to this array.
  125.         String[] dead_ids = { "004299", "004938", "004988" };
  126.  
  127.         if (Arrays.asList(dead_ids).contains(id)) {
  128.             id = "00" + (Integer.parseInt(id) + 1);
  129.         }
  130.        
  131.         //Avoid re-downloading files that already exist
  132.         //This skips everything
  133.        
  134.         if(!f.isFile()&&(!l.isFile()||!u.isFile()||!x.isFile())) {
  135.         Document doc = null;
  136.         try {
  137.             doc = Jsoup.connect(pageurl+id).get();
  138.         } catch (IOException e) {
  139.             e.printStackTrace();
  140.         }
  141.        
  142.         System.out.println(pageurl+id);
  143.        
  144.         Elements imports = doc.select("[src]");
  145.        
  146.         // MSPA doesn't use divs or any css identifiers, so I put all the src elements into an array
  147.         Object[] linklist;
  148.         linklist = imports.toArray();
  149.  
  150.         //right now it's the eighth element, but this may change.
  151.         //System.out.println((linklist[9]));
  152.        
  153.         /*
  154.          * The website changes styles and decoration during a certain bit. There's an additional image header, meaning the comic id is one more
  155.          */
  156.        
  157.         System.out.println("Comic ID: " + Integer.parseInt(id));
  158.        
  159.         if (Integer.parseInt(id)>=5664 && Integer.parseInt(id)<=5981)
  160.         {
  161.             if (scratch_notify == false) {
  162.                 System.out.println("Post-scratch mode is activated.");
  163.             }
  164.             imgurl = (linklist[pic_id+1]).toString();
  165.             scratch = true;
  166.             scratch_notify = true;
  167.         }
  168.        
  169.         else if (Integer.parseInt(id)==5982)
  170.         {
  171.             //After this, the site gets even more messed up and requires operations for individual pages.
  172.             sbahj = true;
  173.             System.out.println("sweet bro & hella jeff");
  174.             imgurl = (linklist[pic_id-2]).toString();
  175.             System.out.println(linklist[pic_id-2].toString());
  176.  
  177.         }
  178.        
  179.         else if (Integer.parseInt(id)==5985)
  180.         {
  181.             System.out.println("LINKS. IN .SWF FILES.");
  182.              isInSwf=true;
  183.         }
  184.        
  185.         else if (Integer.parseInt(id)==6009)
  186.         {
  187.             cascade = true;
  188.             imgurl = (linklist[pic_id+1]).toString();
  189.         }
  190.        
  191.         else if (Integer.parseInt(id)==6715) {
  192.             dota=true;
  193.         }
  194.        
  195.         else
  196.         {
  197.             imgurl = (linklist[pic_id]).toString();
  198.         }
  199.        
  200.         int first_quotes = imgurl.indexOf("\"");
  201.         int last_quotes = imgurl.lastIndexOf("\"");
  202.        
  203.         imgurl = imgurl.substring((first_quotes+1), (last_quotes-1));
  204.        
  205.         if (imgurl.contains("AC_RunActiveContent.js") && !cascade) {
  206.            
  207.             System.out.println("Downloading flash content.");
  208.             if (scratch==true) {
  209.                 imgurl = linklist[pic_id+2].toString();
  210.             } else {
  211.             imgurl = linklist[pic_id+1].toString();
  212.             }
  213.             //System.out.println(imgurl + "\n" + img_count);
  214.             imgurl = imgurl.replace("<embed src=\"", "");
  215.             first = imgurl.indexOf("http://www.mspaintadventures.com/storyfiles/hs2/");
  216.             last = imgurl.indexOf("\"");
  217.             //System.out.println(imgurl);
  218.             imgurl = imgurl.substring(first,last);
  219.             flash = true;
  220.         }
  221.  
  222.         if ((imgurl.contains("_") && imgurl == "http://www.mspaintadventures.com/storyfiles/hs2/00898_1.gif") || multimage == true) {
  223.            
  224.             System.out.println("Multimage!");
  225.            
  226.             imgurl2 = linklist[(pic_id+1)].toString();
  227.             first_quotes = imgurl2.indexOf("\"");
  228.             last_quotes = imgurl2.lastIndexOf("\"");
  229.             imgurl2 = imgurl2.substring((first_quotes+1), (last_quotes-1));
  230.            
  231.             //Open a URL Stream
  232.             Response resultImageResponse = Jsoup.connect(imgurl2).ignoreContentType(true).execute();
  233.            
  234.             // output here
  235.             FileOutputStream out = (new FileOutputStream(new java.io.File(outputFolder + id + "_2" + ".gif")));
  236.             out.write(resultImageResponse.bodyAsBytes());           // resultImageResponse.body() is where the image's contents are.
  237.             out.close();
  238.            
  239.             multimage = true;
  240.         }
  241.        
  242.         imgurl = imgurl.replace("http://www.mspaintadventures.com/?s=6&p=", pageurl);
  243.         System.out.println(imgurl);
  244.        
  245.         //you need to download the image before the html becuase you need to edit the path in the html to the
  246.         //local path of the image
  247.  
  248.         //Open a URL Stream
  249.         if (!cascade) {
  250.         Response resultImageResponse = Jsoup.connect(imgurl).ignoreContentType(true).execute();
  251.        
  252.         // write the image
  253.        
  254.         if (flash == false) {
  255.         FileOutputStream out = (new FileOutputStream(new java.io.File(outputFolder + id + ".gif")));
  256.         out.write(resultImageResponse.bodyAsBytes());           // resultImageResponse.body() is where the image's contents are.
  257.         out.close();
  258.         }
  259.         else {
  260.         FileOutputStream out = (new FileOutputStream(new java.io.File(outputFolder + id + ".swf")));
  261.         out.write(resultImageResponse.bodyAsBytes());           // resultImageResponse.body() is where the image's contents are.
  262.         out.close();
  263.         }
  264.        
  265.         }
  266.        
  267.         else if (cascade) {
  268.            
  269.         }
  270.        
  271.         imgurl = imgurl.replace(pageurl, "http://www.mspaintadventures.com/");
  272.        
  273.         //now save just the html
  274.        
  275.         String html = doc.html();
  276.        
  277.         //replace web link for image to local link
  278.         html = html.replace(imgurl, outputFolder + id + ".gif");
  279.        
  280.         if (flash==true) {
  281.             first = html.indexOf("<script language=\"javascript\">AC_FL_RunContent = 0;</script>");
  282.             last = html.indexOf("</object>");
  283.            
  284.             flash_object = html.substring(first, last);
  285.            
  286.             html = html.replace(flash_object, ("<a href=\"" + outputFolder + id + ".swf" + "\">Click here for flash content</a>" ));
  287.         }
  288.        
  289.         // replace web link to local link for the next comic
  290.         if (!cascade)
  291.         {
  292.             html = html.replace(("?s=6&amp;p=" + "00" + (Integer.parseInt(id) + 1)), (outputFolder + ("00" + (Integer.parseInt(id) + 1)) + ".html"));
  293.         }
  294.         else if (cascade) {
  295.             html = html.replace(html.substring(0, html.length()), ("<a href=\"" + outputFolder + ("00" + (Integer.parseInt(id) + 1)) + ".html\">Click here for the next page.</a>"));
  296.         }
  297.            
  298.        
  299.         if (multimage == true) {
  300.             html = html.replace(imgurl2, outputFolder + id + "_2" + ".gif");
  301.         }
  302.        
  303.         //remove junk html
  304.        
  305.         if (scratch && !sbahj) {
  306.            
  307.             first = html.indexOf("<!--  FULL LOGO HEADER  --> ");
  308.             last = html.indexOf("</html>");
  309.            
  310.             html = html.replace(html.substring(first, last), "");
  311.            
  312.             first = html.indexOf("<td width=\"100%\"> <span style=\"font-size: 9px; line-height: normal\"><b> ");
  313.             last = html.indexOf("</font></font></b></span>");
  314.            
  315.             html = html.replace("-->", "");
  316.            
  317.             html = html.replace(html.substring(first, last), "");
  318.            
  319.             //System.out.println(html);
  320.        
  321.         } else if (!sbahj && !cascade)
  322.         //this is used for most things
  323.         {
  324.             html = html.substring(0, html.indexOf("<td background=\"images/bannerframe.png\" width=\"950\" height=\"110\" valign=\"middle\">"));
  325.        
  326.             first = html.indexOf("<!-- begin nav -->");
  327.             last = html.indexOf("<!-- end nav -->");
  328.        
  329.             html = html.replace(html.substring(first, last), "");
  330.         }
  331.         else if (sbahj) {
  332.             // the *one* sweet bro & hella jeff page
  333.             html = html.replace(html.substring(html.indexOf("<td width=\"100%\"> <span style=\"font-size: 9px; line-height: normal\"><b>"),
  334.                     html.indexOf("<font color=\"#bbbbbb\"> </font></font>")), "");
  335.            
  336.             html = html.substring(0, html.indexOf("<!--  FULL LOGO HEADER  -->"));
  337.         }
  338.        
  339.         //For this comic, the link to the next comic is inside the .swf object.
  340.         if (Integer.parseInt(id) == 5984) {
  341.             html = html.replace(("<td width=\"100%\" bgcolor=\"#EEEEEE\">"),
  342.                     ("<td width=\"100%\" bgcolor=\"#EEEEEE\">" + "<a href=\""+ outputFolder + "00" + (Integer.parseInt(id)+1) + ".html" + "\">[S] Attempt rare and highly dangerous 5x SHOWDOWN COMBO.</a>" ));
  343.         }
  344.        
  345.         //fix pesterlogs
  346.         //System.out.println(id);
  347.         if (html.contains("<button")) {
  348.         first = html.indexOf("<button type=\"button\" class=\"button\" onmouseover");
  349.        
  350.        
  351.         if (html.contains("Hide Pesterlog")) {
  352.         last = html.indexOf("Hide Pesterlog</button>");
  353.         }
  354.         else if (html.contains("Hide Spritelog")) {
  355.             last = html.indexOf("Hide Spritelog</button>");
  356.         }
  357.         else if (html.contains("Hide Recap log")) {
  358.             last = html.indexOf("Hide Recap log</button>");
  359.         }
  360.         else if (html.contains("Hide Journalog")) {
  361.             last = html.indexOf("Hide Journalog</button>");
  362.         }
  363.         else if (html.contains("Hide Serious Business")) {
  364.             last = html.indexOf("Hide Serious Business</button>");
  365.         }
  366.         else if (html.contains("Hide Dialoglog")) {
  367.             last = html.indexOf("Hide Dialoglog</button>");
  368.         }
  369.        
  370.         html = html.replace(html.substring(first, last+14), "");
  371.         System.out.println("Button code detected and fixed.");
  372.         }
  373.        
  374.         //fix links to image spacers
  375.         html = html.replace("images/", "");
  376.        
  377.         FileUtils.writeStringToFile(f, html);
  378.            
  379.         id = "00" + (Integer.parseInt(id) + 1);
  380.        
  381.         img_count = (img_count+1);
  382.        
  383.         //avoid program crashes due to frequency of requests
  384.         try {
  385.             Thread.sleep(10);
  386.         } catch (InterruptedException e) {
  387.             // TODO Auto-generated catch block
  388.             e.printStackTrace();
  389.         }
  390.        
  391.         multimage = false;
  392.         flash = false;
  393.         sbahj = false;
  394.         cascade = false;
  395.         isInSwf=false;
  396.     }
  397.        
  398.     else {
  399.         id = "00" + (Integer.parseInt(id) + 1);
  400.     }
  401.  
  402.     }
  403.    
  404.     System.out.println("Done. Hussie's webmaster breathes a sigh of relief.");
  405.    
  406.     }
  407.  
  408. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement