Advertisement
zinc55

Untitled

Jan 11th, 2013
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.76 KB | None | 0 0
  1. import java.io.File;
  2. import java.io.FileOutputStream;
  3. import java.io.IOException;
  4. import java.util.Arrays;
  5.  
  6. import org.apache.commons.io.FileUtils;
  7.  
  8. import org.jsoup.*;
  9. import org.jsoup.Connection.Response;
  10. import org.jsoup.nodes.*;
  11. import org.jsoup.select.*;
  12.  
  13. public class homestuckDownloader {
  14.  
  15. public static void main(String[] args) throws IOException {
  16.  
  17. String username = System.getProperty("user.name");
  18.  
  19. //initial setup with user-editable variables
  20. String outputFolder = ("C:\\Users\\" + username + "\\Homestuck\\");
  21.  
  22. //Override system DNS setting with Google free DNS server
  23. System.setProperty("sun.net.spi.nameservice.nameservers", "8.8.8.8");
  24. System.setProperty("sun.net.spi.nameservice.provider.1", "dns,sun");
  25.  
  26. boolean success = (new File(outputFolder)).mkdirs();
  27. if (!success) {
  28. System.out.println("Directory creation failed or directory already exists.");
  29. }
  30.  
  31. //String current_id = JOptionPane.showInputDialog("Enter the current comic id"),
  32. //7415
  33. String current_id = "007432", imgurl = null, imgurl2 = null, flash_object;
  34.  
  35. //location of the image link in array
  36. int pic_id = 8, img_count = 0, first, last = 0;
  37.  
  38. boolean multimage = false, flash = false, extras_downloaded = false,
  39. scratch = false, scratch_notify = false, sbahj = false, cascade=false, dota=false;
  40.  
  41. //the homepage
  42. String pageurl = "http://www.mspaintadventures.com/?s=6&p=", id = "001901";
  43.  
  44. // The page CSS is awful, and uses only tables. This grabs the images used to format the page
  45. while (extras_downloaded == false) {
  46.  
  47. File q = new File(outputFolder + "v2_blankstrip.gif");
  48. File w = new File(outputFolder + "v2_blanksquare2.gif");
  49. File e = new File(outputFolder + "v2_blanksquare3.gif");
  50. File r = new File(outputFolder + "spacer.gif");
  51. File t = new File(outputFolder + "header_cascade.gif");
  52.  
  53. if (q.isFile()) {
  54. System.out.println("1/4 spacers");
  55. } else {
  56. Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/v2_blankstrip.gif")
  57. .ignoreContentType(true).execute();
  58. FileOutputStream out = (new FileOutputStream(q));
  59. out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
  60. out.close(); }
  61.  
  62. if (w.isFile()) {
  63. System.out.println("2/4 spacers");
  64. } else {
  65. Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/v2_blanksquare2.gif")
  66. .ignoreContentType(true).execute();
  67. FileOutputStream out = (new FileOutputStream(w));
  68. out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
  69. out.close();
  70. }
  71.  
  72. if (e.isFile()) {
  73. System.out.println("3/4 spacers");
  74. } else {
  75. Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/v2_blanksquare3.gif")
  76. .ignoreContentType(true).execute();
  77. FileOutputStream out = (new FileOutputStream(e));
  78. out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
  79. out.close(); }
  80.  
  81. if (r.isFile()) {
  82. System.out.println("4/4 spacers");
  83. } else {
  84. Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/spacer.gif")
  85. .ignoreContentType(true).execute();
  86. FileOutputStream out = (new FileOutputStream(r));
  87. out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
  88. out.close(); }
  89.  
  90. if (t.isFile()) {
  91. System.out.println("extra heading for Cascade");
  92. } else {
  93. Response resultImageResponse = Jsoup.connect("http://www.mspaintadventures.com/images/header_cascade.gif")
  94. .ignoreContentType(true).execute();
  95. FileOutputStream out = (new FileOutputStream(t));
  96. out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
  97. out.close(); }
  98.  
  99. System.out.println("Finished checking/collecting extra files.");
  100. extras_downloaded = true;
  101. }
  102.  
  103. /*
  104. *
  105. *
  106. * This is the main download loop.
  107. *
  108. *
  109. */
  110.  
  111. while (Integer.parseInt(id) < Integer.parseInt(current_id)) {
  112.  
  113. File f = new File(outputFolder + id + ".html");
  114.  
  115. // Skip over files that already exist
  116.  
  117. while (f.isFile()) {
  118. id = "00" + (Integer.parseInt(id) + 1);
  119. f = new File(outputFolder + id + ".html");
  120. img_count = (img_count+1);
  121. }
  122.  
  123. // Some pages are randomly skipped, and need to be manually added to this array.
  124. String[] dead_ids = { "004299", "004938", "004988" };
  125.  
  126. if (Arrays.asList(dead_ids).contains(id)) {
  127. id = "00" + (Integer.parseInt(id) + 1);
  128. }
  129.  
  130. Document doc = null;
  131. try {
  132. doc = Jsoup.connect(pageurl+id).get();
  133. } catch (IOException e) {
  134. e.printStackTrace();
  135. }
  136.  
  137. System.out.println(pageurl+id);
  138.  
  139. Elements imports = doc.select("[src]");
  140.  
  141. // MSPA doesn't use divs or any css identifiers, so I put all the src elements into an array
  142. Object[] linklist;
  143. linklist = imports.toArray();
  144.  
  145. //right now it's the eighth element, but this may change. diagnostic
  146. //System.out.println((linklist[9]));
  147.  
  148. /*
  149. * The website changes styles and decoration during a certain bit. There's an additional image header, meaning the comic id is one more
  150. */
  151.  
  152. //System.out.println(Integer.parseInt(id));
  153. if (Integer.parseInt(id)>=5664 && Integer.parseInt(id)<=5981)
  154. {
  155. if (scratch_notify == false) {
  156. System.out.println("Post-scratch mode is activated.");
  157. }
  158. imgurl = (linklist[pic_id+1]).toString();
  159. scratch = true;
  160. scratch_notify = true;
  161. }
  162. else if (Integer.parseInt(id)==5982) {
  163. //After this, the site gets even more messed up and requires operations for individual pages.
  164. sbahj = true;
  165. System.out.println("sweet bro & hella jeff");
  166. imgurl = (linklist[pic_id-2]).toString();
  167. System.out.println(linklist[pic_id-2].toString());
  168.  
  169. }
  170. else if (Integer.parseInt(id)==6009){
  171. cascade = true;
  172. imgurl = (linklist[pic_id+1]).toString();
  173. }
  174. else if (Integer.parseInt(id)==6715) {
  175. dota = true;
  176. }
  177. else
  178. {
  179. imgurl = (linklist[pic_id]).toString();
  180. }
  181.  
  182. int first_quotes = imgurl.indexOf("\"");
  183. int last_quotes = imgurl.lastIndexOf("\"");
  184.  
  185. imgurl = imgurl.substring((first_quotes+1), (last_quotes-1));
  186.  
  187. if (imgurl.contains("AC_RunActiveContent.js") && !cascade) {
  188.  
  189. System.out.println("Downloading flash content.");
  190. if (scratch==true) {
  191. imgurl = linklist[pic_id+2].toString();
  192. } else {
  193. imgurl = linklist[pic_id+1].toString();
  194. }
  195. //System.out.println(imgurl + "\n" + img_count);
  196. imgurl = imgurl.replace("<embed src=\"", "");
  197. first = imgurl.indexOf("http://www.mspaintadventures.com/storyfiles/hs2/");
  198. last = imgurl.indexOf("\"");
  199. //System.out.println(imgurl);
  200. imgurl = imgurl.substring(first,last);
  201. flash = true;
  202. }
  203.  
  204. if ((imgurl.contains("_") && imgurl == "http://www.mspaintadventures.com/storyfiles/hs2/00898_1.gif") || multimage == true) {
  205.  
  206. System.out.println("Multimage!");
  207.  
  208. imgurl2 = linklist[(pic_id+1)].toString();
  209. first_quotes = imgurl2.indexOf("\"");
  210. last_quotes = imgurl2.lastIndexOf("\"");
  211. imgurl2 = imgurl2.substring((first_quotes+1), (last_quotes-1));
  212.  
  213. //Open a URL Stream
  214. Response resultImageResponse = Jsoup.connect(imgurl2).ignoreContentType(true).execute();
  215.  
  216. // output here
  217. FileOutputStream out = (new FileOutputStream(new java.io.File(outputFolder + id + "_2" + ".gif")));
  218. out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
  219. out.close();
  220.  
  221. multimage = true;
  222. }
  223.  
  224. imgurl = imgurl.replace("http://www.mspaintadventures.com/", pageurl);
  225. System.out.println(imgurl);
  226.  
  227. //you need to download the image before the html becuase you need to edit the path in the html to the
  228. //local path of the image
  229.  
  230. //Open a URL Stream
  231. if (!cascade) {
  232. Response resultImageResponse = Jsoup.connect(imgurl).ignoreContentType(true).execute();
  233.  
  234. // write the image
  235.  
  236. if (flash == false) {
  237. FileOutputStream out = (new FileOutputStream(new java.io.File(outputFolder + id + ".gif")));
  238. out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
  239. out.close();
  240. }
  241. else {
  242. FileOutputStream out = (new FileOutputStream(new java.io.File(outputFolder + id + ".swf")));
  243. out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
  244. out.close();
  245. }
  246. }
  247.  
  248. else if (cascade) {
  249. // Ideally, this will download Cascade via torrent. Maglink is:
  250. // magnet:?xt=urn:btih:4chfzscsp6bpdo3zw45ebeyrdiafbj5u&dn=cascade_download.zip&xl=54872032&fc=1
  251. }
  252.  
  253. imgurl = imgurl.replace(pageurl, "http://www.mspaintadventures.com/");
  254.  
  255. //now save just the html
  256.  
  257. String html = doc.html();
  258.  
  259. //replace web link for image to local link
  260. html = html.replace(imgurl, outputFolder + id + ".gif");
  261.  
  262. if (flash==true) {
  263. first = html.indexOf("<script language=\"javascript\">AC_FL_RunContent = 0;</script>");
  264. last = html.indexOf("</object>");
  265.  
  266. flash_object = html.substring(first, last);
  267.  
  268. html = html.replace(flash_object, ("<a href=\"" + outputFolder + id + ".swf" + "\">Click here for flash content</a>" ));
  269. }
  270.  
  271. // replace web link to local link for the next comic
  272. if (!cascade)
  273. {
  274. html = html.replace(("?s=6&amp;p=" + "00" + (Integer.parseInt(id) + 1)), (outputFolder + ("00" + (Integer.parseInt(id) + 1)) + ".html"));
  275. }
  276. else if (cascade) {
  277. html = html.replace(html.substring(0, html.length()), ("<a href=\"" + outputFolder + ("00" + (Integer.parseInt(id) + 1)) + ".html\">Click here for the next page.</a>"));
  278. }
  279.  
  280.  
  281. if (multimage == true) {
  282. html = html.replace(imgurl2, outputFolder + id + "_2" + ".gif");
  283. }
  284.  
  285. //remove junk html
  286. if (scratch && !sbahj) {
  287.  
  288. first = html.indexOf("<td width=\"100%\"> <span style=\"font-size: 9px; line-height: normal\"><b>");
  289. last = html.indexOf("</font></font></b></span>");
  290.  
  291. html = html.replace(html.substring(first, last), "");
  292. html = html.substring(0, html.indexOf("<!-- FULL LOGO HEADER --> "));
  293.  
  294.  
  295. first = html.indexOf("<!-- end comic content -->");
  296. last = html.indexOf("<!-- end footer -->");
  297.  
  298. html = html.substring(first, last);
  299.  
  300. } else if (!sbahj && !cascade)
  301. //this is used for most things
  302. {
  303. html = html.substring(0, html.indexOf("<td background=\"images/bannerframe.png\" width=\"950\" height=\"110\" valign=\"middle\">"));
  304.  
  305. first = html.indexOf("<!-- begin nav -->");
  306. last = html.indexOf("<!-- end nav -->");
  307.  
  308. html = html.replace(html.substring(first, last), "");
  309. }
  310. else if (sbahj) {
  311. // the *one* sweet bro & hella jeff page
  312. html = html.replace(html.substring(html.indexOf("<td width=\"100%\"> <span style=\"font-size: 9px; line-height: normal\"><b>"),
  313. html.indexOf("<font color=\"#bbbbbb\"> </font></font>")), "");
  314.  
  315. html = html.substring(0, html.indexOf("<!-- FULL LOGO HEADER -->"));
  316. }
  317.  
  318.  
  319. //For this comic, the link to the next comic is inside the .swf object.
  320. if (Integer.parseInt(id) == 5984) {
  321. html = html.replace(("<td width=\"100%\" bgcolor=\"#EEEEEE\">"),
  322. ("<td width=\"100%\" bgcolor=\"#EEEEEE\">" + "<a href=\""+ outputFolder + "00" + (Integer.parseInt(id)+1) + ".html" + "\">[S] Attempt rare and highly dangerous 5x SHOWDOWN COMBO.</a>" ));
  323. }
  324.  
  325. //fix pesterlogs
  326. //System.out.println(id);
  327. if (html.contains("<button")) {
  328. first = html.indexOf("<button type=\"button\" class=\"button\" onmouseover");
  329.  
  330.  
  331. if (html.contains("Hide Pesterlog")) {
  332. last = html.indexOf("Hide Pesterlog</button>");
  333. }
  334. else if (html.contains("Hide Spritelog")) {
  335. last = html.indexOf("Hide Spritelog</button>");
  336. }
  337. else if (html.contains("Hide Recap log")) {
  338. last = html.indexOf("Hide Recap log</button>");
  339. }
  340. else if (html.contains("Hide Journalog")) {
  341. last = html.indexOf("Hide Journalog</button>");
  342. }
  343. else if (html.contains("Hide Serious Business")) {
  344. last = html.indexOf("Hide Serious Business</button>");
  345. }
  346. else if (html.contains("Hide Dialoglog")) {
  347. last = html.indexOf("Hide Dialoglog</button>");
  348. }
  349.  
  350. html = html.replace(html.substring(first, last+14), "");
  351. System.out.println("Button code detected and fixed.");
  352. }
  353.  
  354. //fix links to image spacers
  355. html = html.replace("images/", "");
  356.  
  357. FileUtils.writeStringToFile(f, html);
  358.  
  359. id = "00" + (Integer.parseInt(id) + 1);
  360.  
  361. img_count = (img_count+1);
  362.  
  363. multimage = false;
  364. flash = false;
  365. sbahj = false;
  366. cascade = false;
  367. }
  368.  
  369. System.out.println("Done");
  370.  
  371. }
  372.  
  373. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement