thatguyandrew1992

CHDownloader3

Apr 27th, 2012
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 4.83 KB | None | 0 0
  1. //This program is used to essentially download every Cyanide and Happiness comic
  2. //By entering the first link of the comic's page, the program will save that image to your computer
  3. //then go to the previous page and download that picture
  4. //It will continue until you stop the program
  5. //v3.0
  6. //v3.0 Changes - Ability to stop program when duplicates found
  7. import java.io.File;
  8. import java.io.FileOutputStream;
  9. import java.io.FileReader;
  10. import java.io.IOException;
  11. import java.net.URL;
  12. import java.nio.channels.Channels;
  13. import java.nio.channels.ReadableByteChannel;
  14. import java.util.Scanner;
  15.  
  16.  
  17. public class CHDownloader3 {
  18.  
  19.    
  20.     @SuppressWarnings("finally")
  21.     public static void main(String[] args) throws IOException {
  22.         System.out.println("About CHDownloader:");
  23.         System.out.println("This program will essentially download every comic from Cyanide and Happiness");
  24.         System.out.println("It will start at your link, then move to PREVIOUS pages");
  25.         System.out.println("Enter the Cyanide and Happiness comic number to start with (It's found in the URL. Ex 2776)");
  26.         Scanner userInput = new Scanner(System.in);
  27.         int theURL = userInput.nextInt();
  28.         System.out.println("Stop the program when a duplicate is found? Yes or No");
  29.         String repeatQuit = userInput.next();
  30.         System.out.println("Press Ctrl + C to stop the program");
  31.  
  32.        
  33.         URL link = new URL("http://www.explosm.net/comics/"+theURL+"/");//Created the first URL using the link above
  34.         boolean stop = false;
  35.         while( stop == false){
  36.             if( theURL == 14){//The comic ends at comic 15. This if statement will stop the program
  37.                 stop = true;
  38.                 System.out.println("All images downloaded");
  39.             }
  40.             try{
  41.             //This section downloads the HTML page
  42.             ReadableByteChannel rbc = Channels.newChannel(link.openStream());//Gets the html page
  43.             FileOutputStream fos = new FileOutputStream("page.html");//Creates the output name of the html page to be saved to the computer
  44.             fos.getChannel().transferFrom(rbc, 0, 1 << 24);
  45.             Scanner sc = new Scanner(new FileReader("page.html"));//Takes the downloaded HTML page and sends it to a scanner
  46.  
  47.  
  48.            
  49.            
  50.                                                                                    
  51.            
  52.             //This section converts the html page to the string
  53.             String contents = "";
  54.             while(sc.hasNextLine() || sc.hasNext()){
  55.                 contents = contents + sc.nextLine() + "\n"; //Contents is the html page as a string!
  56.             }
  57.             sc.close();//Closes the scanner file
  58.  
  59.            
  60.    
  61.             //This section finds the image to download
  62.             Scanner sc2 = new Scanner(contents);//Sends the html to a scanner
  63.             String theImage = "";
  64.             int counter2 = 0;//I think that the image url is on line 2 so I need a counter
  65.             while((sc2.hasNext() || sc2.hasNextLine()) && (counter2 != 1)){
  66.             theImage = sc2.findWithinHorizon("[h][t][t][p][:][/][/][w]?[w]?[w]?[.]?explosm.net/db/files/Comics/.*?[.][jJpPgG][pPnNiI][eE]?[gGfF]", 0);//This finds the image's url and saves it to a string
  67.             counter2 += 1;//Adds one to my counter
  68.             }
  69.             //Ends finding image
  70.            
  71.            
  72.            
  73.             //This section determines the file extension
  74.                 String imageType = "";
  75.                 if(theImage.matches(".*[.][jJ][pP][gG]")){
  76.                     imageType = ".jpg";
  77.                 }
  78.                 if(theImage.matches(".*[.][jJ][pP][eE][gG]")){
  79.                     imageType = ".jpeg";
  80.                 }
  81.                 if(theImage.matches(".*[.][pP][nN][gG]")){
  82.                     imageType = ".png";
  83.                 }
  84.                 if(theImage.matches(".*[.][gG][iI][Ff]")){
  85.                     imageType = ".gif";
  86.                 }
  87.            
  88.            
  89.            
  90.             //This section saves the image itself
  91.             URL link2 = new URL(theImage);//A new URL is created with the image's url that was found
  92.             String chLinkFinal = "CH-" + theURL + imageType;//The string that will be used to name the file
  93.             boolean exists = (new File(chLinkFinal)).exists();
  94.             if(exists){
  95.                 if(repeatQuit.equals("Yes") || repeatQuit.equals("yes")){ //Quits program when duplicate is found
  96.                     System.out.println("Duplicate found. Program Stopped.");
  97.                     System.exit(0);
  98.                 }
  99.                 else{
  100.                     System.out.println(chLinkFinal + " already exists. Skipping.");
  101.                 }
  102.             }
  103.             else{
  104.                 ReadableByteChannel rbc2 = Channels.newChannel(link2.openStream());//Gets the image
  105.                 FileOutputStream fos2 = new FileOutputStream(chLinkFinal);//The output of the file name
  106.                 fos2.getChannel().transferFrom(rbc2, 0, 1 << 24);
  107.                 System.out.println("Image Saved: " + chLinkFinal);//Prints the name of the file that is saved
  108.             }
  109.             theURL -= 1;//Decreases the link my 1 to capture the next image
  110.             link = new URL("http://www.explosm.net/comics/" + theURL + "/");//Sets up the next page to be read
  111.             }
  112.            
  113.             //Some comics using the decreasing number system don't exist. This catch loop skips them
  114.             catch(Exception e){
  115.                 theURL -=1;
  116.                 link = new URL("http://www.explosm.net/comics/" + theURL + "/");
  117.             }
  118.             //This finally block allows the program to continue past the catch block
  119.             finally{
  120.                 continue;
  121.             }
  122.         }
  123.     }
  124.  
  125. }
Add Comment
Please, Sign In to add comment