Advertisement
iAmTheSonic

MapleArchive Drop Fetcher - DropFetcher.java

Nov 9th, 2012
429
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 12.69 KB | None | 0 0
  1. package tools.dropfetcher;
  2.  
  3. import java.io.BufferedWriter;
  4. import java.io.File;
  5. import java.io.FileWriter;
  6. import java.io.IOException;
  7. import java.io.InputStream;
  8. import java.io.PrintWriter;
  9. import java.net.MalformedURLException;
  10. import java.net.URL;
  11. import java.text.SimpleDateFormat;
  12. import java.util.ArrayList;
  13. import java.util.Calendar;
  14. import java.util.Iterator;
  15. import java.util.Scanner;
  16.  
  17.  
  18. /**
  19.  * @author Simon
  20.  * @author Sonic
  21.  */
  22. public class DropFetcher {
  23.    
    /**
     * Drop entries collected so far. Entries are appended by
     * {@code parseItemSection} and written out by {@code dumpQuery}.
     */
    private static ArrayList<DropEntry> drop_entries = new ArrayList<DropEntry>();
    /** Root URL of the site being crawled. */
    private static final String BASE_URL = "http://maplearchive.com/";
    /** Monster listing page, relative to {@link #BASE_URL}; {@code main} appends {@code ?page=N}. */
    private static final String MONSTER_PAGE = "mob-wp.php";
    /** Number of listing pages to crawl. */
    private static int NumberOfPages = 157; // Correct as of v120; expected to change with later versions
    /** Page currently being crawled; driven by the loop in {@code main} and read for progress output. */
    private static int CurrentPage = 1; // Pages are numbered from 1
    /** Version number used in the output file name, the SQL header and each {@code DropEntry}. */
    public static final int VERSION = 120;
    private static int MonstersDone = 0; // Total monster sections handed to the parser
    private static int MonstersWithDrops = 0; // Monsters counted as having drops
    private static int MonstersWithoutDrops = 0; // Monsters whose drop cell reads "None/Unknown"
    private static int Errors = 0; // Number of failures reported while crawling/parsing
  38.    
  39.     /**
  40.      * Crawls the mob data page with the given URL, fetching the drop data.
  41.      *
  42.      * @param url
  43.      *        The URL of the page to crawl.
  44.      */
  45.     private static void crawlPage(final String url) { // Recursive method bitches
  46.     try {
  47.         URL page = new URL(url);
  48.         InputStream is = page.openStream();
  49.         Scanner s = new Scanner(is);
  50.         String temp_data = "";
  51.         while (s.hasNext()) {
  52.         temp_data += s.nextLine() + "\n";
  53.         }
  54.         s.close();
  55.         is.close();
  56.         while (temp_data.contains("class=\"mobImage\"")) {
  57.         try {
  58.             String monster_section;
  59.             if (!temp_data.contains("<div class=\"entityBox\">")) {
  60.             monster_section = getStringBetween(temp_data, "class=\"mobImage\"", "<div class=\"pagination\">"); // Who cares, it works
  61.             } else {
  62.             monster_section = getStringBetween(temp_data, "class=\"mobImage\"", "<div class=\"entityBox\">");
  63.             }
  64.             parseMonsterSection(monster_section);
  65.             temp_data = trimUntil(temp_data, "<div class=\"entityBox\">");
  66.             if (temp_data == null) {
  67.             break;
  68.             }
  69.         } catch (StringIndexOutOfBoundsException ex) {
  70.             System.out.println("Whoops! Something went wrong. Skipping this one...");
  71.             Errors++;
  72.             break;
  73.         }
  74.         }
  75.         System.out.println("Finished crawling page " + CurrentPage + ".");
  76.         if (CurrentPage % 10 == 0) {
  77.         System.out.println();
  78.         System.out.println("Status so far:");
  79.         System.out.println("Monsters: " + MonstersDone + " || Monsters with drops: " + MonstersWithDrops + " || Monsters without drops: " + MonstersWithoutDrops + " || Items: " + drop_entries.size() + " || Errors: " + Errors);
  80.         System.out.println();
  81.         }
  82.     } catch (MalformedURLException mue) {
  83.         System.out.println("Error parsing URL: " + url);
  84.         Errors++;
  85.         return;
  86.     } catch (IOException ioe) {
  87.         System.out.println("Error reading from URL: " + ioe.getLocalizedMessage());
  88.         Errors++;
  89.         return;
  90.     }
  91.     }
  92.    
  93.     /**
  94.      * Builds an SQL file to insert the fetched data to a database.
  95.      */
  96.     public static void dumpQuery() {
  97.     String filename = "MapleArchive-Drops-v" + VERSION + ".sql";
  98.     final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  99.     try {
  100.         File output = new File(filename);
  101.         BufferedWriter bw = new BufferedWriter(new FileWriter(output));
  102.         PrintWriter pw = new PrintWriter(bw);
  103.         StringBuilder sb = new StringBuilder();
  104.         pw.write("/*\r\n * This file was built using the MapleArchive drop fetcher script by Sonic/Simon. \r\n * Creation date: " + sdf.format(Calendar.getInstance().getTime()) + "\r\n * MapleArchive Database version: GlobalMS v" + VERSION + "\r\n * \r\n * -Drop chances are estimated as they cannot be obtained!-\r\n */\r\n\r\n");
  105.         pw.write("TRUNCATE TABLE `drop_data`;\r\n");
  106.         pw.write("INSERT INTO `drop_data` (`dropperid`, `itemid`, `minimum_quantity`, `maximum_quantity`, `questid`, `chance`) VALUES ");
  107.         for (Iterator<DropEntry> i = drop_entries.iterator(); i.hasNext();) {
  108.         DropEntry de = i.next();
  109.         pw.write(de.getQuerySegment());
  110.         if (i.hasNext()) {
  111.             pw.write(", \r\n");
  112.         }
  113.         }
  114.         pw.write(sb.toString());
  115.         pw.close();
  116.         bw.close();
  117.     } catch (IOException ioe) {
  118.         System.out.println("Error writing to file: " + ioe.getLocalizedMessage());
  119.     }
  120.     }
  121.    
  122.     /**
  123.      * Returns the string lying between the two specified strings.
  124.      *
  125.      * @param line
  126.      *        The string to parse
  127.      * @param start
  128.      *        The first string
  129.      * @param end
  130.      *        The last string
  131.      * @return The string between the two specified strings
  132.      */
  133.     public static String getStringBetween(final String line, final String start, final String end) {
  134.     int start_offset = line.indexOf(start) + start.length();
  135.     return line.substring(start_offset, line.substring(start_offset).indexOf(end) + start_offset);
  136.     }
  137.    
  138.     public static void main(final String[] args) {
  139.     long startTime = System.currentTimeMillis();
  140.     System.out.println("MapleArchive Drop Data Fetcher\nOriginal script by Simon --- Modified version by Sonic");
  141.     System.out.println("---------------------------------------");
  142.     System.out.println("Here we go!");
  143.     System.out.println();
  144.     for (CurrentPage = 1; CurrentPage <= NumberOfPages; CurrentPage++) {
  145.         System.out.println("Starting to crawl page " + CurrentPage + " out of " + NumberOfPages + "...");
  146.         crawlPage(BASE_URL + MONSTER_PAGE + "?page=" + CurrentPage);
  147.     }
  148.     long dataEndTime = System.currentTimeMillis();
  149.    
  150.     System.out.println("Finished fetching the drop data.");
  151.     System.out.println();
  152.     System.out.println("Building the SQL file...");
  153.     long sqlStartTime = System.currentTimeMillis();
  154.     dumpQuery();
  155.     long sqlEndTime = System.currentTimeMillis();
  156.     System.out.println("Finished building the SQL file.");
  157.     System.out.println("------------------------");
  158.     System.out.println("Process finished!");
  159.     System.out.println("Total monsters parsed: " + MonstersDone + " || Total monsters with drops: " + MonstersWithDrops + "|| Total monsters without drops: " + MonstersWithoutDrops + " || Total items: " + drop_entries.size() + " || Total errors: " + Errors);
  160.     System.out.println("Data reading time: " + ((dataEndTime - startTime) / 1000) + " seconds || SQL building time: " + ((sqlEndTime - sqlStartTime) / 1000) + " seconds");
  161.     System.out.println("Total time: " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds.");
  162.     System.out.println("------------------------");
  163.     System.out.println("The SQL script can be found in this directory under the name \"" + "MapleArchive-Drops-v" + VERSION + ".sql" + "\".");
  164.     System.out.println("Thanks for using this tool!");
  165.     System.out.println("~Sonic");
  166.     }
  167.    
  168.     /**
  169.      * Parses the item section from a given string and adds the data to the
  170.      * {@link drop_entries} variable.
  171.      *
  172.      * @param html_data
  173.      *        The string to parse.
  174.      * @param MonsterId
  175.      *        The monster ID the drop data belongs to.
  176.      */
  177.     private static void parseItemSection(final String html_data, final int MonsterId, final boolean isBoss) {
  178.     String temp_data = html_data;
  179.     while (temp_data.contains("AJAXLoad('Item', 'id=")) {
  180.         int ItemId = Integer.parseInt(getStringBetween(temp_data, "AJAXLoad('Item', 'id=", "');\">"));
  181.         drop_entries.add(new DropEntry(ItemId, MonsterId, isBoss, VERSION));
  182.         if (temp_data.contains("javascript:return ")) {
  183.         temp_data = trimUntil(temp_data, "javascript:return ");
  184.         } else { // Abusive shit and stuff
  185.         return;
  186.         }
  187.     }
  188.     }
  189.    
  190.     /**
  191.      * Parses a monster section from a given data string.
  192.      *
  193.      * @param html_data
  194.      *        The string containing the HTML data to parse from.
  195.      */
  196.     private static void parseMonsterSection(final String html_data) {
  197.     try {
  198.         MonstersDone++;
  199.         int MonsterId = Integer.parseInt(getStringBetween(html_data, "alt=\"Mob:", "\" />")); // Will it blend? ;-)
  200.         boolean isBoss = false;
  201.         String BossString = getStringBetween(html_data, "<tr><td class=\"statName\"><b>Boss:</b></td><td class=\"statValue\">", "</td></tr>");
  202.         if (BossString.equalsIgnoreCase("No")) {
  203.         isBoss = false;
  204.         } else if (BossString.equalsIgnoreCase("Yes")) {
  205.         isBoss = true;
  206.         }
  207.        
  208.         if (getStringBetween(html_data, "<td class=\"tdDrops\" ", "</td>").contains("<ul><li>None/Unknown</li></ul>")) {
  209.         //System.out.println("Whoops!");
  210.         MonstersWithoutDrops++;
  211.         return;
  212.         }
  213.         // If I missed any section, you are more than welcome to report that to me :D
  214.        
  215.         // Parse Equipment drops
  216.         if (html_data.contains(">Equipment</a>")) {
  217.         parseItemSection(getStringBetween(html_data, ">Equipment</a>", "</ul></li>"), MonsterId, isBoss);
  218.         }
  219.         // Parse Potion drops
  220.         if (html_data.contains(">Potion</a>")) {
  221.         parseItemSection(getStringBetween(html_data, ">Potion</a>", "</ul></li>"), MonsterId, isBoss);
  222.         }
  223.         // Parse Food drops
  224.         if (html_data.contains(">Food</a>")) {
  225.         parseItemSection(getStringBetween(html_data, ">Food</a>", "</ul></li>"), MonsterId, isBoss);
  226.         }
  227.         // Parse Arrow drops
  228.         if (html_data.contains(">Arrows</a>")) {
  229.         parseItemSection(getStringBetween(html_data, ">Arrows</a>", "</ul></li>"), MonsterId, isBoss);
  230.         }
  231.         //Parse Bullet drops
  232.         if (html_data.contains(">Bullet</a>")) {
  233.         parseItemSection(getStringBetween(html_data, ">Bullet</a>", "</ul></li>"), MonsterId, isBoss);
  234.         }
  235.         //Parse Throwing Star drops
  236.         if (html_data.contains(">Throwing Star</a>")) {
  237.         parseItemSection(getStringBetween(html_data, ">Throwing Star</a>", "</ul></li>"), MonsterId, isBoss);
  238.         }
  239.         //Parse Status Removal Potions drops (Anidote, tonic, etc.)
  240.         if (html_data.contains(">Status Removal Potion</a>")) {
  241.         parseItemSection(getStringBetween(html_data, ">Status Removal Potion</a>", "</ul></li>"), MonsterId, isBoss);
  242.         }
  243.         //Parse Mastery Book drops
  244.         if (html_data.contains(">Mastery Book</a>")) {
  245.         parseItemSection(getStringBetween(html_data, ">Mastery Book</a>", "</ul></li>"), MonsterId, isBoss);
  246.         }
  247.         //Parse Skill Book drops
  248.         if (html_data.contains(">Skill Book</a>")) {
  249.         parseItemSection(getStringBetween(html_data, ">Skill Book</a>", "</ul></li>"), MonsterId, isBoss);
  250.         }
  251.         // Parse Misc. Box (The hell is that? :O)
  252.         if (html_data.contains(">Misc. Box</a>")) {
  253.         parseItemSection(getStringBetween(html_data, ">Misc. Box</a>", "</ul></li>"), MonsterId, isBoss);
  254.         }
  255.         //Parse Summoning Sack drops
  256.         if (html_data.contains(">Summoning Sack</a>")) {
  257.         parseItemSection(getStringBetween(html_data, ">Summoning Sack</a>", "</ul></li>"), MonsterId, isBoss);
  258.         }
  259.         // Parse Familiar drops
  260.         if (html_data.contains(">Familiar</a>")) {
  261.         parseItemSection(getStringBetween(html_data, ">Familiar</a>", "</ul></li>"), MonsterId, isBoss);
  262.         }
  263.         //Parse Item Pot drops
  264.         if (html_data.contains(">Item Pot</a>")) {
  265.         parseItemSection(getStringBetween(html_data, ">Item Pot</a>", "</ul></li>"), MonsterId, isBoss);
  266.         }
  267.         //Parse Jett Core Modifier drops
  268.         if (html_data.contains(">Jett Core Modifier</a>")) {
  269.         parseItemSection(getStringBetween(html_data, ">Jett Core Modifier</a>", "</ul></li>"), MonsterId, isBoss);
  270.         }
  271.         //Parse Recipe drops
  272.         if (html_data.contains(">Recipe</a>")) {
  273.         parseItemSection(getStringBetween(html_data, ">Recipe</a>", "</ul></li>"), MonsterId, isBoss);
  274.         }
  275.         // Parse Setup drops
  276.         if (html_data.contains(">Setup</a>")) {
  277.         parseItemSection(getStringBetween(html_data, ">Setup</a>", "</ul></li>"), MonsterId, isBoss);
  278.         }
  279.         // Parse ETC drops
  280.         if (html_data.contains(">Etc</a>")) {
  281.         parseItemSection(getStringBetween(html_data, ">Etc</a>", "</ul></li>"), MonsterId, isBoss);
  282.         }
  283.     } catch (StringIndexOutOfBoundsException ex) {
  284.         System.out.println("Uh oh! Something went wrong. Skipping this one...");
  285.         Errors++;
  286.         ex.printStackTrace();
  287.     }
  288.    
  289.     MonstersWithDrops++;
  290.     }
  291.    
  292.     /**
  293.      * Trims a string until the first occurrence of the provided substring,
  294.      * including the substring itself.
  295.      *
  296.      * @param line
  297.      *        The string to trim.
  298.      * @param until
  299.      *        The substring to stop the trimming after.
  300.      * @return The trimmed string.
  301.      */
  302.     public static String trimUntil(final String line, final String until) {
  303.     int until_pos = line.indexOf(until);
  304.     if (until_pos == -1) {
  305.         return null;
  306.     } else {
  307.         return line.substring(until_pos + until.length());
  308.     }
  309.     }
  310. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement