Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.BufferedReader;
- import java.io.IOException;
- import java.io.InputStreamReader;
- import java.net.HttpURLConnection;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.util.ArrayList;
/**
 * Command-line scraper that collects player names from the paginated list at
 * {@code http://www.mcbans.com/list/<page>}.
 *
 * <p>The first line of standard input is read as the number of pages to fetch (1 if standard
 * input is empty). Pages {@code 1..count} are downloaded in order, and every distinct username
 * is printed exactly once, as soon as the page it first appears on has been processed.
 */
public class scrapertest {

    /** Base of the list URL; the 1-based page number is appended to it. */
    private static final String LIST_URL_PREFIX = "http://www.mcbans.com/list/";

    /** A line must start with this text to be treated as a player table cell. */
    private static final String PLAYER_CELL_PREFIX = "<td><a href=\"/player/";

    /** Closing tags stripped from a player table cell before the name is read. */
    private static final String PLAYER_CELL_SUFFIX = "</a></td>";

    /** End of the opening anchor tag; the link text (the username) follows it. */
    private static final String LINK_TEXT_MARKER = "\">";

    /** Browser-like User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2";

    /** Distinct usernames collected so far, in first-seen order. */
    private static final ArrayList<String> usernames = new ArrayList<String>();

    /**
     * Program entry point.
     *
     * @param args unused
     * @throws MalformedURLException if a page URL cannot be constructed
     * @throws IOException if reading standard input or downloading a page fails
     * @throws NumberFormatException if the first line of standard input is not an integer
     */
    public static void main(String[] args) throws MalformedURLException, IOException {
        System.out.println("Starting");
        BufferedReader stdIn = new BufferedReader(new InputStreamReader(System.in));
        int count = readPageCount(stdIn);
        for (int page = 1; page <= count; page++) {
            // Remember how many names were already reported so that only the names
            // discovered on this page are printed (earlier pages are not repeated).
            int alreadyPrinted = usernames.size();
            ArrayList<String> html = getUrlAsInBrowser(new URL(LIST_URL_PREFIX + page));
            saveUsernamesOutOfMcBansHTML(html);
            for (int i = alreadyPrinted; i < usernames.size(); i++) {
                System.out.println(usernames.get(i));
            }
        }
        System.out.println("Exiting");
    }

    /**
     * Reads the number of pages to fetch from the first line of {@code in}.
     *
     * @param in source of the user's input
     * @return the parsed page count, or 1 if {@code in} is already at end of stream
     * @throws IOException if reading fails
     * @throws NumberFormatException if the line is not a valid integer; the message quotes the
     *     offending input
     */
    private static int readPageCount(BufferedReader in) throws IOException {
        String line = in.readLine();
        if (line == null) {
            return 1;
        }
        try {
            // Tolerate stray whitespace around the number.
            return Integer.parseInt(line.trim());
        } catch (NumberFormatException e) {
            NumberFormatException clearer = new NumberFormatException(
                    "Expected an integer page count on standard input but got: \"" + line + "\"");
            clearer.initCause(e);
            throw clearer;
        }
    }

    /**
     * Scans the given HTML lines and records every username found in a player table cell.
     *
     * @param html lines of one downloaded list page
     */
    private static void saveUsernamesOutOfMcBansHTML(ArrayList<String> html) {
        for (String line : html) {
            String username = extractUsername(line);
            if (username != null) {
                addToUsernames(username);
            }
        }
    }

    /**
     * Extracts the username from a single HTML line of the form
     * {@code <td><a href="/player/...">NAME</a></td>}.
     *
     * @param line one line of HTML; may be {@code null}
     * @return the link text, or {@code null} if the line is not a player cell, is malformed
     *     (no end of the opening anchor tag), has empty link text, is the literal
     *     {@code console}, or contains {@code [}
     */
    static String extractUsername(String line) {
        if (line == null || !line.startsWith(PLAYER_CELL_PREFIX)) {
            return null;
        }
        String rest = line.substring(PLAYER_CELL_PREFIX.length()).replace(PLAYER_CELL_SUFFIX, "");
        int markerAt = rest.indexOf(LINK_TEXT_MARKER);
        if (markerAt < 0) {
            // Malformed cell: without this guard the substring below would silently drop
            // the first character or throw on an empty remainder.
            return null;
        }
        String name = rest.substring(markerAt + LINK_TEXT_MARKER.length());
        if (name.isEmpty() || name.equals("console") || name.contains("[")) {
            return null;
        }
        return name;
    }

    /**
     * Records {@code username} unless it has already been recorded.
     *
     * @param username name to record
     */
    private static void addToUsernames(String username) {
        if (!usernames.contains(username)) {
            usernames.add(username);
        }
    }

    /**
     * Downloads {@code url} with a browser-like User-Agent header and returns the response
     * body split into lines.
     *
     * @param url address to fetch
     * @return the response body, one element per line
     * @throws IOException if the connection cannot be opened or the body cannot be read
     */
    private static ArrayList<String> getUrlAsInBrowser(URL url) throws IOException {
        ArrayList<String> lines = new ArrayList<String>();
        // Clears the JVM-wide "http.agent" property before the explicit header is set below.
        System.setProperty("http.agent", "");
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            connection.setRequestProperty("User-Agent", USER_AGENT);
            connection.connect();
            // Decode with a fixed charset so results do not depend on the platform default.
            // NOTE(review): assumes the site serves UTF-8 (or ASCII-compatible) pages - confirm.
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8"));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    lines.add(line);
                }
            } finally {
                reader.close();
            }
        } finally {
            // Always release the connection, even when reading fails part-way through.
            connection.disconnect();
        }
        return lines;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement