Guest User

Untitled

a guest
Jun 25th, 2018
89
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 2.88 KB | None | 0 0
  1. package poke;
  2.  
  3. import java.io.BufferedReader;
  4. import java.io.FileReader;
  5. import java.io.IOException;
  6. import java.util.ArrayList;
  7. import java.util.HashMap;
  8.  
  9. public class pokescrape {
  10.     //ac = array-of-chars, O = open, C = close
  11.     static char[] actdO = "<td".toCharArray(), actdC = "</td>".toCharArray();
  12.     static char[] actrO = "<tr".toCharArray(), acPathEnd = {'.'};
  13.     static char[] acMultPathBegin = "/games/type/".toCharArray();
  14.     static char[] acTypePathBegin = "/pokedex-dp/type/".toCharArray();
  15.    
  16.     static final int nColumns = 18;
  17.     static final char separatorChar = '/';
  18.    
  19.     public static void main(String[] args) throws IOException {
  20.         buildSemanticTable(readTable("c:\\users\\russell\\downloads\\a.htm"));
  21.     }
  22.     static HashMap<Integer, String[]> buildSemanticTable(ArrayList<String[]> tbl) throws IOException {
  23.         HashMap<String, Integer> typeToBits = new HashMap<String, Integer>();
  24.         //starting at 1 because the top-left corner of the table is empty
  25.         for (int i = 1; i < nColumns; i++) {
  26.             String atk2 = tbl.get(0)[i];
  27.             //atk2 is the name of a vertical image, which has a 2 on the end of it (i.e. dragon2)
  28.             //since we'll only care about horizontal images later, which have no 2, we remove it
  29.             String atk = atk2.substring(0, atk2.length() - 1);
  30.             typeToBits.put(atk, 1 << i);
  31.         }
  32.         HashMap<Integer, String[]> semanticTable = new HashMap<Integer, String[]>();
  33.         //starting at 1 because we just did the top row
  34.         //going to size-1 because the last row is the same as the top (index)
  35.         for (int row = 1; row < tbl.size() - 1; row++) {
  36.             //first, determine the attack-type flags
  37.             int atkType = 0;
  38.             for (String atk : tbl.get(row)[0].split(""+separatorChar))
  39.                 atkType |= typeToBits.get(atk);
  40.            
  41.             //allocate space for this row of the semantic table
  42.             semanticTable.put(atkType, new String[typeToBits.size()]);
  43.             //and then put 'em in
  44.             for (int col = 1; col < nColumns; col++)
  45.                 semanticTable.get(atkType)[col - 1] = tbl.get(row)[col];
  46.         }
  47.         return semanticTable;
  48.     }
  49.     static ArrayList<String[]> readTable(String fn) throws IOException {
  50.         RingBuf rb = new RingBuf(new BufferedReader(new FileReader(fn)));
  51.         ArrayList<String[]> rows = new ArrayList<String[]>();
  52.         int cColIdx = 0;
  53.        
  54.         while (rb.read() != -1)
  55.             if (rb.justRead(actrO)) {
  56.                 //if we've read a <tr>, we need to allocate space for the next row which we're about to read
  57.                 rows.add(new String[nColumns]);
  58.                 cColIdx = 0;
  59.             } else if (rb.justRead(actdO)) {
  60.                 //if we've read a <td>, we need to properly read what information's in it and put it in the table
  61.                 String ccell = "";
  62.                 while (rb.read() != -1 && !rb.justRead(actdC))
  63.                     if (rb.justRead(acMultPathBegin))
  64.                         ccell += rb.readUntil(acPathEnd);
  65.                     else if (rb.justRead(acTypePathBegin))
  66.                         ccell += rb.readUntil(acPathEnd)+separatorChar;
  67.                 rows.get(rows.size() - 1)[cColIdx] = ccell;
  68.                 ++cColIdx;
  69.             }
  70.         return rows;
  71.     }
  72. }
Add Comment
Please, Sign In to add comment