Advertisement
Bladtman

Untitled

May 7th, 2012
42
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 8.47 KB | None | 0 0
  1. //A second attempt at creating a data set for the trie.
  2. //These classes are completely ad hoc, and thus not optimized for reuse.
  3.  
  4. import java.io.File;
  5. import java.io.IOException;
  6. import java.io.FileOutputStream;
  7. import java.io.OutputStreamWriter;
  8. import java.io.BufferedWriter;
  9. import java.io.Closeable;
  10.  
  11. import java.util.Scanner;
  12. import java.util.List;
  13. import java.util.ArrayList;
  14. import java.util.Map;
  15. import java.util.HashMap;
  16. import java.util.Set;
  17. import java.util.TreeSet;
  18. import java.util.HashSet;
  19.  
  20. public class ConstructTrieData {
  21.     private static final String outFile="TrieData.txt";
  22.     private Map<Short, String> zipMap = new HashMap<Short, String>(); //map from zipcode to cityname
  23.     private List<Road> roads = new ArrayList<Road>(); //list of (unique) roads.
  24.     private Map<String,HashSet<Road>> zipBList = new HashMap<String, HashSet<Road>>();
  25.  
  26.     //The constructor controlls the flow of this ad-hoc  parser
  27.     public ConstructTrieData (String zips, String edges, String nodes) {
  28.         //initialize the file objects, terminate by runtimeexception if the files do not exist/can't be read
  29.         File zipsF  = assignRFile(zips);
  30.         File edgesF = assignRFile(edges);
  31.         File nodesF = assignRFile(edges);
  32.         File outF = assignWFile(outFile);
  33.  
  34.         //read contents of files into appropriate data structures
  35.         readZips(zipsF, zipMap);        //allows zip-code -> city-name mapping
  36.         makeRoadList(edgesF, zipMap, roads);    //stores unique roads (unique for roadname + zip)
  37.         writeFormats(nodesF, outF, roads);  //transfer controll to the writeFormats method, which pretty much takes over from here.
  38.     }
  39.  
  40.     //Reads zips in the format zip; cityName from File f, intro the map zips
  41.     private void readZips(File f, Map<Short, String> zips) {
  42.         System.out.println("Reading zip codes");
  43.         Scanner sc = getFReader(f);
  44.         sc.nextLine(); sc.nextLine(); //Jump 2 lines   
  45.         while(sc.hasNextLine()) {
  46.             String[] l = sc.nextLine().split(";");
  47.             //add K=zip V=cityName tothe zipmap
  48.             zips.put(Short.parseShort(l[0]), l[1]);
  49.         }
  50.         closeStream(sc);
  51.     }
  52.  
  53.     //Fills ArrayList roads with Road objects, created by reading File edges and mapping the zips to cities via Map zips
  54.     //this method also fills out zipBList
  55.     private void makeRoadList (File edges, Map<Short, String> zips, List<Road> roads) {
  56.         System.out.println("Reading edges to make the road list");
  57.         long ST = System.currentTimeMillis();
  58.        
  59.         //actually reading the roads into a set, so that one road is stored only once, regardless of its number of edges.  
  60.         Set<Road> uniqueRoads = new HashSet<Road>();
  61.  
  62.         //read the file
  63.         Scanner sc = getFReader(edges);
  64.         sc.nextLine(); //jump a line
  65.         while(sc.hasNextLine()) {
  66.             String[] l = sc.nextLine().split(",");
  67.  
  68.             String rName = l[6]; //name at sixth place
  69.             rName = rName.substring(1, rName.length()-1); // remove the leading and thrailing '
  70.             Short zip = Short.parseShort(l[17]);    //zip code at place 17 (and another at 18)
  71.             String city = zips.get(zip);        //get city name from the zip-map
  72.             Integer nId = Integer.parseInt(l[0]);   //node-id at zeroth place (and another at 1)
  73.  
  74.             if (city == null || rName.length()==0 ) continue;   //this goes for sweedish roads, where the data from post danmark is inadequate
  75.                                         //so the city name is null.
  76.                                         //And for kdv_unload entries without a road name
  77.             Road r = new Road(rName, zip, city, nId);
  78.             uniqueRoads.add(r);
  79.             addToZipList(r);
  80.         }
  81.         Road one = null, two = null, three = null;
  82.         int i =0;
  83.         for (Road r : uniqueRoads) {
  84.             if (r.zipCode == 4600 && r.rName.equals("Lærkevej")) {
  85.                 System.out.println("found one");
  86.                 if (i==0) one=r;
  87.                 if (i==1) two=r;
  88.                 if (i==2) three=r;
  89.                 i++;
  90.             }
  91.         }
  92.  
  93.         System.out.println(one.equals(one));
  94.         System.out.println(one.equals(two));
  95.         System.out.println(one.equals(three));
  96.        
  97.         roads.addAll(uniqueRoads);
  98.         System.out.println("Done reading edges and making the road list. It took: " + ((System.currentTimeMillis()-ST)/1000) + " seconds.\n" );
  99.         closeStream(sc);
  100.     }
  101.  
  102.     private void writeFormats(File nodeFile, File outFile, List<Road> roads) {
  103.         writeR  (outFile, roads);
  104.         //writeC    (nodeFile, outFile, zipBList);
  105.     }
  106.  
  107.     private void writeR (File outF, List<Road> roads) {
  108.         System.out.println("Writing normal formats formats (i.e; City## not yet included)");
  109.         long ST = System.currentTimeMillis();
  110.         BufferedWriter bw = getFWriter(outF);
  111.         for (Road r : roads) {
  112.             //Road##City
  113.             writeTo(bw, r.rName + "##" + r.cityName + ";" + r.nodeId);
  114.             //Road#Zip#City
  115.             writeTo(bw, r.rName + "#" + r.zipCode + "#" + r.cityName + ";" + r.nodeId);
  116.             //City#Zip#Road
  117.             writeTo(bw, r.cityName + "#" + r.zipCode + "#" + r.rName + ";" + r.nodeId);
  118.             //City##Road
  119.             writeTo(bw, r.cityName + "##" + r.rName + ";" + r.nodeId);
  120.         }
  121.         System.out.println("Done writing normal formats. It took: " + (System.currentTimeMillis()-ST) + " milliseconds.\n" );
  122.         closeStream(bw);
  123.     }
  124.  
  125.     private void writeC(File nodeF, File outF, Map<String, HashSet<Road>> zipBList) {
  126.         System.out.println("Writing City## format");
  127.         long ST = System.currentTimeMillis();
  128.         BufferedWriter bw = getFWriter(outF);
  129.         for (Set<Road> s : zipBList.values()){
  130.             Road r = centralRoad(s);
  131.             writeTo(bw, r.cityName + "##" + ";" + r.nodeId);
  132.         }
  133.        
  134.         System.out.println("Done writing City format. It took: " + (System.currentTimeMillis()-ST) + " milliseconds.\n" );
  135.         closeStream(bw);
  136.     }
  137.  
  138.     private Road centralRoad(Set<Road> roads) {
  139.         Object[] arr = roads.toArray();
  140.         @SuppressWarnings("unchecked")
  141.         Road tmp = (Road) arr[0];
  142.         return tmp;
  143.     }
  144.  
  145.     /*
  146.      * Helper methods from here on out
  147.      * convention for the IO helpers is to throw a RuntimeException on fatal IO problems, and thereby crash the program, unless otherwise noted
  148.      */
  149.  
  150.     //getRead/write methods, simply provides a 'cleaner' way of instatiating fileIO objects
  151.     private Scanner getFReader(File f){
  152.         try {
  153.             return new Scanner(f);
  154.         } catch (IOException e){
  155.             throw new RuntimeException(e);
  156.         }
  157.     }
  158.    
  159.     //the two following methods, clumsy as they may seem, provide a means of writing to a file using relavtively clean code
  160.     //returns a BufferedWriter on the File outF (asumes UTF-8)
  161.     private BufferedWriter getFWriter(File outF) {
  162.         try {
  163.             return new BufferedWriter( new OutputStreamWriter ( new FileOutputStream(outF, true), "UTF-8")); //true means append instead of overwriting
  164.         } catch (IOException e) {
  165.             throw new RuntimeException(e);
  166.         }
  167.     }
  168.  
  169.     //write String s, followed by a newline character, to bw
  170.     private void writeTo (BufferedWriter bw, String s) {
  171.         try {
  172.             bw.write(s);
  173.             bw.newLine();
  174.         } catch (IOException e) {
  175.             throw new RuntimeException(e);
  176.         }
  177.     }
  178.  
  179.     //verifies the validity of a file denoted by String path, and assigns it to a File f on a successful verification.
  180.     private File assignRFile(String path) {
  181.         File tmp = new File(path);
  182.         if (!tmp.canRead()) {
  183.             throw new RuntimeException("Cant read file " + path);
  184.         }
  185.         return tmp;
  186.     }
  187.  
  188.     //removes the file if it exists, and returns a file obj representing the newly created file
  189.     //terminates by runtimeexception if any of the IO fails fataly.
  190.     private File assignWFile(String path) {
  191.         File tmp = new File(path);
  192.         if (tmp.exists()) {
  193.             if (!tmp.delete()) throw new RuntimeException("Could not delete file: " + path);
  194.         }
  195.         try {
  196.             tmp.createNewFile();
  197.         } catch (IOException e) {
  198.             throw new RuntimeException(e);
  199.         }
  200.         return tmp;
  201.     }
  202.  
  203.     //convient way to close streams.
  204.     //doesnt crash on IO problems, as an error here is non-fatal for the parser.)
  205.     private void closeStream(Closeable c) {
  206.         try {
  207.             c.close();
  208.         } catch (IOException e) {
  209.             System.out.println("Huh, couldn't close this");
  210.             e.printStackTrace();
  211.         }
  212.     }
  213.  
  214.     private void addToZipList(Road r) {
  215.         if (zipBList.containsKey(r.cityName)) {
  216.             zipBList.get(r.cityName).add(r);
  217.         } else {
  218.             HashSet<Road> hs = new HashSet<Road>();
  219.             hs.add(r);
  220.             zipBList.put(r.cityName ,hs);
  221.         }
  222.     }
  223.  
  224.     /*
  225.      * Helper section over, from here on out only private classes and main()
  226.      */
  227.  
  228.     //class representing  a road, with all its neccesary data.
  229.     private class Road {   
  230.         public String rName, cityName;
  231.         public Short zipCode;
  232.         public Integer nodeId;
  233.  
  234.         public Road(String rn, Short zip, String cName, Integer nId) {
  235.             rName = rn;
  236.             zipCode = zip;
  237.             cityName = cName;
  238.             nodeId = nId;
  239.         }
  240.  
  241.         @Override
  242.         public boolean equals(Object r) {
  243.             if (r == null || r.getClass() != this.getClass()) return false;
  244.             if (this == r) return true;
  245.             Road other = (Road) r;
  246.             return (this.rName.equals(other.rName) && this.zipCode.equals(other.zipCode));
  247.         }
  248.     }
  249.  
  250.     public static void main (String[] args){
  251.         if (args.length != 3) {
  252.             System.out.println("Usage: zipfile edgefile nodefile");
  253.             return;
  254.         }
  255.  
  256.         new ConstructTrieData(args[0], args[1], args[2]);
  257.     }
  258. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement