Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //A second attempt at creating a data set for the trie.
- //These classes are completely ad hoc, and thus not optimized for reuse.
- import java.io.File;
- import java.io.IOException;
- import java.io.FileOutputStream;
- import java.io.OutputStreamWriter;
- import java.io.BufferedWriter;
- import java.io.Closeable;
- import java.util.Scanner;
- import java.util.List;
- import java.util.ArrayList;
- import java.util.Map;
- import java.util.HashMap;
- import java.util.Set;
- import java.util.TreeSet;
- import java.util.HashSet;
/**
 * Ad-hoc parser that builds the data set for the trie.
 *
 * <p>Reads a zip-code file and an edge file, reduces the edges to a set of unique roads
 * (unique per road name + zip code) and appends four search-key formats per road to
 * {@code TrieData.txt} in the working directory. All work is driven by the constructor.
 *
 * <p>Convention for the IO helpers: fatal IO problems are reported by throwing a
 * {@link RuntimeException}, which terminates the program, unless otherwise noted.
 */
public class ConstructTrieData {
    /** Name of the generated file; it is deleted and recreated on every run. */
    private static final String OUT_FILE = "TrieData.txt";
    /** Number of header lines skipped at the top of the zip file. */
    private static final int ZIP_HEADER_LINES = 2;
    /** Column of the edge file holding a node id (a second one follows in the next column). */
    private static final int NODE_ID_COLUMN = 0;
    /** Column of the edge file holding the road name wrapped in single quotes. */
    private static final int ROAD_NAME_COLUMN = 6;
    /** Column of the edge file holding a zip code (a second one follows in the next column). */
    private static final int ZIP_COLUMN = 17;

    /** Map from zip code to city name. */
    private final Map<Short, String> zipMap = new HashMap<Short, String>();
    /** List of (unique) roads. */
    private final List<Road> roads = new ArrayList<Road>();
    /** Roads grouped by city name; only consumed by the currently disabled City## writer. */
    private final Map<String, HashSet<Road>> zipBList = new HashMap<String, HashSet<Road>>();

    /**
     * Runs the whole conversion: validates the files, reads zips and edges, writes the output.
     *
     * @param zips  path of the zip-code file ({@code zip;cityName} per line, two header lines)
     * @param edges path of the comma separated edge file (one header line)
     * @param nodes path of the node file; it is only checked for readability at the moment
     * @throws RuntimeException if an input file cannot be read or the output cannot be created
     */
    public ConstructTrieData(String zips, String edges, String nodes) {
        // Initialize the file objects; each helper throws if the file is unusable.
        File zipsF = assignRFile(zips);
        File edgesF = assignRFile(edges);
        File nodesF = assignRFile(nodes); // was mistakenly built from 'edges'
        File outF = assignWFile(OUT_FILE);

        readZips(zipsF, zipMap);              // allows zip-code -> city-name mapping
        makeRoadList(edgesF, zipMap, roads);  // stores unique roads (road name + zip)
        writeFormats(nodesF, outF, roads);    // writes every supported key format
    }

    /**
     * Reads {@code zip;cityName} lines from {@code f} into {@code zips}.
     * The header lines are skipped, as are lines with fewer than two fields.
     *
     * @throws NumberFormatException if a zip field is not a valid short
     */
    private void readZips(File f, Map<Short, String> zips) {
        System.out.println("Reading zip codes");
        Scanner sc = getFReader(f);
        try {
            for (int i = 0; i < ZIP_HEADER_LINES && sc.hasNextLine(); i++) {
                sc.nextLine();
            }
            while (sc.hasNextLine()) {
                String[] parts = sc.nextLine().split(";");
                if (parts.length < 2) {
                    continue; // blank or truncated line: nothing to map
                }
                zips.put(Short.parseShort(parts[0]), parts[1]);
            }
        } finally {
            closeStream(sc);
        }
    }

    /**
     * Fills {@code roads} with one {@link Road} per unique (road name, zip) pair found in
     * {@code edges}, resolving the city name through {@code zips}. Also fills the
     * per-city grouping in {@code zipBList}.
     *
     * <p>Edges are skipped when the row is too short, when the road name is empty, or when
     * the zip code has no known city (e.g. roads outside the zip file's coverage).
     * When several edges describe the same road, the node id of the first one is kept.
     */
    private void makeRoadList(File edges, Map<Short, String> zips, List<Road> roads) {
        System.out.println("Reading edges to make the road list");
        long startMillis = System.currentTimeMillis();
        // A set, so that one road is stored only once regardless of its number of edges.
        // This relies on Road implementing both equals() and hashCode().
        Set<Road> uniqueRoads = new HashSet<Road>();

        Scanner sc = getFReader(edges);
        try {
            if (sc.hasNextLine()) {
                sc.nextLine(); // skip the header line
            }
            while (sc.hasNextLine()) {
                String[] fields = sc.nextLine().split(",");
                if (fields.length <= ZIP_COLUMN) {
                    continue; // blank or truncated row
                }
                String quotedName = fields[ROAD_NAME_COLUMN];
                if (quotedName.length() <= 2) {
                    continue; // only the two quotes (or less): entry without a road name
                }
                // Remove the leading and trailing quote character.
                String roadName = quotedName.substring(1, quotedName.length() - 1);
                short zip = Short.parseShort(fields[ZIP_COLUMN]);
                String city = zips.get(zip);
                if (city == null) {
                    continue; // zip code unknown to the zip file
                }
                int nodeId = Integer.parseInt(fields[NODE_ID_COLUMN]);

                Road road = new Road(roadName, zip, city, nodeId);
                uniqueRoads.add(road);
                addToZipList(road);
            }
        } finally {
            closeStream(sc);
        }

        roads.addAll(uniqueRoads);
        System.out.println("Done reading edges and making the road list. It took: " + ((System.currentTimeMillis()-startMillis)/1000) + " seconds.\n" );
    }

    /**
     * Writes all enabled output formats. Only the road based formats are written;
     * the City## format ({@code writeC}) is intentionally not invoked yet.
     */
    private void writeFormats(File nodeFile, File outFile, List<Road> roads) {
        writeR(outFile, roads);
    }

    /**
     * Appends four lines per road to {@code outF}, each ending in {@code ;nodeId}:
     * {@code Road##City}, {@code Road#Zip#City}, {@code City#Zip#Road} and {@code City##Road}.
     */
    private void writeR(File outF, List<Road> roads) {
        System.out.println("Writing normal formats formats (i.e; City## not yet included)");
        long startMillis = System.currentTimeMillis();
        BufferedWriter bw = getFWriter(outF);
        try {
            for (Road r : roads) {
                String suffix = ";" + r.nodeId;
                writeTo(bw, r.rName + "##" + r.cityName + suffix);
                writeTo(bw, r.rName + "#" + r.zipCode + "#" + r.cityName + suffix);
                writeTo(bw, r.cityName + "#" + r.zipCode + "#" + r.rName + suffix);
                writeTo(bw, r.cityName + "##" + r.rName + suffix);
            }
        } finally {
            // Closing also flushes the buffer, so it must happen even if a write failed.
            closeStream(bw);
        }
        System.out.println("Done writing normal formats. It took: " + (System.currentTimeMillis()-startMillis) + " milliseconds.\n" );
    }

    /**
     * Appends one {@code City##;nodeId} line per city to {@code outF}, using the road
     * chosen by {@link #centralRoad(Set)}. Currently not called by {@code writeFormats}.
     */
    private void writeC(File nodeF, File outF, Map<String, HashSet<Road>> zipBList) {
        System.out.println("Writing City## format");
        long startMillis = System.currentTimeMillis();
        BufferedWriter bw = getFWriter(outF);
        try {
            for (Set<Road> cityRoads : zipBList.values()) {
                Road r = centralRoad(cityRoads);
                writeTo(bw, r.cityName + "##" + ";" + r.nodeId);
            }
        } finally {
            closeStream(bw);
        }
        System.out.println("Done writing City format. It took: " + (System.currentTimeMillis()-startMillis) + " milliseconds.\n" );
    }

    /**
     * Picks the road representing a city. For now this is simply the first road the set's
     * iterator yields, i.e. an arbitrary one.
     *
     * @throws java.util.NoSuchElementException if {@code roads} is empty
     */
    private Road centralRoad(Set<Road> roads) {
        return roads.iterator().next();
    }

    /*
     * Helper methods from here on out.
     */

    /** Opens a {@link Scanner} on {@code f}, using the platform default charset. */
    private Scanner getFReader(File f) {
        try {
            return new Scanner(f);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Opens a UTF-8 {@link BufferedWriter} on {@code outF} that appends instead of overwriting. */
    private BufferedWriter getFWriter(File outF) {
        try {
            return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outF, true), "UTF-8"));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Writes {@code s} followed by a line separator to {@code bw}. */
    private void writeTo(BufferedWriter bw, String s) {
        try {
            bw.write(s);
            bw.newLine();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns a {@link File} for {@code path} after verifying that it can be read.
     *
     * @throws RuntimeException if the file does not exist or is not readable
     */
    private File assignRFile(String path) {
        File candidate = new File(path);
        if (!candidate.canRead()) {
            throw new RuntimeException("Cant read file " + path);
        }
        return candidate;
    }

    /**
     * Removes the file at {@code path} if it exists and creates it anew.
     *
     * @throws RuntimeException if the old file cannot be deleted or the new one cannot be created
     */
    private File assignWFile(String path) {
        File target = new File(path);
        if (target.exists() && !target.delete()) {
            throw new RuntimeException("Could not delete file: " + path);
        }
        try {
            target.createNewFile();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return target;
    }

    /**
     * Closes {@code c}. A failure is only reported, not rethrown, because an error while
     * closing is non-fatal for the parser.
     */
    private void closeStream(Closeable c) {
        try {
            c.close();
        } catch (IOException e) {
            System.out.println("Huh, couldn't close this");
            e.printStackTrace();
        }
    }

    /** Adds {@code r} to the set of roads of its city, creating that set on first use. */
    private void addToZipList(Road r) {
        HashSet<Road> cityRoads = zipBList.get(r.cityName);
        if (cityRoads == null) {
            cityRoads = new HashSet<Road>();
            zipBList.put(r.cityName, cityRoads);
        }
        cityRoads.add(r);
    }

    /*
     * Helper section over, from here on out only private classes and main().
     */

    /**
     * Immutable value describing a road. Two roads are equal when road name and zip code
     * match; city name and node id do not take part in equality.
     */
    private static final class Road {
        final String rName;
        final String cityName;
        final short zipCode;
        final int nodeId;

        Road(String rn, short zip, String cName, int nId) {
            rName = rn;
            zipCode = zip;
            cityName = cName;
            nodeId = nId;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || o.getClass() != getClass()) {
                return false;
            }
            Road other = (Road) o;
            return zipCode == other.zipCode && rName.equals(other.rName);
        }

        /** Built from the same fields as {@link #equals(Object)} so hash based sets de-duplicate. */
        @Override
        public int hashCode() {
            return 31 * rName.hashCode() + zipCode;
        }
    }

    /** Command line entry point; expects the zip, edge and node file paths in that order. */
    public static void main(String[] args) {
        if (args.length != 3) {
            System.out.println("Usage: zipfile edgefile nodefile");
            return;
        }
        new ConstructTrieData(args[0], args[1], args[2]);
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement