Advertisement
Guest User

Untitled

a guest
Oct 22nd, 2018
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 4.65 KB | None | 0 0
  1. package S4;
  2.  
  import edu.princeton.cs.algs4.Bag;
  import edu.princeton.cs.algs4.Digraph;
  import edu.princeton.cs.algs4.DirectedCycle;
  import edu.princeton.cs.algs4.In;
  import edu.princeton.cs.algs4.Out;
  import java.util.ArrayList;
  import java.util.Collections;
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;
  7.  
  8. public class WordNet {
  9.  
  10.     //The constructor should throw a java.lang.IllegalArgumentException
  11.     //if the input does not correspond to a rooted DAG. The distance() and
  12.     //sap() methods should throw a java.lang.IllegalArgumentException unless
  13.     //both of the noun arguments are WordNet nouns.
  14.  
  15.     //we import SAP so we can use its functions
  16.     private SAP sap;
  17.     //The input looks like this: id, nouns, description
  18.     //we store the nouns and descriptions separately in string arrays
  19.     //the array index represents the id
  20.     private String[] nouns;
  21.     private String[] descriptions;
  22.  
  23.     //a helper function to sort the synset file input
  24.     private void synsethelper(In synsetfile) {
  25.         String line = "";
  26.         Bag<String[]> splitbag = new Bag<String[]>();
  27.         //we split the input on commas and store in a bag
  28.         while(synsetfile.hasNextLine()) {
  29.             line = synsetfile.readLine();
  30.             String[] splitline = line.split(",");
  31.             splitbag.add(splitline);
  32.         }
  33.         //in each bag there is one array (splititem)
  34.         //each array has three indexes:
  35.         //[0]: id
  36.         //[1]: nouns
  37.         //[2]: description
  38.         nouns = new String[splitbag.size()];
  39.         descriptions = new String[splitbag.size()];
  40.  
  41.         for(String[] splititem : splitbag) {
  42.             int id = Integer.parseInt(splititem[0]);
  43.             nouns[id] = splititem[1];
  44.             descriptions[id] = splititem[2];
  45.         }
  46.     }
  47.  
  48.     //a helper function to sort the hypernym file input
  49.     private void hypernymhelper(In hypernymfile) {
  50.         //the input is a row of ids, separated by commas
  51.         //the first id acts like a root and thus connects to every other id around
  52.         String line = "";
  53.         //making a digraph for the nouns
  54.         Digraph nounDigraph = new Digraph(nouns.length);
  55.         //continue to read the text file and add to the line string, whilst the file is not empty
  56.         while(hypernymfile.hasNextLine()) {
  57.             line = hypernymfile.readLine();
  58.             //we split the input (line) on commas and add to a string array
  59.             String[] splitline = line.split(",");
  60.             for(int i = 1; i < splitline.length; i++) {
  61.                 //here we create an edge from the first id to every other id that we iterate through
  62.                 nounDigraph.addEdge(Integer.parseInt(splitline[0]), Integer.parseInt(splitline[i]));
  63.             }
  64.         }
  65.         //here we create a new digraph with the added edges from above
  66.         sap = new SAP(nounDigraph);
  67.     }
  68.  
  69.     //constructor takes the name of the two input files
  70.     public WordNet(String synsets , String hypernyms) {
  71.         In synsetfile = new In(synsets);
  72.         In hypernymfile = new In(hypernyms);
  73.  
  74.         synsethelper(synsetfile);
  75.         hypernymhelper(hypernymfile);
  76.     }
  77.  
  78.     // returns all WordNet nouns
  79.     public Iterable<String> nouns() {
  80.         //Making a set of separated single nouns from the set
  81.         Bag<String> iterableNouns = new Bag<String>(); 
  82.        
  83.         for(String string : nouns) {
  84.             iterableNouns.add(string);
  85.         }
  86.         return iterableNouns;
  87.     }
  88.  
  89.     // is the word a WordNet noun?
  90.     public boolean isNoun(String word) {
  91.         //Using the noun function created above
  92.         //We check each noun in the returned set to see if it contains the word we are looking for
  93.         for (String string : nouns()) {
  94.             if (string.contains(word))
  95.                 return true;
  96.         }
  97.         return false;
  98.     }
  99.  
  100.     //a helper function that returns the index of a string
  101.     private int getindex(String string) {
  102.         for (int i = 0; i < this.nouns.length; i++) {
  103.             if (nouns[i].equals(string)) {
  104.                 return i;
  105.             }
  106.         }
  107.         //we return -1 if we cant find the word
  108.         return -1;
  109.     }
  110.  
  111.     // distance between nounA and nounB (defined below)
  112.     public int distance(String nounA, String nounB) {
  113.         //checking the words are an empty string
  114.         if(nounA.equals(null) || nounB.equals(null)) {
  115.             throw new IllegalArgumentException();
  116.         }
  117.         //we need the index of the words so we can find the length between them
  118.         int a = getindex(nounA);
  119.         int b = getindex(nounB);
  120.  
  121.         return sap.length(a, b);
  122.     }
  123.  
  124.     // a synset (second field of synsets.txt) that is a shortest common ancestor
  125.     // of nounA and nounB
  126.     public String sap(String nounA, String nounB) {
  127.         //checking the words are really a noun
  128.         if(nounA.equals(null) || nounB.equals(null)) {
  129.             throw new IllegalArgumentException();
  130.         }
  131.         int shortestAncestor = sap.ancestor(getindex(nounA), getindex(nounB));
  132.         return nouns[shortestAncestor];
  133.     }
  134.  
  135.     // do unit testing of this class
  136.     public static void main(String[] args) {
  137.         /*WordNet wnet = new WordNet("synsets15.txt", "hypernyms100K.txt");
  138.         Out out = new Out();
  139.         out.println(wnet.sap(h, l));
  140.         out.println(wnet.distance(a, p));
  141.         out.println(wnet.getindex(b));
  142.         out.println(wnet.isNoun(a));
  143.         out.println(wnet.nouns());*/
  144.     }
  145. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement