Advertisement
Guest User

Untitled

a guest
Oct 22nd, 2018
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 6.58 KB | None | 0 0
  1. package S4;
  2.  
  3. //import edu.princeton.cs.algs4.In;
  4. //import edu.princeton.cs.algs4.StdOut;
  5.  
  6. //This class should work if wordnet works
  7. public class Outcast {
  8.  
  9.     private WordNet wordnet;
  10.  
  11.     // constructor takes a WordNet object
  12.     public Outcast(WordNet wordnet) {
  13.         this.wordnet = wordnet;
  14.     }
  15.  
  16.     // given an array of WordNet nouns, return an outcast
  17.     public String outcast(String[] nouns) {
  18.         //The outcast is the one with the max distance from others
  19.         int maxDistance = 0;
  20.         //To find the index of the outcast to be able to find the noun later
  21.         int outcastPlacement = 0;
  22.         //Current distance of the loop
  23.         int currentDistance;
  24.         //To help us calculate the distance and thus finding the outcast
  25.         for(int i = 0; i < nouns.length; i++) {
  26.             currentDistance = 0;
  27.             for(int j = 0; j < nouns.length; j++) {
  28.                 //To not add length when it is a duplicate
  29.                 if(!nouns[i].equals(nouns[j])) {
  30.                     currentDistance += wordnet.distance(nouns[i], nouns[j]);
  31.                 }
  32.             }
  33.             //Checking if the current distance is is bigger than the max distance
  34.             if(maxDistance < currentDistance) {
  35.                 //If so, the replace the max distance and the outcast
  36.                 maxDistance = currentDistance;
  37.                 outcastPlacement = i;
  38.             }
  39.         }
  40.         //Returns the outcast noun
  41.         return nouns[outcastPlacement];
  42.     }
  43.  
  44.     public static void main(String[] args) {
  45.         WordNet wordnet = new WordNet("wordnet-data/synsets.txt","wordnet-data/hypernyms.txt");
  46.         Outcast outcast = new Outcast(wordnet);
  47.         String[] test = {"water", "soda", "bed", "orange_juice", "milk", "apple_juice", "tea", "coffee"};
  48.         outcast.outcast(test);
  49.         /*
  50.         WordNet wordnet = new WordNet(args[0], args[1]); Outcast outcast = new Outcast(wordnet);
  51.         for (int t = 2; t < args.length; t++) {
  52.             String[] nouns = In.readStrings(args[t]);
  53.             StdOut.println(args[t] + ": " + outcast.outcast(nouns));
  54.         }*/
  55.     }
  56. }
  57.  
  58.  
  59. -------
  60.  
  61.  
  62. package S4;
  63.  
  import edu.princeton.cs.algs4.Bag;
  import edu.princeton.cs.algs4.Digraph;
  import edu.princeton.cs.algs4.In;
  import edu.princeton.cs.algs4.Out;
  import java.util.ArrayList;
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;
  68.  
  69. public class WordNet {
  70.  
  71.     //The constructor should throw a java.lang.IllegalArgumentException
  72.     //if the input does not correspond to a rooted DAG. The distance() and
  73.     //sap() methods should throw a java.lang.IllegalArgumentException unless
  74.     //both of the noun arguments are WordNet nouns.
  75.  
  76.     //we import SAP so we can use its functions
  77.     private SAP sap;
  78.     //The input looks like this: id, nouns, description
  79.     //we store the nouns and descriptions separately in string arrays
  80.     //the array index represents the id
  81.     private String[] nouns;
  82.     private String[] descriptions;
  83.  
  84.     //a helper function to sort the synset file input
  85.     private void synsethelper(In synsetfile) {
  86.         String line = "";
  87.         Bag<String[]> splitbag = new Bag<String[]>();
  88.         //we split the input on commas and store in a bag
  89.         while(synsetfile.hasNextLine()) {
  90.             line = synsetfile.readLine();
  91.             String[] splitline = line.split(",");
  92.             splitbag.add(splitline);
  93.         }
  94.         //in each bag there is one array (splititem)
  95.         //each array has three indexes:
  96.         //[0]: id
  97.         //[1]: nouns
  98.         //[2]: description
  99.         nouns = new String[splitbag.size()];
  100.         descriptions = new String[splitbag.size()];
  101.  
  102.         for(String[] splititem : splitbag) {
  103.             int id = Integer.parseInt(splititem[0]);
  104.             nouns[id] = splititem[1];
  105.             descriptions[id] = splititem[2];
  106.         }
  107.     }
  108.  
  109.     //a helper function to sort the hypernym file input
  110.     private void hypernymhelper(In hypernymfile) {
  111.         //the input is a row of ids, separated by commas
  112.         //the first id acts like a root and thus connects to every other id around
  113.         String line = "";
  114.         //making a digraph for the nouns
  115.         Digraph nounDigraph = new Digraph(nouns.length);
  116.         //continue to read the text file and add to the line string, whilst the file is not empty
  117.         while(hypernymfile.hasNextLine()) {
  118.             line = hypernymfile.readLine();
  119.             //we split the input (line) on commas and add to a string array
  120.             String[] splitline = line.split(",");
  121.             for(int i = 1; i < splitline.length; i++) {
  122.                 //here we create an edge from the first id to every other id that we iterate through
  123.                 nounDigraph.addEdge(Integer.parseInt(splitline[0]), Integer.parseInt(splitline[i]));
  124.             }
  125.         }
  126.         //here we create a new digraph with the added edges from above
  127.         sap = new SAP(nounDigraph);
  128.     }
  129.  
  130.     //constructor takes the name of the two input files
  131.     public WordNet(String synsets , String hypernyms) {
  132.         In synsetfile = new In(synsets);
  133.         In hypernymfile = new In(hypernyms);
  134.  
  135.         synsethelper(synsetfile);
  136.         hypernymhelper(hypernymfile);
  137.     }
  138.  
  139.     // returns all WordNet nouns
  140.     public Iterable<String> nouns() {
  141.         //Making a set of separated single nouns from the set
  142.         Bag<String> iterableNouns = new Bag<String>(); 
  143.        
  144.         for(String string : nouns) {
  145.             iterableNouns.add(string);
  146.         }
  147.         return iterableNouns;
  148.     }
  149.  
  150.     // is the word a WordNet noun?
  151.     public boolean isNoun(String word) {
  152.         //Using the noun function created above
  153.         //We check each noun in the returned set to see if it contains the word we are looking for
  154.         for (String string : nouns()) {
  155.             if (string.contains(word))
  156.                 return true;
  157.         }
  158.         return false;
  159.     }
  160.  
  161.     //a helper function that returns the index of a string
  162.     private int getindex(String string) {
  163.         for (int i = 0; i < this.nouns.length; i++) {
  164.             if (nouns[i].equals(string)) {
  165.                 return i;
  166.             }
  167.         }
  168.         //we return -1 if we cant find the word
  169.         return -1;
  170.     }
  171.  
  172.     // distance between nounA and nounB (defined below)
  173.     public int distance(String nounA, String nounB) {
  174.         //checking the words are an empty string
  175.         if(nounA.equals(null) || nounB.equals(null)) {
  176.             throw new IllegalArgumentException();
  177.         }
  178.         //we need the index of the words so we can find the length between them
  179.         int a = getindex(nounA);
  180.         int b = getindex(nounB);
  181.        
  182.         int distance = -1;
  183.        
  184.         if (a!= -1 && b!= -1) {
  185.             distance = sap.length(a, b);
  186.         }
  187.  
  188.         return distance;
  189.     }
  190.  
  191.     // a synset (second field of synsets.txt) that is a shortest common ancestor
  192.     // of nounA and nounB
  193.     public String sap(String nounA, String nounB) {
  194.         //checking the words are really a noun
  195.         if(nounA.equals(null) || nounB.equals(null)) {
  196.             throw new IllegalArgumentException();
  197.         }
  198.         int shortestAncestor = sap.ancestor(getindex(nounA), getindex(nounB));
  199.         return nouns[shortestAncestor];
  200.     }
  201.  
  202.     // do unit testing of this class
  203.     public static void main(String[] args) {
  204.         /*WordNet wnet = new WordNet("synsets15.txt", "hypernyms100K.txt");
  205.         Out out = new Out();
  206.         out.println(wnet.sap(h, l));
  207.         out.println(wnet.distance(a, p));
  208.         out.println(wnet.getindex(b));
  209.         out.println(wnet.isNoun(a));
  210.         out.println(wnet.nouns());*/
  211.     }
  212. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement