Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package S4;
- //import edu.princeton.cs.algs4.In;
- //import edu.princeton.cs.algs4.StdOut;
- //This class should work if wordnet works
- public class Outcast {
- private WordNet wordnet;
- // constructor takes a WordNet object
- public Outcast(WordNet wordnet) {
- this.wordnet = wordnet;
- }
- // given an array of WordNet nouns, return an outcast
- public String outcast(String[] nouns) {
- //The outcast is the one with the max distance from others
- int maxDistance = 0;
- //To find the index of the outcast to be able to find the noun later
- int outcastPlacement = 0;
- //Current distance of the loop
- int currentDistance;
- //To help us calculate the distance and thus finding the outcast
- for(int i = 0; i < nouns.length; i++) {
- currentDistance = 0;
- for(int j = 0; j < nouns.length; j++) {
- //To not add length when it is a duplicate
- if(!nouns[i].equals(nouns[j])) {
- currentDistance += wordnet.distance(nouns[i], nouns[j]);
- }
- }
- //Checking if the current distance is is bigger than the max distance
- if(maxDistance < currentDistance) {
- //If so, the replace the max distance and the outcast
- maxDistance = currentDistance;
- outcastPlacement = i;
- }
- }
- //Returns the outcast noun
- return nouns[outcastPlacement];
- }
- public static void main(String[] args) {
- WordNet wordnet = new WordNet("wordnet-data/synsets.txt","wordnet-data/hypernyms.txt");
- Outcast outcast = new Outcast(wordnet);
- String[] test = {"water", "soda", "bed", "orange_juice", "milk", "apple_juice", "tea", "coffee"};
- outcast.outcast(test);
- /*
- WordNet wordnet = new WordNet(args[0], args[1]); Outcast outcast = new Outcast(wordnet);
- for (int t = 2; t < args.length; t++) {
- String[] nouns = In.readStrings(args[t]);
- StdOut.println(args[t] + ": " + outcast.outcast(nouns));
- }*/
- }
- }
- -------
- package S4;
import edu.princeton.cs.algs4.Bag;
import edu.princeton.cs.algs4.Digraph;
import edu.princeton.cs.algs4.DirectedCycle;
import edu.princeton.cs.algs4.In;
import edu.princeton.cs.algs4.Out;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
- public class WordNet {
- //The constructor should throw a java.lang.IllegalArgumentException
- //if the input does not correspond to a rooted DAG. The distance() and
- //sap() methods should throw a java.lang.IllegalArgumentException unless
- //both of the noun arguments are WordNet nouns.
- //we import SAP so we can use its functions
- private SAP sap;
- //The input looks like this: id, nouns, description
- //we store the nouns and descriptions separately in string arrays
- //the array index represents the id
- private String[] nouns;
- private String[] descriptions;
- //a helper function to sort the synset file input
- private void synsethelper(In synsetfile) {
- String line = "";
- Bag<String[]> splitbag = new Bag<String[]>();
- //we split the input on commas and store in a bag
- while(synsetfile.hasNextLine()) {
- line = synsetfile.readLine();
- String[] splitline = line.split(",");
- splitbag.add(splitline);
- }
- //in each bag there is one array (splititem)
- //each array has three indexes:
- //[0]: id
- //[1]: nouns
- //[2]: description
- nouns = new String[splitbag.size()];
- descriptions = new String[splitbag.size()];
- for(String[] splititem : splitbag) {
- int id = Integer.parseInt(splititem[0]);
- nouns[id] = splititem[1];
- descriptions[id] = splititem[2];
- }
- }
- //a helper function to sort the hypernym file input
- private void hypernymhelper(In hypernymfile) {
- //the input is a row of ids, separated by commas
- //the first id acts like a root and thus connects to every other id around
- String line = "";
- //making a digraph for the nouns
- Digraph nounDigraph = new Digraph(nouns.length);
- //continue to read the text file and add to the line string, whilst the file is not empty
- while(hypernymfile.hasNextLine()) {
- line = hypernymfile.readLine();
- //we split the input (line) on commas and add to a string array
- String[] splitline = line.split(",");
- for(int i = 1; i < splitline.length; i++) {
- //here we create an edge from the first id to every other id that we iterate through
- nounDigraph.addEdge(Integer.parseInt(splitline[0]), Integer.parseInt(splitline[i]));
- }
- }
- //here we create a new digraph with the added edges from above
- sap = new SAP(nounDigraph);
- }
- //constructor takes the name of the two input files
- public WordNet(String synsets , String hypernyms) {
- In synsetfile = new In(synsets);
- In hypernymfile = new In(hypernyms);
- synsethelper(synsetfile);
- hypernymhelper(hypernymfile);
- }
- // returns all WordNet nouns
- public Iterable<String> nouns() {
- //Making a set of separated single nouns from the set
- Bag<String> iterableNouns = new Bag<String>();
- for(String string : nouns) {
- iterableNouns.add(string);
- }
- return iterableNouns;
- }
- // is the word a WordNet noun?
- public boolean isNoun(String word) {
- //Using the noun function created above
- //We check each noun in the returned set to see if it contains the word we are looking for
- for (String string : nouns()) {
- if (string.contains(word))
- return true;
- }
- return false;
- }
- //a helper function that returns the index of a string
- private int getindex(String string) {
- for (int i = 0; i < this.nouns.length; i++) {
- if (nouns[i].equals(string)) {
- return i;
- }
- }
- //we return -1 if we cant find the word
- return -1;
- }
- // distance between nounA and nounB (defined below)
- public int distance(String nounA, String nounB) {
- //checking the words are an empty string
- if(nounA.equals(null) || nounB.equals(null)) {
- throw new IllegalArgumentException();
- }
- //we need the index of the words so we can find the length between them
- int a = getindex(nounA);
- int b = getindex(nounB);
- int distance = -1;
- if (a!= -1 && b!= -1) {
- distance = sap.length(a, b);
- }
- return distance;
- }
- // a synset (second field of synsets.txt) that is a shortest common ancestor
- // of nounA and nounB
- public String sap(String nounA, String nounB) {
- //checking the words are really a noun
- if(nounA.equals(null) || nounB.equals(null)) {
- throw new IllegalArgumentException();
- }
- int shortestAncestor = sap.ancestor(getindex(nounA), getindex(nounB));
- return nouns[shortestAncestor];
- }
- // do unit testing of this class
- public static void main(String[] args) {
- /*WordNet wnet = new WordNet("synsets15.txt", "hypernyms100K.txt");
- Out out = new Out();
- out.println(wnet.sap(h, l));
- out.println(wnet.distance(a, p));
- out.println(wnet.getindex(b));
- out.println(wnet.isNoun(a));
- out.println(wnet.nouns());*/
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement