// Untitled

/********* LexiconData.java ********
 *
 *  APCS Labs 2011-2020
 *  Cryptology
 *  Dr. John Pais
 *  pais.john@gmail.com
 *  Copyright (c) 2011 to present John Pais. All rights reserved.
 *
 */

package LexiconData;
import java.util.*;

/**
 * Word-level data for a lexicon over a fixed alphabet.
 *
 * <p>The class loads a lexicon file, extracts its plaintext words, and groups
 * them in two ways:
 * <ul>
 *   <li><b>equivalence classes</b> - words that use the same set of characters;</li>
 *   <li><b>anagram classes</b> - words that use the same characters the same
 *       number of times (a refinement of the relation above).</li>
 * </ul>
 * It also provides simple statistics (character, bigram and word-length
 * percentages) and a console report.
 *
 * <p>Both groupings are cached on disk as {@code equivClassesSorted.txt} and
 * {@code anagramClassesSorted.txt}. Cache files written by a version of this
 * class whose {@code sameCharSet}/{@code isAnagram} were incorrect must be
 * regenerated by constructing once with {@code init == true}.
 *
 * <p>Sets and lists returned by the getters are the internal instances (and one
 * class set may be shared by several words); treat them as read-only.
 */
public class LexiconData
{
    /** Number of leading entries previewed for each list in {@link #lexiconReport}. */
    private static final int REPORT_PREVIEW_SIZE = 50;

    protected List<Character> alphabet;
    protected int alphaSize;
    // Ntuples of individual plaintext words or (plaintext word, known language
    // word) pairs, as produced by ReadWriteFile. Component 0 is the plaintext word.
    private List<Ntuple> lexicon = new ArrayList<Ntuple>();
    private final List<String> plaintextWords = new ArrayList<String>();
    // Each ntuple is (word, class of that word, class size).
    private final List<Ntuple> equivClassesSorted = new ArrayList<Ntuple>();
    private final Map<String, Set<String>> mapWordToEquivClass = new HashMap<String, Set<String>>();
    private final Map<String, Integer> mapWordToEquivClassSize = new HashMap<String, Integer>();
    private final List<Ntuple> anagramClassesSorted = new ArrayList<Ntuple>();
    private final Map<String, Set<String>> mapWordToAnagramClass = new HashMap<String, Set<String>>();
    private final Map<String, Integer> mapWordToAnagramClassSize = new HashMap<String, Integer>();

    /**
     * Loads the lexicon and prepares the equivalence and anagram class data.
     *
     * @param alphabet the alphabet used for the character statistics
     * @param dirPath  directory prefix (including trailing separator) for the
     *                 lexicon file and the two cache files
     * @param lexicon  lexicon file name, relative to {@code dirPath}
     * @param init     {@code true} to compute both class lists and write them to
     *                 the cache files; {@code false} to read both cache files
     *                 written by an earlier {@code init == true} run
     */
    public LexiconData(List<Character> alphabet, String dirPath, String lexicon, boolean init)
    {
        this.alphabet = alphabet;
        this.alphaSize = alphabet.size();
        readFileOfLexicon(dirPath + lexicon);
        createPlaintextWords();

        String equivFile = dirPath + "equivClassesSorted.txt";
        String anagramFile = dirPath + "anagramClassesSorted.txt";
        if (init) {
            writeEquivClassesSorted(equivFile);
            writeAnagramClassesSorted(anagramFile);
        } else {
            readEquivClassesSorted(equivFile);
            readAnagramClassesSorted(anagramFile);
        }

        mapWordToEquivClass();
        mapWordToEquivClassSize();
        mapWordToAnagramClass();
        mapWordToAnagramClassSize();
    }

    /** Problem 1. Returns the alphabet passed to the constructor. */
    public List<Character> getAlphabet()
    {
        return alphabet;
    }

    /** Problem 2. Returns the size the alphabet had at construction time. */
    public int getAlphaSize()
    {
        return alphaSize;
    }

    /**
     * Problem 3. Reads the lexicon file into the list of ntuples. The shape of
     * each ntuple (single word or word pair) is decided by ReadWriteFile.
     *
     * @param inputFile path of the lexicon file
     */
    public void readFileOfLexicon(String inputFile)
    {
        // NOTE(review): the meaning of the second constructor argument (3) is
        // defined by ReadWriteFile and is not visible here.
        ReadWriteFile rwf = new ReadWriteFile(inputFile,3);
        lexicon = rwf.getNtupleLines();
    }

    /**
     * Problem 4. Rebuilds the plaintext word list from component 0 of every
     * lexicon ntuple. Safe to call more than once.
     */
    public void createPlaintextWords()
    {
        plaintextWords.clear();
        for (Ntuple entry : lexicon) {
            plaintextWords.add((String) entry.getkth(0));
        }
    }

    /** Problem 5. Returns the plaintext words, in lexicon order. */
    public List<String> getPlaintextWords()
    {
        return plaintextWords;
    }

    // equivClass methods

    /**
     * Problem 6. Returns the set of distinct characters of a string.
     *
     * @param str the string to inspect
     * @return a new set holding each character of {@code str} once
     */
    public Set<Character> charSet(String str)
    {
        Set<Character> chars = new HashSet<Character>();
        for (char c : str.toCharArray()) {
            chars.add(c);
        }
        return chars;
    }

    /**
     * Problem 7. Tests whether two strings are built from the same set of
     * characters, ignoring order and multiplicity.
     */
    public boolean sameCharSet(String str1, String str2)
    {
        return charSet(str1).equals(charSet(str2));
    }

    /**
     * Problem 8. Returns the same-character-set equivalence class of a string
     * within the current plaintext words.
     *
     * @param str any string; it does not have to be in the lexicon
     * @return the plaintext words whose character set equals that of {@code str}
     */
    public Set<String> equivClass(String str)
    {
        Set<String> members = new HashSet<String>();
        for (String word : plaintextWords) {
            if (sameCharSet(word, str)) {
                members.add(word);
            }
        }
        return members;
    }

    /**
     * Problem 9. Returns the distinct equivalence classes of the given strings.
     *
     * @param set strings whose classes are wanted
     * @return one entry per distinct class
     */
    public Set<Set<String>> equivClasses(Set<String> set)
    {
        Set<Set<String>> classes = new HashSet<Set<String>>();
        for (String str : set) {
            classes.add(equivClass(str));
        }
        return classes;
    }

    /**
     * Problem 10. Returns the equivalence class of a randomly chosen plaintext
     * word of length {@code strLen} whose class has at least {@code minSize}
     * members.
     *
     * @return the class, or an empty set when no word qualifies
     */
    public Set<String> equivClassRndStr(int strLen, int minSize)
    {
        return randomClass(strLen, minSize, false);
    }

    /**
     * Problem 11. Builds the (word, equivClass, size) ntuples for every lexicon
     * word, sorts them, stores them in this object and writes them to disk.
     * Classes are formed in one grouping pass over the plaintext words.
     *
     * @param outputFile path of the cache file to write
     */
    public void writeEquivClassesSorted(String outputFile)
    {
        List<Ntuple> sorted = buildSortedClassNtuples(false);
        // Keep the in-memory list in step with the file so the maps built
        // right after an init run are not empty.
        equivClassesSorted.clear();
        equivClassesSorted.addAll(sorted);
        ReadWriteFile rwf = new ReadWriteFile();
        rwf.writeNtupleOutput(sorted, outputFile);
    }

    /**
     * Problem 12. Parses the text form of a (word, set, size) ntuple, e.g.
     * {@code Ntuple(post,[opts, post, stop],3)}, back into an Ntuple.
     * The size is recomputed from the parsed set.
     *
     * @param ntupleStrSetPlusLen the text form of the ntuple
     * @return an ntuple (word, set of members, number of members)
     * @throws IllegalArgumentException if the text lacks the word separator or
     *         the bracketed member list
     */
    public Ntuple createNtupleFromNtupleStr(String ntupleStrSetPlusLen)
    {
        if (ntupleStrSetPlusLen == null) {
            throw new IllegalArgumentException("ntuple string must not be null");
        }
        // A missing "(" yields -1 + 1 == 0, i.e. the word starts at the beginning.
        int wordStart = ntupleStrSetPlusLen.indexOf('(') + 1;
        int wordStop = ntupleStrSetPlusLen.indexOf(',', wordStart);
        int setOpen = ntupleStrSetPlusLen.indexOf('[');
        int setClose = ntupleStrSetPlusLen.indexOf(']', setOpen + 1);
        if (wordStop < 0 || setOpen < 0 || setClose < 0) {
            throw new IllegalArgumentException("Malformed ntuple string: " + ntupleStrSetPlusLen);
        }

        String word = ntupleStrSetPlusLen.substring(wordStart, wordStop);
        Set<String> members = new HashSet<String>();
        // Trimming accepts both "a, b" and "a,b"; an empty list "[]" yields no members.
        for (String piece : ntupleStrSetPlusLen.substring(setOpen + 1, setClose).split(",")) {
            String member = piece.trim();
            if (!member.isEmpty()) {
                members.add(member);
            }
        }
        return new Ntuple(word,members,members.size());
    }

    /**
     * Problem 13. Replaces equivClassesSorted with the ntuples parsed from the
     * file written by {@link #writeEquivClassesSorted}.
     *
     * @param inputFile path of the cache file to read
     */
    public void readEquivClassesSorted(String inputFile)
    {
        readClassNtuples(inputFile, equivClassesSorted);
    }

    /** Problem 14. Returns the (word, equivClass, size) ntuples. */
    public List<Ntuple> getEquivClassesSorted()
    {
        return equivClassesSorted;
    }

    /** Problem 15. Rebuilds the word-to-equivClass map from equivClassesSorted. */
    @SuppressWarnings("unchecked")
    public void mapWordToEquivClass()
    {
        mapWordToEquivClass.clear();
        for (Ntuple entry : equivClassesSorted) {
            // Component 1 is always stored as a Set<String> by this class.
            mapWordToEquivClass.put((String) entry.getkth(0), (Set<String>) entry.getkth(1));
        }
    }

    /**
     * Problem 16. Looks up the stored equivalence class of a word.
     *
     * @return the class, or {@code null} if the word is not in the map
     */
    public Set<String> getEquivClass(String word)
    {
        return mapWordToEquivClass.get(word);
    }

    /** Problem 17. Rebuilds the word-to-equivClass-size map from equivClassesSorted. */
    public void mapWordToEquivClassSize()
    {
        mapWordToEquivClassSize.clear();
        for (Ntuple entry : equivClassesSorted) {
            mapWordToEquivClassSize.put((String) entry.getkth(0), (Integer) entry.getkth(2));
        }
    }

    /**
     * Problem 18. Looks up the stored equivalence class size of a word.
     *
     * @return the size, or 0 if the word is not in the map
     */
    public int getEquivClassSize(String word)
    {
        Integer size = mapWordToEquivClassSize.get(word);
        return size == null ? 0 : size;
    }

    /** Problem 19. Returns the number of distinct stored equivalence classes. */
    public int getNumEquivClasses()
    {
        return new HashSet<Set<String>>(mapWordToEquivClass.values()).size();
    }

    /**
     * Problem 20. Returns the size of the largest stored equivalence class,
     * or 0 when no classes are stored.
     */
    public int getEquivClassMaxSize()
    {
        return maxClassSize(equivClassesSorted);
    }

    // anagramClass methods

    /**
     * Problem 21. Counts the occurrences of a character in a string.
     */
    public int occurr(char ch, String str)
    {
        int count = 0;
        for (int i = 0; i < str.length(); i++) {
            if (str.charAt(i) == ch) {
                count++;
            }
        }
        return count;
    }

    /**
     * Problem 22. Tests whether {@code strX} is an anagram of {@code str}:
     * same length and every character occurs equally often in both.
     */
    public boolean isAnagram(String strX, String str)
    {
        // Without the length check, extra characters in str would go unnoticed
        // because only the characters of strX are compared below.
        if (strX.length() != str.length()) {
            return false;
        }
        for (char c : strX.toCharArray()) {
            if (occurr(c, strX) != occurr(c, str)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Problem 23. Returns the anagram class of a string within the current
     * plaintext words. This relation refines sameCharSet.
     *
     * @param str any string; it does not have to be in the lexicon
     */
    public Set<String> anagramClass(String str)
    {
        Set<String> members = new HashSet<String>();
        for (String word : plaintextWords) {
            if (isAnagram(word, str)) {
                members.add(word);
            }
        }
        return members;
    }

    /**
     * Problem 24. Returns the distinct anagram classes of the given strings.
     */
    public Set<Set<String>> anagramClasses(Set<String> set)
    {
        Set<Set<String>> classes = new HashSet<Set<String>>();
        for (String str : set) {
            classes.add(anagramClass(str));
        }
        return classes;
    }

    /**
     * Problem 25. Returns the anagram class of a randomly chosen plaintext
     * word of length {@code strLen} whose class has at least {@code minSize}
     * members.
     *
     * @return the class, or an empty set when no word qualifies
     */
    public Set<String> anagramClassRndStr(int strLen, int minSize)
    {
        return randomClass(strLen, minSize, true);
    }

    /**
     * Problem 26. Builds the (word, anagramClass, size) ntuples for every
     * lexicon word, sorts them, stores them in this object and writes them to
     * disk. Classes are formed in one grouping pass over the plaintext words.
     *
     * @param outputFile path of the cache file to write
     */
    public void writeAnagramClassesSorted(String outputFile)
    {
        List<Ntuple> sorted = buildSortedClassNtuples(true);
        anagramClassesSorted.clear();
        anagramClassesSorted.addAll(sorted);
        ReadWriteFile rwf = new ReadWriteFile();
        rwf.writeNtupleOutput(sorted, outputFile);
    }

    /**
     * Problem 27. Replaces anagramClassesSorted with the ntuples parsed from
     * the file written by {@link #writeAnagramClassesSorted}.
     *
     * @param inputFile path of the cache file to read
     */
    public void readAnagramClassesSorted(String inputFile)
    {
        readClassNtuples(inputFile, anagramClassesSorted);
    }

    /** Problem 28. Returns the (word, anagramClass, size) ntuples. */
    public List<Ntuple> getAnagramClassesSorted()
    {
        return anagramClassesSorted;
    }

    /** Problem 29. Rebuilds the word-to-anagramClass map from anagramClassesSorted. */
    @SuppressWarnings("unchecked")
    public void mapWordToAnagramClass()
    {
        mapWordToAnagramClass.clear();
        for (Ntuple entry : anagramClassesSorted) {
            // Component 1 is always stored as a Set<String> by this class.
            mapWordToAnagramClass.put((String) entry.getkth(0), (Set<String>) entry.getkth(1));
        }
    }

    /**
     * Problem 30. Looks up the stored anagram class of a word.
     *
     * @return the class, or {@code null} if the word is not in the map
     */
    public Set<String> getAnagramClass(String word)
    {
        return mapWordToAnagramClass.get(word);
    }

    /** Problem 31. Rebuilds the word-to-anagramClass-size map from anagramClassesSorted. */
    public void mapWordToAnagramClassSize()
    {
        mapWordToAnagramClassSize.clear();
        for (Ntuple entry : anagramClassesSorted) {
            mapWordToAnagramClassSize.put((String) entry.getkth(0), (Integer) entry.getkth(2));
        }
    }

    /**
     * Problem 32. Looks up the stored anagram class size of a word.
     *
     * @return the size, or 0 if the word is not in the map
     */
    public int getAnagramClassSize(String word)
    {
        Integer size = mapWordToAnagramClassSize.get(word);
        return size == null ? 0 : size;
    }

    /** Problem 33. Returns the number of distinct stored anagram classes. */
    public int getNumAnagramClasses()
    {
        return new HashSet<Set<String>>(mapWordToAnagramClass.values()).size();
    }

    /**
     * Problem 34. Returns the size of the largest stored anagram class,
     * or 0 when no classes are stored.
     */
    public int getAnagramClassMaxSize()
    {
        return maxClassSize(anagramClassesSorted);
    }

    // lexicon stats

    /**
     * Problem 35. Counts, for each alphabet character, its occurrences in a
     * string. Characters of {@code str} outside the alphabet are not counted.
     *
     * @return an array of length {@code alphaSize}; entry i belongs to
     *         {@code alphabet.get(i)}
     */
    public double[] charCountArray(String str)
    {
        double[] counts = new double[alphaSize];
        for (int i = 0; i < alphaSize; i++) {
            counts[i] = occurr(alphabet.get(i), str);
        }
        return counts;
    }

    /**
     * Problem 36. Sums {@link #charCountArray} over all plaintext words.
     */
    public double[] charCountArrayTotal()
    {
        double[] totals = new double[alphaSize];
        for (String word : plaintextWords) {
            double[] counts = charCountArray(word);
            for (int i = 0; i < alphaSize; i++) {
                totals[i] += counts[i];
            }
        }
        return totals;
    }

    /**
     * Problem 37. Returns each alphabet character's share, in percent (0-100),
     * of all alphabet characters in the plaintext words. All entries are 0
     * when nothing was counted.
     */
    public double[] charPercentArray()
    {
        double[] percents = charCountArrayTotal();
        double grandTotal = 0;
        for (double count : percents) {
            grandTotal += count;
        }
        if (grandTotal == 0) {
            return percents;
        }
        for (int i = 0; i < percents.length; i++) {
            percents[i] = 100.0 * percents[i] / grandTotal;
        }
        return percents;
    }

    /**
     * Problem 38. Recursively counts the occurrences of {@code str1} in
     * {@code str2}. Overlapping occurrences are counted, so "aa" occurs twice
     * in "aaa". A null or empty argument gives 0.
     */
    public int countOccurr(String str1, String str2)
    {
        if (str1 == null || str2 == null || str1.isEmpty()) {
            return 0;
        }
        int hit = str2.indexOf(str1);
        if (hit < 0) {
            return 0;
        }
        // Resume one character after the hit so overlapping matches are found.
        return 1 + countOccurr(str1, str2.substring(hit + 1));
    }

    /**
     * Problem 39. Returns (bigram, percent) ntuples for every pair of alphabet
     * characters that occurs adjacently in some plaintext word, sorted by
     * percent, largest first. Percent (0-100) is relative to the number of
     * adjacent alphabet-character pairs in all plaintext words. Ties keep
     * alphabet order.
     */
    public List<Ntuple> bigramPercentSortedNtuples()
    {
        Set<Character> alphabetChars = new HashSet<Character>(alphabet);
        Map<String, Integer> countByBigram = new HashMap<String, Integer>();
        long totalBigrams = 0;
        for (String word : plaintextWords) {
            for (int i = 0; i + 1 < word.length(); i++) {
                if (alphabetChars.contains(word.charAt(i)) && alphabetChars.contains(word.charAt(i + 1))) {
                    String bigram = word.substring(i, i + 2);
                    Integer seen = countByBigram.get(bigram);
                    countByBigram.put(bigram, seen == null ? 1 : seen + 1);
                    totalBigrams++;
                }
            }
        }

        List<Ntuple> ntuples = new ArrayList<Ntuple>();
        // Walk the bigrams in alphabet order so that ties are ordered deterministically.
        for (char first : alphabet) {
            for (char second : alphabet) {
                String bigram = new StringBuilder(2).append(first).append(second).toString();
                Integer count = countByBigram.get(bigram);
                if (count != null) {
                    ntuples.add(new Ntuple(bigram, Double.valueOf(100.0 * count / totalBigrams)));
                }
            }
        }
        Collections.sort(ntuples, new Comparator<Ntuple>() {
            @Override
            public int compare(Ntuple left, Ntuple right)
            {
                return Double.compare((Double) right.getkth(1), (Double) left.getkth(1));
            }
        });
        return ntuples;
    }

    /**
     * Problem 40. Returns the percentage (0-100) of plaintext words of each
     * length. Entry i is the percentage of words of length i; words longer
     * than {@code maxLen} have no entry but still count toward the total.
     *
     * @param maxLen largest word length reported
     * @return an array of length {@code maxLen + 1}
     * @throws IllegalArgumentException if {@code maxLen} is negative
     */
    public double[] wordLengthPercentArray(int maxLen)
    {
        if (maxLen < 0) {
            throw new IllegalArgumentException("maxLen must be non-negative: " + maxLen);
        }
        double[] percents = new double[maxLen + 1];
        if (plaintextWords.isEmpty()) {
            return percents;
        }
        for (String word : plaintextWords) {
            if (word.length() <= maxLen) {
                percents[word.length()]++;
            }
        }
        for (int i = 0; i < percents.length; i++) {
            percents[i] = 100.0 * percents[i] / plaintextWords.size();
        }
        return percents;
    }

    /**
     * Problem 41. Prints a report of the lexicon data to standard output.
     * List previews show at most the first 50 entries.
     *
     * @param wordInLexicon a word expected to be in the lexicon; for an unknown
     *                      word the class getters print {@code null} / 0
     */
    public void lexiconReport(String wordInLexicon)
    {
        System.out.println("\nLexicon Data: Alphabet, Words, EquivClasses & AnigramClasses");
        System.out.println("getAlphabet() = " + getAlphabet());
        System.out.println("getAlphaSize() = " + getAlphaSize());

        System.out.println("getPlaintextWords() = " + preview(getPlaintextWords()));
        System.out.println("getPlaintextWords().size() = " + getPlaintextWords().size());

        System.out.println("\ncreateNtupleFromNtupleStr = " + createNtupleFromNtupleStr("Ntuple(post,[stoops, opts, post, stop, stoop, spot, spots, tops, pots, stops, posts],11)"));
        System.out.println("getEquivClassesSorted().subList(0, 50) = " + preview(getEquivClassesSorted()));

        System.out.println("getNumEquivClasses() = " + getNumEquivClasses());
        System.out.println("\nwordInLexicon = " + wordInLexicon);
        System.out.println("getEquivClass(wordInLexicon) = " + getEquivClass(wordInLexicon));
        System.out.println("getEquivClassSize(wordInLexicon) = " + getEquivClassSize(wordInLexicon));
        System.out.println("getEquivClassMaxSize = " + getEquivClassMaxSize());

        System.out.println("\ngetAnagramClassesSorted().subList(0, 50) = " + preview(getAnagramClassesSorted()));
        System.out.println("getNumAnagramClasses() = " + getNumAnagramClasses());
        System.out.println("\nwordInLexicon = " + wordInLexicon);
        System.out.println("getAnagramClass(wordInLexicon) = " + getAnagramClass(wordInLexicon));
        System.out.println("getAnagramClassSize(wordInLexicon) = " + getAnagramClassSize(wordInLexicon));
        System.out.println("getAnagramClassMaxSize = " + getAnagramClassMaxSize());
        System.out.println("\nwordInLexicon = " + wordInLexicon);
        System.out.println("getEquivClass(wordInLexicon) = " + getEquivClass(wordInLexicon));
        System.out.println("anagramClasses(getEquivClass(wordInLexicon)) = " + anagramClasses(emptyIfNull(getEquivClass(wordInLexicon))));
        System.out.println("getAnagramClass(wordInLexicon) = " + getAnagramClass(wordInLexicon));
        System.out.println("equivClasses(getAnagramClass(wordInLexicon)) = " + equivClasses(emptyIfNull(getAnagramClass(wordInLexicon))));

        System.out.println("\nLexicon Data: Character Counts & Percentages, Bigram Percentages and Word length Percentages");
        System.out.println("getAlphabet() = " + getAlphabet());
        System.out.println("wordInLexicon = " + wordInLexicon);
        System.out.println("charCountArray(wordInLexicon) = " + Arrays.toString(charCountArray(wordInLexicon)));
        System.out.println("charCountArrayTotal() = " + Arrays.toString(charCountArrayTotal()));
        System.out.println("charPercentArray() = " + Arrays.toString(charPercentArray()));
        List<Ntuple> bigramPercentSortedNtuples = bigramPercentSortedNtuples();
        System.out.println("\nbigramPercentSortedNtuples() = " + bigramPercentSortedNtuples);
        System.out.println("bigramPercentSortedNtuples().size() = " + bigramPercentSortedNtuples.size() +" (with nonzero percent out of " + getAlphaSize()*getAlphaSize() + " bigrams)");
        System.out.println("wordLengthPercentArray(27) = " + Arrays.toString(wordLengthPercentArray(27)));
    }

    // private helpers

    /** Returns the characters of a word in sorted order; equal for exactly the anagrams. */
    private static String anagramKey(String word)
    {
        char[] letters = word.toCharArray();
        Arrays.sort(letters);
        return new String(letters);
    }

    /** Returns the distinct characters of a word in sorted order; equal for exactly the words with the same character set. */
    private static String charSetKey(String word)
    {
        char[] letters = word.toCharArray();
        Arrays.sort(letters);
        StringBuilder distinct = new StringBuilder();
        for (int i = 0; i < letters.length; i++) {
            if (i == 0 || letters[i] != letters[i - 1]) {
                distinct.append(letters[i]);
            }
        }
        return distinct.toString();
    }

    /**
     * Groups the plaintext words into classes and returns one sorted
     * (word, class, size) ntuple per lexicon entry.
     *
     * @param byAnagram {@code true} for anagram classes, {@code false} for
     *                  same-character-set classes
     */
    private List<Ntuple> buildSortedClassNtuples(boolean byAnagram)
    {
        Map<String, Set<String>> classByKey = new HashMap<String, Set<String>>();
        for (String word : plaintextWords) {
            String key = byAnagram ? anagramKey(word) : charSetKey(word);
            Set<String> members = classByKey.get(key);
            if (members == null) {
                members = new HashSet<String>();
                classByKey.put(key, members);
            }
            members.add(word);
        }

        List<Ntuple> ntuples = new ArrayList<Ntuple>(lexicon.size());
        for (Ntuple entry : lexicon) {
            String word = (String) entry.getkth(0);
            Set<String> members = classByKey.get(byAnagram ? anagramKey(word) : charSetKey(word));
            if (members == null) {
                // Only possible if plaintextWords is out of step with lexicon.
                members = new HashSet<String>();
            }
            ntuples.add(new Ntuple(word,members,members.size()));
        }
        // NOTE(review): presumably orders by component 2 (class size), largest
        // first - confirm against NtupleComparator.
        NtupleComparator nc = new NtupleComparator(2,1,false);
        nc.sortNtupleList(ntuples);
        return ntuples;
    }

    /** Replaces the contents of {@code target} with the ntuples parsed from each line of a cache file. */
    private void readClassNtuples(String inputFile, List<Ntuple> target)
    {
        ReadWriteFile rwf = new ReadWriteFile(inputFile);
        List<String> lines = rwf.getFileLines();
        target.clear();
        for (String line : lines) {
            target.add(createNtupleFromNtupleStr(line));
        }
    }

    /**
     * Tries the plaintext words of the requested length in random order and
     * returns the first class that is large enough, or an empty set.
     */
    private Set<String> randomClass(int strLen, int minSize, boolean byAnagram)
    {
        List<String> candidates = new ArrayList<String>();
        for (String word : plaintextWords) {
            if (word.length() == strLen) {
                candidates.add(word);
            }
        }
        Collections.shuffle(candidates, new Random());
        for (String candidate : candidates) {
            Set<String> members = byAnagram ? anagramClass(candidate) : equivClass(candidate);
            if (members.size() >= minSize) {
                return members;
            }
        }
        return new HashSet<String>();
    }

    /** Returns the largest component-2 value (class size) in the list, or 0 for an empty list. */
    private static int maxClassSize(List<Ntuple> classNtuples)
    {
        int max = 0;
        for (Ntuple entry : classNtuples) {
            max = Math.max(max, (Integer) entry.getkth(2));
        }
        return max;
    }

    /** Returns at most the first {@link #REPORT_PREVIEW_SIZE} entries of a list. */
    private static <T> List<T> preview(List<T> list)
    {
        return list.subList(0, Math.min(REPORT_PREVIEW_SIZE, list.size()));
    }

    /** Substitutes an empty set for {@code null}. */
    private static <T> Set<T> emptyIfNull(Set<T> set)
    {
        return set == null ? new HashSet<T>() : set;
    }

}