Advertisement
Guest User

Untitled

a guest
Oct 14th, 2019
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 17.64 KB | None | 0 0
  1. /********* LexiconData.java ********
  2. *
  3. * APCS Labs 2011-2020
  4. * Cryptology
  5. * Dr. John Pais
  6. * pais.john@gmail.com
  7. * Copyright (c) 2011 to present John Pais. All rights reserved.
  8. *
  9. */
  10.  
  11. package LexiconData;
  12. import java.util.*;
  13.  
  14. public class LexiconData
  15. {
  16. protected List<Character> alphabet;
  17. protected int alphaSize;
  18. // Create ntuples of individual plaintext words or (plaintext word,
  19. // known language word) pairs. Note that this will be determined
  20. // programmatically by a ReadWriteFile method (see below).
  21. private List<Ntuple> lexicon = new ArrayList<Ntuple>();
  22. private List<String> plaintextWords = new ArrayList<String>();
  23. private List<Ntuple> equivClassesSorted = new ArrayList<Ntuple>();
  24. private Map<String, Set<String>> mapWordToEquivClass = new HashMap<String, Set<String>>();
  25. private Map<String, Integer> mapWordToEquivClassSize = new HashMap<String, Integer>();
  26. private List<Ntuple> anagramClassesSorted = new ArrayList<Ntuple>();
  27. private Map<String, Set<String>> mapWordToAnagramClass = new HashMap<String, Set<String>>();
  28. private Map<String, Integer> mapWordToAnagramClassSize = new HashMap<String, Integer>();
  29.  
  30. public LexiconData(List<Character> alphabet, String dirPath, String lexicon, boolean init)
  31. {
  32. this.alphabet = alphabet;
  33. this.alphaSize = alphabet.size();
  34. readFileOfLexicon(dirPath + lexicon);
  35. createPlaintextWords();
  36. if(init)
  37. {
  38. writeEquivClassesSorted(dirPath + "equivClassesSorted.txt");
  39. writeAnagramClassesSorted(dirPath + "anagramClassesSorted.txt");
  40. }
  41. else
  42. {
  43. readEquivClassesSorted(dirPath + "equivClassesSorted.txt");
  44. readAnagramClassesSorted(dirPath + "anagramClassesSorted.txt");
  45. }
  46. mapWordToEquivClass();
  47. mapWordToEquivClassSize();
  48. mapWordToAnagramClass();
  49. mapWordToAnagramClassSize();
  50. }
  51.  
  52. // Problem 1. Create getter for alphabet.
  53. public List<Character> getAlphabet()
  54. {
  55. return alphabet;
  56. }
  57.  
  58. // Problem 2. Create getter for alphabet size.
  59. public int getAlphaSize()
  60. {
  61. return alphaSize;
  62. }
  63.  
  64. // Problem 3. Read lexicon into List of ntuples of individual words
  65. // or possibly (mystery word, english word) pairs. Note that this will be
  66. // determined programmatically by the ReadWriteFile method createNtupleLines,
  67. // which reads each line (record) of strings into an ntuple and creates an
  68. // ArrayList of these ntuples.
  69. public void readFileOfLexicon(String inputFile)
  70. {
  71. ReadWriteFile rwf = new ReadWriteFile(inputFile,3);
  72. lexicon = rwf.getNtupleLines();
  73. }
  74.  
  75. // Problem 4. Create plaintextWords.
  76. public void createPlaintextWords()
  77. {
  78. for (Ntuple ntuple : lexicon) {
  79. plaintextWords.add((String)ntuple.getkth(0));
  80. }
  81. }
  82.  
  83. // Problem 5. Create getter for plaintextWords.
  84. public List<String> getPlaintextWords()
  85. {
  86. return plaintextWords;
  87. }
  88.  
  89. // equivClass methods
  90.  
  91. // Problem 6. Create character set of a string.
  92. // Note that a set automatically removes duplicates.
  93. public Set<Character> charSet(String str)
  94. {
  95. Set<Character> set = new HashSet<>();
  96. for (Character c : str.toCharArray()) {
  97. set.add(c);
  98. }
  99. return set;
  100. }
  101.  
  102. // Problem 7. Create same character set test for two strings.
  103. public boolean sameCharSet(String str1, String str2)
  104. {
  105. return charSet(str1).equals(charSet(str1));
  106. }
  107.  
  108. // Problem 8. Create same character set equivalence
  109. // class of a given string, since sameCharSet is an
  110. // equivalence relation. Note that this is dependent
  111. // on the list of plaintextWords created using the
  112. // current lexicon.
  113. public Set<String> equivClass(String str)
  114. {
  115. Set<String> equivClass = new HashSet<>();
  116. for (String word : plaintextWords) {
  117. if (sameCharSet(word,str)) {
  118. equivClass.add(word);
  119. }
  120. }
  121. return equivClass;
  122. }
  123.  
  124. // Problem 9. Create the set of all equivalence classes
  125. // created from a given set of strings. Note that this
  126. // is dependent on the list of plaintextWords created
  127. // using the current lexicon.
  128. public Set<Set<String>> equivClasses(Set<String> set)
  129. {
  130. Set<Set<String>> equivClasses = new HashSet<Set<String>>();
  131. for (String str : set) {
  132. equivClasses.add(equivClass(str));
  133. }
  134. return equivClasses;
  135. }
  136.  
  137. // Problem 10. Create the equivalence class of size at least minSize
  138. // of a random string of length strLen.
  139. public Set<String> equivClassRndStr(int strLen, int minSize)
  140. {
  141. Random rnd = new Random();
  142. int index = 0;
  143. while (plaintextWords.get(index).length() != strLen ||
  144. equivClass(plaintextWords.get(index)).size() < minSize) {
  145. index = rnd.nextInt(plaintextWords.size());
  146. }
  147. return equivClass(plaintextWords.get(index));
  148. }
  149.  
  150. // Problem 11. Create a list of ntuples containing equivClass
  151. // and equivClass size pairs, sort the list by size, and write
  152. // it to disk. This is a computation intensive task that may
  153. // take several minutes, so we do this only once and write the
  154. // result to disk. Then we extract any information we need from the
  155. // disk file.
  156. public void writeEquivClassesSorted(String outputFile)
  157. {
  158. List<Ntuple> equivClassesSorted = new ArrayList<Ntuple>();
  159. Set<String> equivClass = new HashSet<String>();
  160. String word;
  161. Ntuple ntuple0 = new Ntuple();
  162. for(Ntuple ntuple : lexicon)
  163. {
  164. word = (String)ntuple.getkth(0);
  165. equivClass = equivClass(word);
  166. ntuple0 = new Ntuple(word,equivClass,equivClass.size());
  167. equivClassesSorted.add(ntuple0);
  168. }
  169. NtupleComparator nc = new NtupleComparator(2,1,false);
  170. nc.sortNtupleList(equivClassesSorted);
  171. ReadWriteFile rwf = new ReadWriteFile();
  172. rwf.writeNtupleOutput(equivClassesSorted, outputFile);
  173. }
  174.  
  175. // Problem 12. Convert a string representation of an ntuple comprised
  176. // of a set of strings and the size of the set into an actual Ntuple
  177. // containing the actual set and its length.
  178. public Ntuple createNtupleFromNtupleStr(String ntupleStrSetPlusLen)
  179. {
  180. int wordStart = ntupleStrSetPlusLen.indexOf("(") + 1;
  181. int wordStop = ntupleStrSetPlusLen.indexOf(",");
  182. String word = ntupleStrSetPlusLen.substring(wordStart,wordStop);
  183. Set<String> set = new HashSet<String>();
  184. int setStart = ntupleStrSetPlusLen.indexOf("[") + 1;
  185. int setStop = ntupleStrSetPlusLen.indexOf("]");
  186. String str = ntupleStrSetPlusLen.substring(setStart,setStop);
  187. int index;
  188. while(str.length() > 0)
  189. {
  190. index = str.indexOf(",");
  191. if(index != -1)
  192. {
  193. set.add(str.substring(0, index));
  194. str = str.substring(str.indexOf(",")+2);
  195. }
  196. else
  197. {
  198. set.add(str);
  199. str = "";
  200. }
  201. }
  202. return new Ntuple(word,set,set.size());
  203. }
  204.  
  205. // Problem 13. Read the file created in Problem 11 into
  206. // a list of strings and then recreate it as a list of
  207. // ntuples coded into the variable equivClassesSorted.
  208. public void readEquivClassesSorted(String inputFile)
  209. {
  210. ReadWriteFile rwf = new ReadWriteFile(inputFile);
  211. List<String> lines = rwf.getFileLines();
  212. for(String str : lines)
  213. {
  214. equivClassesSorted.add(createNtupleFromNtupleStr(str));
  215. }
  216. }
  217.  
  218. // Problem 14. Create getter for equivClassesSorted.
  219. public List<Ntuple> getEquivClassesSorted()
  220. {
  221. return equivClassesSorted;
  222. }
  223.  
  224. // Problem 15. Create mapWordToEquivClass map.
  225. @SuppressWarnings("unchecked")
  226. public void mapWordToEquivClass()
  227. {
  228. for (Ntuple ntuple : equivClassesSorted) {
  229. mapWordToEquivClass.put((String) ntuple.getkth(0),(Set<String>) ntuple.getkth(1));
  230. }
  231. }
  232.  
  233. // Problem 16. Create getter for equivClass using mapWordToEquivClass map.
  234. public Set<String> getEquivClass(String word)
  235. {
  236. return mapWordToEquivClass.get(word);
  237. }
  238.  
  239. // Problem 17. Create mapWordToEquivClassSize map.
  240. @SuppressWarnings("unchecked")
  241. public void mapWordToEquivClassSize()
  242. {
  243. for (Ntuple ntuple : equivClassesSorted) {
  244. mapWordToEquivClassSize.put((String)ntuple.getkth(0),(Integer)ntuple.getkth(2));
  245. }
  246. }
  247.  
  248. // Problem 18. Create getter for equivClassSize using mapWordToEquivClassSize map.
  249. public int getEquivClassSize(String word)
  250. {
  251. return mapWordToEquivClassSize.get(word);
  252. }
  253.  
  254. // Problem 19. Create getter for total number of EquivClasses.
  255. public int getNumEquivClasses()
  256. {
  257. Set<Set<String>> sets = new HashSet<Set<String>>();
  258. for (Set<String> set : mapWordToEquivClass.values()) {
  259. sets.add(set);
  260. }
  261. return sets.size();
  262. }
  263.  
  264. // Problem 20. Create getter for equivClass max size.
  265. public int getEquivClassMaxSize()
  266. {
  267. return (int)equivClassesSorted.get(0).getkth(2);
  268. }
  269.  
  270. // anagramClass methods
  271.  
  272. // Problem 21. Count number of occurrences of a
  273. // character in a string.
  274. public int occurr(char ch, String str)
  275. {
  276. int count = 0;
  277. for (int i = 0; i < str.length(); i++) {
  278. if (str.charAt(i) == (ch)) {
  279. count++;
  280. }
  281. }
  282. return count;
  283. }
  284.  
  285. // Problem 22. Create test whether or not strX is an anagram of str.
  286. // You must use: 1. occurr above, 2. str.toCharArray(), 3. enhanced for loop
  287. // You should mirror the methods above in the anagram methods below
  288. public boolean isAnagram(String strX, String str)
  289. {
  290. char[] char1 = strX.toCharArray();
  291. for (char a : strX.toCharArray()) {
  292. if (occurr(a, strX) != occurr(a, str)) {
  293. return false;
  294. }
  295. }
  296. return true;
  297. }
  298.  
  299. // Problem 23. Create anagram equivalence class of a given string,
  300. // which refines the sameCharSet equivalence relation.
  301. public Set<String> anagramClass(String str)
  302. {
  303. Set<String> anagramClass = new HashSet<>();
  304. for (String word : plaintextWords) {
  305. if (isAnagram(word,str)) {
  306. anagramClass.add(word);
  307. }
  308. }
  309. return anagramClass;
  310. }
  311.  
  312. // Problem 24. Create the set of all anagram classes
  313. // created from a given set of strings. Note that this
  314. // is dependent on the list of plaintextWords created
  315. // using the current lexicon.
  316. public Set<Set<String>> anagramClasses(Set<String> set)
  317. {
  318. Set<Set<String>> anagramClasses = new HashSet<Set<String>>();
  319. for (String str : set) {
  320. anagramClasses.add(anagramClass(str));
  321. }
  322. return anagramClasses;
  323. }
  324.  
  325. // Problem 25. Create the anagram class of size at least minSize
  326. // of a random string of length strLen.
  327. public Set<String> anagramClassRndStr(int strLen, int minSize)
  328. {
  329. Random rnd = new Random();
  330. int index = 0;
  331. while (plaintextWords.get(index).length() != strLen ||
  332. anagramClass(plaintextWords.get(index)).size() < minSize) {
  333. index = rnd.nextInt(plaintextWords.size());
  334. }
  335. return anagramClass(plaintextWords.get(index));
  336. }
  337.  
  338. // Problem 26. Create a list of ntuples containing anagramClass
  339. // and anagramClass size pairs, sort the list by size, and write
  340. // it to disk. This is a computation intensive task that may
  341. // take several minutes, so we do this only once and write the
  342. // result to disk. Then we extract any information we need from the
  343. // disk file.
  344. public void writeAnagramClassesSorted(String outputFile)
  345. {
  346. List<Ntuple> anagramClassesSorted = new ArrayList<Ntuple>();
  347. Set<String> anagramClass = new HashSet<String>();
  348. String word;
  349. Ntuple ntuple0 = new Ntuple();
  350. for(Ntuple ntuple : lexicon)
  351. {
  352. word = (String)ntuple.getkth(0);
  353. anagramClass = anagramClass(word);
  354. ntuple0 = new Ntuple(word,anagramClass,anagramClass.size());
  355. anagramClassesSorted.add(ntuple0);
  356. }
  357. NtupleComparator nc = new NtupleComparator(2,1,false);
  358. nc.sortNtupleList(anagramClassesSorted);
  359. ReadWriteFile rwf = new ReadWriteFile();
  360. rwf.writeNtupleOutput(anagramClassesSorted, outputFile);
  361. }
  362.  
  363. // Problem 27. Read the file created in Problem 26 into
  364. // a list of strings and then recreate it as a list of
  365. // ntuples coded into the variable anagramClassesSorted.
  366. public void readAnagramClassesSorted(String inputFile)
  367. {
  368. ReadWriteFile rwf = new ReadWriteFile(inputFile);
  369. List<String> lines = rwf.getFileLines();
  370. for(String str : lines)
  371. {
  372. anagramClassesSorted.add(createNtupleFromNtupleStr(str));
  373. }
  374. }
  375.  
  376. // Problem 28. Create getter for anagramClassesSorted.
  377. public List<Ntuple> getAnagramClassesSorted()
  378. {
  379. return anagramClassesSorted;
  380. }
  381.  
  382. // Problem 29. Create mapWordToAnagramClass map.
  383. @SuppressWarnings("unchecked")
  384. public void mapWordToAnagramClass()
  385. {
  386. for (Ntuple ntuple : anagramClassesSorted) {
  387. mapWordToAnagramClass.put((String) ntuple.getkth(0),(Set<String>) ntuple.getkth(1));
  388. }
  389. }
  390.  
  391. // Problem 30. Create getter for anagramClass using mapWordToAnagramClass map.
  392. public Set<String> getAnagramClass(String word)
  393. {
  394. return mapWordToAnagramClass.get(word);
  395. }
  396.  
  397. // Problem 31. Create mapWordToAnagramClassSize map.
  398. @SuppressWarnings("unchecked")
  399. public void mapWordToAnagramClassSize()
  400. {
  401. for (Ntuple ntuple : anagramClassesSorted) {
  402. mapWordToAnagramClassSize.put((String)ntuple.getkth(0),(Integer)ntuple.getkth(2));
  403. }
  404. }
  405.  
  406. // Problem 32. Create getter for anagramClassSize using mapWordToAnagramClassSize map.
  407. public int getAnagramClassSize(String word)
  408. {
  409. return mapWordToAnagramClassSize.get(word);
  410. }
  411.  
  412. // Problem 33. Create getter for total number of AnagramClasses.
  413. public int getNumAnagramClasses()
  414. {
  415. Set<Set<String>> sets = new HashSet<Set<String>>();
  416. for (Set<String> set : mapWordToAnagramClass.values()) {
  417. sets.add(set);
  418. }
  419. return sets.size();
  420. }
  421.  
  422. // Problem 34. Create getter for anagramClass max size.
  423. public int getAnagramClassMaxSize()
  424. {
  425. return (int)anagramClassesSorted.get(0).getkth(2);
  426. }
  427.  
  428. // lexicon stats
  429.  
  430. // Problem 35. Create array of alphabet character counts for a given string.
  431. public double[] charCountArray(String str)
  432. {
  433. // insert your code here
  434. }
  435.  
  436. // Problem 36. Create array of alphabet character count totals
  437. // for all strings in lexicon keySet.
  438. public double[] charCountArrayTotal()
  439. {
  440. // insert your code here
  441. }
  442.  
  443. // Problem 37. Create array of alphabet character % totals
  444. // for all strings in lexicon keySet.
  445. public double[] charPercentArray()
  446. {
  447. // insert your code here
  448. }
  449.  
  450. // Problem 38. Count (recursively) the number of occurrences of str1 in str2.
  451. public int countOccurr(String str1, String str2)
  452. {
  453. // insert your code here
  454. }
  455.  
  456. // Problem 39. Create ntuples of bigrams (pairs of characters)
  457. // sorted by percent.
  458. public List<Ntuple> bigramPercentSortedNtuples()
  459. {
  460. ArrayList<Ntuple> ntuples = new ArrayList<Ntuple>();
  461. List<String> bigrams = new ArrayList<String>();
  462.  
  463. // insert your code here
  464. }
  465.  
  466. // Problem 40. Create array of percentages of word lengths. The value at each
  467. // index i is the percentage of words of length i.
  468. public double[] wordLengthPercentArray(int maxLen)
  469. {
  470. // insert your code here
  471. }
  472.  
  473. // Problem 41. Create lexicon report.
  474. public void lexiconReport(String wordInLexicon)
  475. {
  476.  
  477. System.out.println("\nLexicon Data: Alphabet, Words, EquivClasses & AnigramClasses");
  478. System.out.println("getAlphabet() = " + getAlphabet());
  479. System.out.println("getAlphaSize() = " + getAlphaSize());
  480.  
  481.  
  482. System.out.println("getPlaintextWords() = " + getPlaintextWords().subList(0, 50));
  483. System.out.println("getPlaintextWords().size() = " + getPlaintextWords().size());
  484.  
  485. System.out.println("\ncreateNtupleFromNtupleStr = " + createNtupleFromNtupleStr("Ntuple(post,[stoops, opts, post, stop, stoop, spot, spots, tops, pots, stops, posts],11)"));
  486. System.out.println("getEquivClassesSorted().subList(0, 50) = " + getEquivClassesSorted().subList(0, 50));
  487.  
  488. System.out.println("getNumEquivClasses() = " + getNumEquivClasses());
  489. System.out.println("\nwordInLexicon = " + wordInLexicon);
  490. System.out.println("getEquivClass(wordInLexicon) = " + getEquivClass(wordInLexicon));
  491. System.out.println("getEquivClassSize(wordInLexicon) = " + getEquivClassSize(wordInLexicon));
  492. System.out.println("getEquivClassMaxSize = " + getEquivClassMaxSize());
  493.  
  494. System.out.println("\ngetAnagramClassesSorted().subList(0, 50) = " + getAnagramClassesSorted().subList(0, 50));
  495. System.out.println("getNumAnagramClasses() = " + getNumAnagramClasses());
  496. System.out.println("\nwordInLexicon = " + wordInLexicon);
  497. System.out.println("getAnagramClass(wordInLexicon) = " + getAnagramClass(wordInLexicon));
  498. System.out.println("getAnagramClassSize(wordInLexicon) = " + getAnagramClassSize(wordInLexicon));
  499. System.out.println("getAnagramClassMaxSize = " + getAnagramClassMaxSize());
  500. System.out.println("\nwordInLexicon = " + wordInLexicon);
  501. System.out.println("getEquivClass(wordInLexicon) = " + getEquivClass(wordInLexicon));
  502. System.out.println("anagramClasses(getEquivClass(wordInLexicon)) = " + anagramClasses(getEquivClass(wordInLexicon)));
  503. System.out.println("getAnagramClass(wordInLexicon) = " + getAnagramClass(wordInLexicon));
  504. // System.out.println("equivClasses(getAnagramClass(wordInLexicon)) = " + equivClasses(getAnagramClass(wordInLexicon)));
  505. /*
  506. System.out.println("\nLexicon Data: Character Counts & Percentages, Bigram Percentages and Word length Percentages");
  507. System.out.println("getAlphabet() = " + getAlphabet());
  508. System.out.println("wordInLexicon = " + wordInLexicon);
  509. System.out.println("charCountArray(wordInLexicon) = " + Arrays.toString(charCountArray(wordInLexicon)));
  510. System.out.println("charCountArrayTotal() = " + Arrays.toString(charCountArrayTotal()));
  511. System.out.println("charPercentArray() = " + Arrays.toString(charPercentArray()));
  512. List<Ntuple> bigramPercentSortedNtuples = bigramPercentSortedNtuples();
  513. System.out.println("\nbigramPercentSortedNtuples() = " + bigramPercentSortedNtuples);
  514. System.out.println("bigramPercentSortedNtuples().size() = " + bigramPercentSortedNtuples.size() +" (with nonzero percent out of " + getAlphaSize()*getAlphaSize() + " bigrams)");
  515. System.out.println("wordLengthPercentArray(27) = " + Arrays.toString(wordLengthPercentArray(27)));
  516. */
  517. }
  518.  
  519. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement