Advertisement
Guest User

Untitled

a guest
Oct 14th, 2019
221
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 20.15 KB | None | 0 0
  1. /********* LexiconData.java ********
  2. *
  3. * APCS Labs 2011-2020
  4. * Cryptology
  5. * Dr. John Pais
  6. * pais.john@gmail.com
  7. * Copyright (c) 2011 to present John Pais. All rights reserved.
  8. *
  9. */
  10.  
  11. package LexiconData;
  12. import java.util.*;
  13.  
  14. public class LexiconData
  15. {
  16. protected List<Character> alphabet;
  17. protected int alphaSize;
  18. // Create ntuples of individual plaintext words or (plaintext word,
  19. // known language word) pairs. Note that this will be determined
  20. // programmatically by a ReadWriteFile method (see below).
  21. private List<Ntuple> lexicon = new ArrayList<Ntuple>();
  22. private List<String> plaintextWords = new ArrayList<String>();
  23. private List<Ntuple> equivClassesSorted = new ArrayList<Ntuple>();
  24. private Map<String, Set<String>> mapWordToEquivClass = new HashMap<String, Set<String>>();
  25. private Map<String, Integer> mapWordToEquivClassSize = new HashMap<String, Integer>();
  26. private List<Ntuple> anagramClassesSorted = new ArrayList<Ntuple>();
  27. private Map<String, Set<String>> mapWordToAnagramClass = new HashMap<String, Set<String>>();
  28. private Map<String, Integer> mapWordToAnagramClassSize = new HashMap<String, Integer>();
  29.  
  30. public LexiconData(List<Character> alphabet, String dirPath, String lexicon, boolean init)
  31. {
  32. this.alphabet = alphabet;
  33. this.alphaSize = alphabet.size();
  34. readFileOfLexicon(dirPath + lexicon);
  35. createPlaintextWords();
  36. if(init)
  37. {
  38. writeEquivClassesSorted(dirPath + "equivClassesSorted.txt");
  39. writeAnagramClassesSorted(dirPath + "anagramClassesSorted.txt");
  40. }
  41. else
  42. {
  43. readEquivClassesSorted(dirPath + "equivClassesSorted.txt");
  44. // readAnagramClassesSorted(dirPath + "anagramClassesSorted.txt");
  45. }
  46. mapWordToEquivClass();
  47. mapWordToEquivClassSize();
  48. // mapWordToAnagramClass();
  49. // mapWordToAnagramClassSize();
  50. }
  51.  
  52. // Problem 1. Create getter for alphabet.
  53. public List<Character> getAlphabet()
  54. {
  55. return alphabet;
  56. }
  57.  
  58. // Problem 2. Create getter for alphabet size.
  59. public int getAlphaSize()
  60. {
  61. return alphaSize;
  62. }
  63.  
  64. // Problem 3. Read lexicon into List of ntuples of individual words
  65. // or possibly (mystery word, english word) pairs. Note that this will be
  66. // determined programmatically by the ReadWriteFile method createNtupleLines,
  67. // which reads each line (record) of strings into an ntuple and creates an
  68. // ArrayList of these ntuples.
  69. public void readFileOfLexicon(String inputFile)
  70. {
  71. ReadWriteFile rwf = new ReadWriteFile(inputFile,3);
  72. lexicon = rwf.getNtupleLines();
  73. }
  74.  
  75. // Problem 4. Create plaintextWords.
  76. public void createPlaintextWords()
  77. {
  78. for (Ntuple ntuple : lexicon) {
  79. plaintextWords.add((String)ntuple.getkth(0));
  80. }
  81. }
  82.  
  83. // Problem 5. Create getter for plaintextWords.
  84. public List<String> getPlaintextWords()
  85. {
  86. System.out.println(plaintextWords.size());
  87. return plaintextWords;
  88. }
  89.  
  90. // equivClass methods
  91.  
  92. // Problem 6. Create character set of a string.
  93. // Note that a set automatically removes duplicates.
  94. public Set<Character> charSet(String str)
  95. {
  96. Set<Character> set = new HashSet<>();
  97. for (Character c : str.toCharArray()) {
  98. set.add(c);
  99. }
  100. return set;
  101. }
  102.  
  103. // Problem 7. Create same character set test for two strings.
  104. public boolean sameCharSet(String str1, String str2)
  105. {
  106. return charSet(str1).equals(charSet(str1));
  107. }
  108.  
  109. // Problem 8. Create same character set equivalence
  110. // class of a given string, since sameCharSet is an
  111. // equivalence relation. Note that this is dependent
  112. // on the list of plaintextWords created using the
  113. // current lexicon.
  114. public Set<String> equivClass(String str)
  115. {
  116. Set<String> equivClass = new HashSet<>();
  117. for (String word : plaintextWords) {
  118. if (sameCharSet(word,str)) {
  119. equivClass.add(word);
  120. }
  121. }
  122. return equivClass;
  123. }
  124.  
  125. // Problem 9. Create the set of all equivalence classes
  126. // created from a given set of strings. Note that this
  127. // is dependent on the list of plaintextWords created
  128. // using the current lexicon.
  129. public Set<Set<String>> equivClasses(Set<String> set)
  130. {
  131. Set<Set<String>> equivClasses = new HashSet<Set<String>>();
  132. for (String str : set) {
  133. equivClasses.add(equivClass(str));
  134. }
  135. return equivClasses;
  136. }
  137.  
  138. // Problem 10. Create the equivalence class of size at least minSize
  139. // of a random string of length strLen.
  140. public Set<String> equivClassRndStr(int strLen, int minSize)
  141. {
  142. Random rnd = new Random();
  143. int index = 0;
  144. while (plaintextWords.get(index).length() != strLen ||
  145. equivClass(plaintextWords.get(index)).size() < minSize) {
  146. index = rnd.nextInt(plaintextWords.size());
  147. }
  148. return equivClass(plaintextWords.get(index));
  149. }
  150.  
  151. // Problem 11. Create a list of ntuples containing equivClass
  152. // and equivClass size pairs, sort the list by size, and write
  153. // it to disk. This is a computation intensive task that may
  154. // take several minutes, so we do this only once and write the
  155. // result to disk. Then we extract any information we need from the
  156. // disk file.
  157. public void writeEquivClassesSorted(String outputFile)
  158. {
  159. List<Ntuple> equivClassesSorted = new ArrayList<Ntuple>();
  160. Set<String> equivClass = new HashSet<String>();
  161. String word;
  162. Ntuple ntuple0 = new Ntuple();
  163. for(Ntuple ntuple : lexicon)
  164. {
  165. word = (String)ntuple.getkth(0);
  166. equivClass = equivClass(word);
  167. ntuple0 = new Ntuple(word,equivClass,equivClass.size());
  168. equivClassesSorted.add(ntuple0);
  169. }
  170. NtupleComparator nc = new NtupleComparator(2,1,false);
  171. nc.sortNtupleList(equivClassesSorted);
  172. ReadWriteFile rwf = new ReadWriteFile();
  173. rwf.writeNtupleOutput(equivClassesSorted, outputFile);
  174. }
  175.  
  176. // Problem 12. Convert a string representation of an ntuple comprised
  177. // of a set of strings and the size of the set into an actual Ntuple
  178. // containing the actual set and its length.
  179. public Ntuple createNtupleFromNtupleStr(String ntupleStrSetPlusLen)
  180. {
  181. int wordStart = ntupleStrSetPlusLen.indexOf("(") + 1;
  182. int wordStop = ntupleStrSetPlusLen.indexOf(",");
  183. String word = ntupleStrSetPlusLen.substring(wordStart,wordStop);
  184. Set<String> set = new HashSet<String>();
  185. int setStart = ntupleStrSetPlusLen.indexOf("[") + 1;
  186. int setStop = ntupleStrSetPlusLen.indexOf("]");
  187. String str = ntupleStrSetPlusLen.substring(setStart,setStop);
  188. int index;
  189. while(str.length() > 0)
  190. {
  191. index = str.indexOf(",");
  192. if(index != -1)
  193. {
  194. set.add(str.substring(0, index));
  195. str = str.substring(str.indexOf(",")+2);
  196. }
  197. else
  198. {
  199. set.add(str);
  200. str = "";
  201. }
  202. }
  203. return new Ntuple(word,set,set.size());
  204. }
  205.  
  206. // Problem 13. Read the file created in Problem 11 into
  207. // a list of strings and then recreate it as a list of
  208. // ntuples coded into the variable equivClassesSorted.
  209. public void readEquivClassesSorted(String inputFile)
  210. {
  211. ReadWriteFile rwf = new ReadWriteFile(inputFile);
  212. List<String> lines = rwf.getFileLines();
  213. for(String str : lines)
  214. {
  215. equivClassesSorted.add(createNtupleFromNtupleStr(str));
  216. }
  217. }
  218.  
  219. // Problem 14. Create getter for equivClassesSorted.
  220. public List<Ntuple> getEquivClassesSorted()
  221. {
  222. return equivClassesSorted;
  223. }
  224.  
  225. // Problem 15. Create mapWordToEquivClass map.
  226. @SuppressWarnings("unchecked")
  227. public void mapWordToEquivClass()
  228. {
  229. for (Ntuple ntuple : equivClassesSorted) {
  230. mapWordToEquivClass.put((String) ntuple.getkth(0),(Set<String>) ntuple.getkth(1));
  231. }
  232. }
  233.  
  234. // Problem 16. Create getter for equivClass using mapWordToEquivClass map.
  235. public Set<String> getEquivClass(String word)
  236. {
  237. return mapWordToEquivClass.get(word);
  238. }
  239.  
  240. // Problem 17. Create mapWordToEquivClassSize map.
  241. @SuppressWarnings("unchecked")
  242. public void mapWordToEquivClassSize()
  243. {
  244. for (Ntuple ntuple : equivClassesSorted) {
  245. mapWordToEquivClassSize.put((String)ntuple.getkth(0),(Integer)ntuple.getkth(2));
  246. }
  247. }
  248.  
  249. // Problem 18. Create getter for equivClassSize using mapWordToEquivClassSize map.
  250. public int getEquivClassSize(String word)
  251. {
  252. return mapWordToEquivClassSize.get(word);
  253. }
  254.  
  255. // Problem 19. Create getter for total number of EquivClasses.
  256. public int getNumEquivClasses()
  257. {
  258. Set<Set<String>> sets = new HashSet<Set<String>>();
  259. for (Set<String> set : mapWordToEquivClass.values()) {
  260. sets.add(set);
  261. }
  262. return sets.size();
  263. }
  264.  
  265. // Problem 20. Create getter for equivClass max size.
  266. public int getEquivClassMaxSize()
  267. {
  268. return (int)equivClassesSorted.get(0).getkth(2);
  269. }
  270.  
  271. // anagramClass methods
  272.  
  273. // Problem 21. Count number of occurrences of a
  274. // character in a string.
  275. public int occurr(char ch, String str)
  276. {
  277. // insert your code here
  278. int cnt = 0;
  279. for(int i = 0; i < str.length(); i++) {
  280. if(str.charAt(i) == ch) {
  281. ++cnt;
  282. }
  283. }
  284. return cnt;
  285. }
  286.  
  287. // Problem 22. Create test whether or not strX is an anagram of str.
  288. // You must use: 1. occurr above, 2. str.toCharArray(), 3. enhanced for loop
  289. // You should mirror the methods above in the anagram methods below
  290. public boolean isAnagram(String strX, String str)
  291. {
  292. // insert your code here
  293. char[] a = strX.toCharArray(); char[] b = str.toCharArray();
  294. for(char c : a) {
  295. if(occurr(c, strX) != occurr(c, str)) {
  296. return false;
  297. }
  298. }
  299. return true;
  300. }
  301.  
  302. // Problem 23. Create anagram equivalence class of a given string,
  303. // which refines the sameCharSet equivalence relation.
  304. public Set<String> anagramClass(String str)
  305. {
  306. Set<String> anagramClass = new HashSet<>();
  307. for (String word : plaintextWords) {
  308. if (isAnagram(word,str)) {
  309. anagramClass.add(word);
  310. }
  311. }
  312. return anagramClass;
  313. }
  314.  
  315. // Problem 24. Create the set of all anagram classes
  316. // created from a given set of strings. Note that this
  317. // is dependent on the list of plaintextWords created
  318. // using the current lexicon.
  319. public Set<Set<String>> anagramClasses(Set<String> set)
  320. {
  321. Set<Set<String>> anagramClasses = new HashSet<Set<String>>();
  322. for (String str : set) {
  323. anagramClasses.add(anagramClass(str));
  324. }
  325. return anagramClasses;
  326. }
  327.  
  328. // Problem 25. Create the anagram class of size at least minSize
  329. // of a random string of length strLen.
  330. public Set<String> anagramClassRndStr(int strLen, int minSize)
  331. {
  332. // insert your code here
  333. Random rnd = new Random();
  334. int index = 0;
  335. while (plaintextWords.get(index).length() != strLen ||
  336. anagramClass(plaintextWords.get(index)).size() < minSize) {
  337. index = rnd.nextInt(plaintextWords.size());
  338. }
  339. return anagramClass(plaintextWords.get(index));
  340. }
  341.  
  342. // Problem 26. Create a list of ntuples containing anagramClass
  343. // and anagramClass size pairs, sort the list by size, and write
  344. // it to disk. This is a computation intensive task that may
  345. // take several minutes, so we do this only once and write the
  346. // result to disk. Then we extract any information we need from the
  347. // disk file.
  348. public void writeAnagramClassesSorted(String outputFile)
  349. {
  350. List<Ntuple> anagramClassesSorted = new ArrayList<Ntuple>();
  351. Set<String> anagramClass = new HashSet<String>();
  352. String word;
  353. Ntuple ntuple0 = new Ntuple();
  354. for(Ntuple ntuple : lexicon)
  355. {
  356. word = (String)ntuple.getkth(0);
  357. anagramClass = anagramClass(word);
  358. ntuple0 = new Ntuple(word,anagramClass,anagramClass.size());
  359. anagramClassesSorted.add(ntuple0);
  360. }
  361. NtupleComparator nc = new NtupleComparator(2,1,false);
  362. nc.sortNtupleList(anagramClassesSorted);
  363. ReadWriteFile rwf = new ReadWriteFile();
  364. rwf.writeNtupleOutput(anagramClassesSorted, outputFile);
  365. }
  366.  
  367. // Problem 27. Read the file created in Problem 26 into
  368. // a list of strings and then recreate it as a list of
  369. // ntuples coded into the variable anagramClassesSorted.
  370. public void readAnagramClassesSorted(String inputFile)
  371. {
  372. ReadWriteFile rwf = new ReadWriteFile(inputFile);
  373. List<String> lines = rwf.getFileLines();
  374. for(String str : lines)
  375. {
  376. anagramClassesSorted.add(createNtupleFromNtupleStr(str));
  377. }
  378. }
  379.  
  380. // Problem 28. Create getter for anagramClassesSorted.
  381. public List<Ntuple> getAnagramClassesSorted()
  382. {
  383. // insert your code here
  384. return anagramClassesSorted;
  385. }
  386.  
  387. // Problem 29. Create mapWordToAnagramClass map.
  388. @SuppressWarnings("unchecked")
  389. public void mapWordToAnagramClass()
  390. {
  391. // insert your code here
  392. for (Ntuple ntuple : anagramClassesSorted) {
  393. mapWordToEquivClass.put((String) ntuple.getkth(0),(Set<String>) ntuple.getkth(1));
  394. }
  395. }
  396.  
  397. // Problem 30. Create getter for anagramClass using mapWordToAnagramClass map.
  398. public Set<String> getAnagramClass(String word)
  399. {
  400. // insert your code here
  401. return mapWordToAnagramClass.get(word);
  402. }
  403.  
  404. // Problem 31. Create mapWordToAnagramClassSize map.
  405. @SuppressWarnings("unchecked")
  406. public void mapWordToAnagramClassSize()
  407. {
  408. for (Ntuple ntuple : equivClassesSorted) {
  409. mapWordToAnagramClassSize.put((String)ntuple.getkth(0),(Integer)ntuple.getkth(2));
  410. }
  411. }
  412.  
  413. // Problem 32. Create getter for anagramClassSize using mapWordToAnagramClassSize map.
  414. public int getAnagramClassSize(String word)
  415. {
  416. // insert your code here
  417. return mapWordToAnagramClassSize.get(word);
  418. }
  419. // Problem 19. Create getter for total number of EquivClasses.
  420.  
  421.  
  422.  
  423. // Problem 33. Create getter for total number of AnagramClasses.
  424. public int getNumAnagramClasses()
  425. {
  426. Set<Set<String>> sets = new HashSet<Set<String>>();
  427. for (Set<String> set : mapWordToAnagramClass.values()) {
  428. sets.add(set);
  429. }
  430. return sets.size();
  431. }
  432.  
  433. // Problem 34. Create getter for anagramClass max size.
  434. public int getAnagramClassMaxSize()
  435. {
  436. return (int)anagramClassesSorted.get(0).getkth(2); }
  437.  
  438. // lexicon stats
  439.  
  440. // Problem 35. Create array of alphabet character counts for a given string.
  441. public double[] charCountArray(String str)
  442. {
  443. // insert your code here
  444. }
  445.  
  446. // Problem 36. Create array of alphabet character count totals
  447. // for all strings in lexicon keySet.
  448. public double[] charCountArrayTotal()
  449. {
  450. // insert your code here
  451. }
  452.  
  453. // Problem 37. Create array of alphabet character % totals
  454. // for all strings in lexicon keySet.
  455. public double[] charPercentArray()
  456. {
  457. // insert your code here
  458. }
  459.  
  460. // Problem 38. Count (recursively) the number of occurrences of str1 in str2.
  461. public int countOccurr(String str1, String str2)
  462. {
  463. // insert your code here
  464. }
  465.  
  466. // Problem 39. Create ntuples of bigrams (pairs of characters)
  467. // sorted by percent.
  468. public List<Ntuple> bigramPercentSortedNtuples()
  469. {
  470. ArrayList<Ntuple> ntuples = new ArrayList<Ntuple>();
  471. List<String> bigrams = new ArrayList<String>();
  472.  
  473. // insert your code here
  474. }
  475.  
  476. // Problem 40. Create array of percentages of word lengths. The value at each
  477. // index i is the percentage of words of length i.
  478. public double[] wordLengthPercentArray(int maxLen)
  479. {
  480. // insert your code here
  481. }
  482.  
  483. // Problem 41. Create lexicon report.
  484. public void lexiconReport(String wordInLexicon)
  485. {
  486.  
  487. System.out.println("\nLexicon Data: Alphabet, Words, EquivClasses & AnigramClasses");
  488. System.out.println("getAlphabet() = " + getAlphabet());
  489. System.out.println("getAlphaSize() = " + getAlphaSize());
  490.  
  491.  
  492. System.out.println("getPlaintextWords() = " + getPlaintextWords().subList(0, 50));
  493. System.out.println("getPlaintextWords().size() = " + getPlaintextWords().size());
  494.  
  495. System.out.println("\ncreateNtupleFromNtupleStr = " + createNtupleFromNtupleStr("Ntuple(post,[stoops, opts, post, stop, stoop, spot, spots, tops, pots, stops, posts],11)"));
  496. System.out.println("getEquivClassesSorted().subList(0, 50) = " + getEquivClassesSorted().subList(0, 50));
  497.  
  498. System.out.println("getNumEquivClasses() = " + getNumEquivClasses());
  499. System.out.println("\nwordInLexicon = " + wordInLexicon);
  500. System.out.println("getEquivClass(wordInLexicon) = " + getEquivClass(wordInLexicon));
  501. System.out.println("getEquivClassSize(wordInLexicon) = " + getEquivClassSize(wordInLexicon));
  502. System.out.println("getEquivClassMaxSize = " + getEquivClassMaxSize());
  503.  
  504. System.out.println("\ngetAnagramClassesSorted().subList(0, 50) = " + getAnagramClassesSorted().subList(0, 50));
  505. System.out.println("getNumAnagramClasses() = " + getNumAnagramClasses());
  506. System.out.println("\nwordInLexicon = " + wordInLexicon);
  507. System.out.println("getAnagramClass(wordInLexicon) = " + getAnagramClass(wordInLexicon));
  508. System.out.println("getAnagramClassSize(wordInLexicon) = " + getAnagramClassSize(wordInLexicon));
  509. System.out.println("getAnagramClassMaxSize = " + getAnagramClassMaxSize());
  510. System.out.println("\nwordInLexicon = " + wordInLexicon);
  511. System.out.println("getEquivClass(wordInLexicon) = " + getEquivClass(wordInLexicon));
  512. System.out.println("anagramClasses(getEquivClass(wordInLexicon)) = " + anagramClasses(getEquivClass(wordInLexicon)));
  513. System.out.println("getAnagramClass(wordInLexicon) = " + getAnagramClass(wordInLexicon));
  514. System.out.println("equivClasses(getAnagramClass(wordInLexicon)) = " + equivClasses(getAnagramClass(wordInLexicon)));
  515. /*
  516. System.out.println("\nLexicon Data: Character Counts & Percentages, Bigram Percentages and Word length Percentages");
  517. System.out.println("getAlphabet() = " + getAlphabet());
  518. System.out.println("wordInLexicon = " + wordInLexicon);
  519. System.out.println("charCountArray(wordInLexicon) = " + Arrays.toString(charCountArray(wordInLexicon)));
  520. System.out.println("charCountArrayTotal() = " + Arrays.toString(charCountArrayTotal()));
  521. System.out.println("charPercentArray() = " + Arrays.toString(charPercentArray()));
  522. List<Ntuple> bigramPercentSortedNtuples = bigramPercentSortedNtuples();
  523. System.out.println("\nbigramPercentSortedNtuples() = " + bigramPercentSortedNtuples);
  524. System.out.println("bigramPercentSortedNtuples().size() = " + bigramPercentSortedNtuples.size() +" (with nonzero percent out of " + getAlphaSize()*getAlphaSize() + " bigrams)");
  525. System.out.println("wordLengthPercentArray(27) = " + Arrays.toString(wordLengthPercentArray(27)));
  526. */
  527. }
  528.  
  529. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement