Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //////////////////////// TOP OF FILE COMMENT BLOCK ////////////////////////////
- //
- // Title: Create Word Sets
- // Course: CS 200, Fall 2019
- //
- // Author: Sydney Clark
- // Email: slclark4@wisc.edu
- // Lecturer's Name: Marc Ranault
- //
- ///////////////////////////////// CITATIONS ////////////////////////////////////
- //
- // N/A
- /////////////////////////////// 80 COLUMNS WIDE ////////////////////////////////
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.PrintWriter;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.Scanner;
/**
 * Builds "word sets" from a dictionary file: for every keyword of a given
 * length, the set of dictionary words that can be spelled using only the
 * keyword's letters (respecting how many times each letter occurs).
 */
public class CreateWordSets {

    /** Lower case letters treated as vowels by {@link #hasVowel(String)}. */
    private static final String VOWELS = "aeiouy";

    /** Number of letters in the range 'a' to 'z'. */
    private static final int ALPHABET_SIZE = 26;

    /**
     * Returns whether there is a vowel in the word.
     * Vowels are the lower case letters: aeiouy
     *
     * @param word The word to check letters.
     * @return true if at least one letter in word is a vowel,
     *         false otherwise (including for the empty string).
     */
    public static boolean hasVowel(String word) {
        for (int i = 0; i < word.length(); i++) {
            if (VOWELS.indexOf(word.charAt(i)) >= 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns whether the word parameter has only lower case letters between
     * 'a' and 'z', inclusive.
     *
     * The check is a plain range comparison rather than Character.isLetter,
     * because isLetter also accepts letters outside 'a'..'z' (for example
     * accented letters), which letterCounts cannot count.
     *
     * @param word The word to check for letters.
     * @return true if every character is in 'a'..'z' (vacuously true for the
     *         empty string), false otherwise.
     */
    public static boolean onlyLetters(String word) {
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            if (c < 'a' || c > 'z') {
                return false;
            }
        }
        return true;
    }

    /**
     * Opens and reads a dictionary file returning a list of words.
     * Each line is treated as one word and is checked to see if it contains
     * a vowel (hasVowel) and is composed only of letters (onlyLetters).
     * If not the word is discarded.
     *
     * If the file cannot be found, then the following message is shown:
     *   Error: Unable to read file <dictionaryFilename>
     * with <dictionaryFilename> replaced with the parameter value, and an
     * empty list is returned.
     *
     * @param dictionaryFilename The dictionary file to read.
     * @return An ArrayList of the accepted words, in file order.
     */
    public static ArrayList<String> readDictionary(String dictionaryFilename) {
        ArrayList<String> wordList = new ArrayList<>();
        // try-with-resources closes the Scanner on every exit path.
        try (Scanner scnr = new Scanner(new File(dictionaryFilename))) {
            while (scnr.hasNextLine()) {
                String word = scnr.nextLine();
                if (hasVowel(word) && onlyLetters(word)) {
                    wordList.add(word);
                }
            }
        } catch (FileNotFoundException e) {
            System.out.println("Error: Unable to read file " + dictionaryFilename);
        }
        return wordList;
    }

    /**
     * This removes all the words from the dictionary that are outside the
     * length parameters. The list is modified in place and the relative
     * order of the remaining words is preserved.
     *
     * @param dictionary The dictionary to filter.
     * @param minLength The minimum length of words in the dictionary.
     * @param maxLength The maximum length of words in the dictionary.
     */
    public static void lengthFilter(ArrayList<String> dictionary, int minLength, int maxLength) {
        // Collect the survivors first, then swap them in; removing from the
        // middle of an ArrayList one element at a time would be quadratic.
        ArrayList<String> kept = new ArrayList<>();
        for (String word : dictionary) {
            int length = word.length();
            if (length >= minLength && length <= maxLength) {
                kept.add(word);
            }
        }
        dictionary.clear();
        dictionary.addAll(kept);
    }

    /**
     * Returns an array with the count of each letter in the word. This only
     * checks for the letters from lower case 'a' to 'z' and the 0th index
     * in the returned array corresponds to the count of 'a' in the word.
     * Any other character is ignored.
     *
     * @param word The word to count its letters.
     * @return A new array of length 26 with the counts of each letter.
     */
    public static int[] letterCounts(String word) {
        int[] letterCount = new int[ALPHABET_SIZE];
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            if (c >= 'a' && c <= 'z') {
                letterCount[c - 'a']++;
            }
        }
        return letterCount;
    }

    /**
     * This checks that the check word has no more of any letter than
     * the keyword. The counts parameters were created with the letterCounts method.
     *
     * @param keywordLetterCounts The counts of each letter for the keyword.
     * @param checkWordLetterCounts The counts of each letter for the check word.
     * @return true if, for every letter, the check word's count is less than
     *         or equal to the keyword's count; false otherwise.
     */
    public static boolean sameLetters(int[] keywordLetterCounts, int[] checkWordLetterCounts) {
        for (int i = 0; i < checkWordLetterCounts.length; i++) {
            if (keywordLetterCounts[i] < checkWordLetterCounts[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * For each word in the dictionary that has length keywordLength this creates
     * sets of words that are made of the same or subset of the letters.
     *
     * Algorithm:
     * For each word in the dictionary having a length equal to the keywordLength
     *     Create a new word set.
     *     Count the number of each letter in the keyword (letterCounts)
     *     For each word in the dictionary that has the same or a subset of the
     *         same letters (sameLetters) then add that word to the word set.
     *     If the word set has at least minWordSetSize words
     *         Sort the word set.
     *         Write the keyword to the file as the first word on the line,
     *         followed by a : and space.
     *         Then write each word of the word set to the same line of the
     *         file with a space separating each word, and end the line.
     *
     * If there is an error writing the file then the following message is shown:
     *   Error: Unable to write file <wordSetFilename>
     * with <wordSetFilename> replaced with the parameter value.
     *
     * File Example:
     * shall: all ash hall has las shall
     * award: ada award draw raw war ward
     *
     * The first words, shall and award, are the keywords, one word that uses
     * all the letters. All the other words are made up of a subset (or all) of
     * the letters.
     *
     * @param dictionary The dictionary of words (not modified).
     * @param wordSetFilename The name of the file to write the word sets
     * @param keywordLength length of the keyword
     * @param minWordSetSize The minimum number of words necessary in order to
     *                       save the word set.
     */
    public static void createSets(ArrayList<String> dictionary,
            String wordSetFilename, int keywordLength, int minWordSetSize) {
        // Letter counts depend only on the word, so compute them once up
        // front instead of once per (keyword, candidate) pair.
        int[][] counts = new int[dictionary.size()][];
        for (int i = 0; i < counts.length; i++) {
            counts[i] = letterCounts(dictionary.get(i));
        }

        try (PrintWriter out = new PrintWriter(wordSetFilename)) {
            for (int i = 0; i < dictionary.size(); i++) {
                String keyword = dictionary.get(i);
                if (keyword.length() != keywordLength) {
                    continue;
                }

                // A fresh set per keyword: words matched for an earlier
                // keyword must not leak into this one.
                ArrayList<String> wordSet = new ArrayList<>();
                for (int j = 0; j < dictionary.size(); j++) {
                    if (sameLetters(counts[i], counts[j])) {
                        wordSet.add(dictionary.get(j));
                    }
                }

                if (wordSet.size() >= minWordSetSize) {
                    Collections.sort(wordSet);
                    StringBuilder line = new StringBuilder(keyword).append(": ");
                    for (int k = 0; k < wordSet.size(); k++) {
                        if (k > 0) {
                            line.append(' ');
                        }
                        line.append(wordSet.get(k));
                    }
                    // The line terminator belongs in the file, not on the console.
                    out.println(line);
                }
            }
            // PrintWriter never throws on write failures; it only records
            // them, so the error state has to be queried explicitly.
            if (out.checkError()) {
                System.out.println("Error: Unable to write file " + wordSetFilename);
            }
        } catch (FileNotFoundException e) {
            System.out.println("Error: Unable to write file " + wordSetFilename);
        }
    }

    /**
     * This reads in a dictionary and filters the dictionary to contain only
     * words from 3 to 5 letters long. Then sets of words to be used in the
     * WordDetective program are saved to a file.
     *
     * The source dictionary, google10000english.txt, is from google:
     * https://github.com/first20hours/google-10000-english/blob/master/google-10000-english-no-swears.txt
     *
     * @param args unused.
     */
    public static void main(String[] args) {
        ArrayList<String> dictionary = readDictionary("google10000english.txt");
        lengthFilter(dictionary, 3, 5);
        createSets(dictionary, "wordSets.txt", 5, 6);
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement