Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package cecs274;
- import java.util.*;
- import java.util.Scanner;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.util.ArrayList;
- import java.lang.String;
- /**
- * This class finds the top 40 most used words said by a particular speaker in a
- * political debate. This list of words depends on the speaker and the debate
- * given.
- *
- * @author Matthew Chung matthew.chung01@student.csulb.edu
- * @author Leo Rauthause leo.rauthause@student.csulb.edu
- */
- public class DebateWordCloud {
- private String candidate;
- private static ArrayList<String> stopWords = new ArrayList<>();
- private static ArrayList<String> debateWords = new ArrayList<>();
- private static ArrayList<WordFrequency> words = new ArrayList<>();
- private static ArrayList<WordFrequency> topDebateWords = new ArrayList<>();
- private static int maxFrequency;
- private static int minFrequency;
- public DebateWordCloud(String candidate) {
- this.candidate = candidate;
- }
- public static void main(String args[]) throws FileNotFoundException {
- System.out.println("Enter the following in the terminal as shown: (Debate File) (Candidate) (Stop Words File)");
- try {
- File stopWordsFile = new File(args[2]);
- Scanner stopWordsScanner = new Scanner(stopWordsFile);
- while (stopWordsScanner.hasNextLine()) {
- String line = stopWordsScanner.nextLine();
- stopWords.add(line.toLowerCase());
- }
- stopWordsScanner.close();
- File debateFile = new File(args[0]);
- Scanner debateFileScanner1 = new Scanner(debateFile);
- while (debateFileScanner1.hasNext()) {
- String word = debateFileScanner1.next();
- word.replaceAll("[^a-zA-Z0-9_-]", "");
- debateWords.add(word);
- }
- debateFileScanner1.close();
- getCandidateWords(args[1]);
- removeStopWords();
- Collections.sort(words);
- topWords();
- System.out.print(topDebateWords);
- } catch (FileNotFoundException fnfe) {
- System.out.println("File not Found");
- }
- }
- /**
- * Removes the stop words from a given stop words txt file from the words
- * ArrayList Source:
- * https://stackoverflow.com/questions/27685839/removing-stopwords-from-a-string-in-java
- */
- public static void removeStopWords() {
- for (int x = 0; x < words.size(); x++) {
- if (stopWords.contains(words.get(x).getWord().toLowerCase())) {
- words.remove(x);
- x--;
- }
- }
- }
- /**
- * Retrieves the words from a specific candidate and adds them to the debate
- * ArrayList Received help from CS Tutor
- *
- * @param speaker the candidate that's words we are retreiving
- */
- public static void getCandidateWords(String speaker) {
- String candidate = speaker.toUpperCase() + ":";
- Boolean isCandidateWords = false;
- for (int x = 0; x < debateWords.size(); x++) {
- String word = debateWords.get(x);
- if (word.equals(candidate)) {
- isCandidateWords = true;
- } else if (!(word.equals(candidate)) && word.contains(":")) {
- isCandidateWords = false;
- }
- getCandidateWordsExtended(word, candidate, isCandidateWords);
- // word spoken by speaker
- }
- }
- /**
- * Gets words spoken by specific candidate and adds them to words ArrayList
- * Received help from CS Tutor
- *
- * @param word Word being checked if it was spoken by candidate
- * @param candidate candidate who's words are being assembled
- * @param isCandidateWords is true if the words belong to the candidate. False
- * otherwise.
- */
- public static void getCandidateWordsExtended(String word, String candidate, Boolean isCandidateWords) {
- Boolean wordFound = false;
- if (isCandidateWords && !(word.contains(":")) && !(word.equals(candidate))) {
- for (int x = 0; x < words.size(); x++) {
- if (words.get(x).getWord().equals(word.toLowerCase())) {
- wordFound = true;
- }
- }
- if (!wordFound) {
- WordFrequency candidateWord = new WordFrequency(word.toLowerCase());
- words.add(candidateWord);
- } else {
- for (int y = 0; y < words.size(); y++) {
- if (words.get(y).getWord().equals(word.toLowerCase())) {
- words.get(y).incrementFrequency();
- }
- }
- }
- }
- }
- /**
- * Finds the top 40 words in the words ArrayList
- */
- public static void topWords() {
- Collections.sort(words);
- for (int x = words.size() - 1; x > words.size() - 41; x--) {
- topDebateWords.add(words.get(x));
- }
- maxFrequency = topDebateWords.get(0).getFrequency();
- minFrequency = topDebateWords.get(topDebateWords.size() - 1).getFrequency();
- Collections.shuffle(topDebateWords);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement