Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.util.*;
- import java.io.*;
- public class Vocabulary4 {
- public static void main(String[] q) throws FileNotFoundException {
- Scanner console = new Scanner(System.in);
- giveIntro();
- System.out.print("file #1 name? ");
- Scanner input1 = new Scanner(new File(console.nextLine())); // moby.txt
- System.out.print("file #2 name? ");
- Scanner input2 = new Scanner(new File(console.nextLine())); // hamlet.txt
- System.out.println();
- double time1 = System.currentTimeMillis();
- TreeSet<String> set1 = getWords(input1);
- TreeSet<String> set2 = getWords(input2);
- double time2 = System.currentTimeMillis();
- System.out.println("File read time=" + (time2 - time1) / 1000 + " seconds");
- time1 = System.currentTimeMillis();
- TreeSet<String> overlap = getOverlap(set1, set2);
- reportResults(set1, set2, overlap);
- time2 = System.currentTimeMillis();
- System.out.println("Process time=" + (time2 - time1) + " milliseconds");
- console.close();
- }
- // TreeSet automatically sorts the list and does not store duplicate objects
- public static TreeSet<String> getWords(Scanner input) {
- // ignore all but alphabetic characters and apostrophes
- input.useDelimiter("[^a-zA-Z']");
- // read all words and sort
- TreeSet<String> words = new TreeSet<String>();
- while (input.hasNext()) {
- String next = input.next().toLowerCase();
- words.add(next);
- }
- return words;
- }
- // Define a new TreeSet as set1 and retain only the elements it shares with set2
- public static TreeSet<String> getOverlap(TreeSet<String> set1, TreeSet<String> set2) {
- TreeSet<String> overlap = new TreeSet<String>(set1);
- overlap.retainAll(set2);
- return overlap;
- }
- // Explanation of the program via the Console
- public static void giveIntro() {
- System.out.println("This program compares the vocabulary of two");
- System.out.println("text files, reporting the number of words");
- System.out.println("in common and the percent of overlap.\n");
- }
- // Reports the percentage of overlap between the files
- public static void reportResults(TreeSet<String> set1, TreeSet<String> set2, TreeSet<String> overlap) {
- System.out.println("file #1 words = " + set1.size());
- System.out.println("file #2 words = " + set2.size());
- System.out.println("common words = " + overlap.size());
- double percent1 = 100.0 * overlap.size() / set1.size();
- double percent2 = 100.0 * overlap.size() / set2.size();
- System.out.println("% of file 1 in overlap = " + percent1);
- System.out.println("% of file 2 in overlap = " + percent2);
- }
- } // End Vocabulary4
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement