Advertisement
Guest User

Untitled

a guest
Jan 23rd, 2018
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 2.55 KB | None | 0 0
  1. import java.util.*;
  2. import java.io.*;
  3.  
  /**
   * Compares the vocabulary of two text files.
   *
   * <p>The program prompts on the console for two file names, reads every word
   * from each file into a sorted set, and reports the number of distinct words in
   * each file, the number of words they share, and the percentage of each file's
   * vocabulary that lies in the overlap. Elapsed times for reading and for
   * processing are printed as well.
   */
  public class Vocabulary4 {
      /**
       * Token delimiter: one or more characters that are neither ASCII letters nor
       * apostrophes. The trailing {@code +} matters: with a single-character
       * delimiter, Scanner returns an empty token between two adjacent delimiters
       * (e.g. ", "), which would be counted as a word.
       */
      private static final String WORD_DELIMITER = "[^a-zA-Z']+";

      /**
       * Program entry point: prompts for two file names and reports their overlap.
       *
       * @param args command-line arguments (unused)
       * @throws FileNotFoundException if either named file cannot be opened
       */
      public static void main(String[] args) throws FileNotFoundException {
          // try-with-resources guarantees every scanner (and the underlying file
          // handle) is released even if opening the second file fails.
          try (Scanner console = new Scanner(System.in)) {
              giveIntro();

              System.out.print("file #1 name? ");
              try (Scanner input1 = new Scanner(new File(console.nextLine()))) { // e.g. moby.txt
                  System.out.print("file #2 name? ");
                  try (Scanner input2 = new Scanner(new File(console.nextLine()))) { // e.g. hamlet.txt
                      System.out.println();
                      compareAndReport(input1, input2);
                  }
              }
          }
      }

      /** Reads both inputs, computes the overlap, and prints results with timings. */
      private static void compareAndReport(Scanner input1, Scanner input2) {
          long readStart = System.currentTimeMillis();
          TreeSet<String> set1 = getWords(input1);
          TreeSet<String> set2 = getWords(input2);
          long readMillis = System.currentTimeMillis() - readStart;
          System.out.println("File read time=" + readMillis / 1000.0 + " seconds");

          long processStart = System.currentTimeMillis();
          TreeSet<String> overlap = getOverlap(set1, set2);
          reportResults(set1, set2, overlap);
          long processMillis = System.currentTimeMillis() - processStart;
          // Cast keeps the historical output format (e.g. "12.0 milliseconds").
          System.out.println("Process time=" + (double) processMillis + " milliseconds");
      }

      /**
       * Reads all words from the given scanner into a sorted, duplicate-free set.
       *
       * <p>A word is a maximal run of ASCII letters and apostrophes; everything
       * else is treated as a separator. Words are lower-cased before being stored.
       * The scanner's delimiter is changed by this call; the scanner is not closed.
       *
       * @param input the scanner to read tokens from
       * @return the distinct lower-cased words, in natural (sorted) order
       */
      public static TreeSet<String> getWords(Scanner input) {
          input.useDelimiter(WORD_DELIMITER);
          TreeSet<String> words = new TreeSet<>();
          while (input.hasNext()) {
              // Locale.ROOT keeps the case mapping independent of the machine's
              // default locale.
              words.add(input.next().toLowerCase(Locale.ROOT));
          }
          return words;
      }

      /**
       * Returns the words that appear in both sets.
       *
       * <p>Neither argument is modified; the result is a new set built from a copy
       * of {@code set1} reduced to the elements also contained in {@code set2}.
       *
       * @param set1 the first vocabulary
       * @param set2 the second vocabulary
       * @return a new sorted set holding the intersection of the two sets
       */
      public static TreeSet<String> getOverlap(TreeSet<String> set1, TreeSet<String> set2) {
          TreeSet<String> overlap = new TreeSet<>(set1);
          overlap.retainAll(set2);
          return overlap;
      }

      /** Prints a short explanation of the program to the console. */
      public static void giveIntro() {
          System.out.println("This program compares the vocabulary of two");
          System.out.println("text files, reporting the number of words");
          System.out.println("in common and the percent of overlap.\n");
      }

      /**
       * Prints the size of each vocabulary, the size of the overlap, and the
       * percentage of each vocabulary that lies in the overlap.
       *
       * @param set1    the first file's vocabulary
       * @param set2    the second file's vocabulary
       * @param overlap the words common to both vocabularies
       */
      public static void reportResults(TreeSet<String> set1, TreeSet<String> set2, TreeSet<String> overlap) {
          System.out.println("file #1 words = " + set1.size());
          System.out.println("file #2 words = " + set2.size());
          System.out.println("common words  = " + overlap.size());

          double percent1 = percentOf(overlap.size(), set1.size());
          double percent2 = percentOf(overlap.size(), set2.size());
          System.out.println("% of file 1 in overlap = " + percent1);
          System.out.println("% of file 2 in overlap = " + percent2);
      }

      /** Returns part/whole as a percentage, or 0.0 when whole is zero (avoids NaN). */
      private static double percentOf(int part, int whole) {
          if (whole == 0) {
              return 0.0;
          }
          return 100.0 * part / whole;
      }
  } // End Vocabulary4
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement