Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.*;
- import java.nio.file.Path;
- import java.nio.file.Paths;
- import java.util.*;
- import java.util.stream.IntStream;
/**
 * Builds a table of word occurrences for every regular file found directly inside
 * {@link #FOLDER_OF_TEXT_FILES} and prints it, one row per word and one column per
 * file plus a trailing "total" column.
 *
 * <p>Files are distributed over at most {@link #NUMBER_OF_THREADS} threads (the main
 * thread counts as one of them). All shared state is guarded by the monitor of
 * {@code wordCount}.
 */
public class WordCounter {
    // The following are the ONLY variables we will modify for grading.
    // The rest of your code must run with no changes.
    public static final Path FOLDER_OF_TEXT_FILES = Paths.get("res"); // path to the folder where input text files are located
    public static final Path WORD_COUNT_TABLE_FILE = Paths.get("out1.txt"); // path to the output plain-text (.txt) file
    public static final int NUMBER_OF_THREADS = 1; // max. number of threads to spawn

    /**
     * Separators between words: the punctuation . , ? ! and any run of whitespace.
     * Compiled once because it is applied to every line of every file.
     */
    private static final java.util.regex.Pattern WORD_DELIMITERS =
            java.util.regex.Pattern.compile("[.,?!\\s]+");

    /**
     * Key is a (lower-cased) word. Value maps a file name to the number of times the
     * word occurs in that file. Also used as the lock guarding every mutable static below.
     */
    private static final Map<String, Map<String, Integer>> wordCount = new TreeMap<>();

    /** Length of the longest word found. */
    private static int maxWordLen = 0;

    /** Length of the longest file name seen. */
    private static int maxFileLen = 0;

    /** Highest number of times a single word was found in a single file. */
    private static int maxOccurrence = 0;

    /** Regular files located in {@link WordCounter#FOLDER_OF_TEXT_FILES}, in sorted order. */
    private static File[] fileNames;

    /**
     * For debugging. If false, the table is printed on stdout, else it is written to
     * {@link WordCounter#WORD_COUNT_TABLE_FILE}.
     */
    private static final boolean WRITE_TO_FILE = true;

    public static void main(String[] args) {
        System.out.println("Processors: " + Runtime.getRuntime().availableProcessors());
        WordCounter.fileNames = getFileNames();
        long start = System.currentTimeMillis();
        countWords();
        long end = System.currentTimeMillis();
        System.out.println(wordCount);
        System.out.println();
        printTable(getPrintStream());
        System.out.printf("\nDone. Loading words took %dms%n", end - start);
    }

    /**
     * @param num any integer
     * @return number of decimal digits needed to print {@code num} (sign excluded); 1 for zero
     */
    private static int countDigits(int num) {
        int digits = 0;
        do {
            ++digits;
            num /= 10;
        } while (num != 0);
        return digits;
    }

    /**
     * @return the stream the table should be written to, chosen by {@link WordCounter#WRITE_TO_FILE}.
     *         Terminates the JVM with status 1 if the output file cannot be opened.
     */
    private static PrintStream getPrintStream() {
        if (!WRITE_TO_FILE) {
            return System.out;
        }
        try {
            return new PrintStream(WORD_COUNT_TABLE_FILE.toFile());
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            /* Can't continue, end gracefully */
            System.exit(1);
            /* Not reached; only here because the compiler cannot know exit() never returns */
            throw new IllegalStateException(e);
        }
    }

    /**
     * Fills the map structure from every file in {@code fileNames}.
     *
     * <p>The files are split into {@code min(NUMBER_OF_THREADS, fileCount)} interleaved
     * stripes. The main thread processes stripe 0 and one thread is spawned for each
     * remaining stripe, so every allowed thread keeps working until its share is done.
     * Returns only after all spawned threads have finished.
     */
    private static void countWords() {
        final int fileCount = fileNames.length;
        if (fileCount == 0) {
            return;
        }
        final int workers = Math.max(1, Math.min(NUMBER_OF_THREADS, fileCount));
        Thread[] spawned = new Thread[workers - 1];
        for (int w = 0; w < spawned.length; ++w) {
            final int firstIndex = w + 1;
            spawned[w] = new Thread(() -> countStripe(firstIndex, workers));
            spawned[w].start();
        }
        countStripe(0, workers);
        joinAll(spawned);
    }

    /** Counts the words of files {@code first, first + stride, first + 2 * stride, ...}. */
    private static void countStripe(int first, int stride) {
        for (int i = first; i < fileNames.length; i += stride) {
            countWords0(fileNames[i]);
        }
    }

    /**
     * Waits for every given thread to terminate. An interrupt does not cut the wait
     * short (the table must not be printed while workers are still updating the map);
     * instead the interrupt flag is restored once all threads are done.
     */
    private static void joinAll(Thread[] threads) {
        boolean interrupted = false;
        for (Thread t : threads) {
            boolean joined = false;
            while (!joined) {
                try {
                    t.join();
                    joined = true;
                } catch (InterruptedException e) {
                    interrupted = true;
                }
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Lists the regular files directly inside {@link WordCounter#FOLDER_OF_TEXT_FILES},
     * sorted so that the table columns come out in a stable order, and records the
     * longest file name for column sizing.
     *
     * @return regular files that are children of {@link WordCounter#FOLDER_OF_TEXT_FILES}
     * @throws IllegalArgumentException if the path is not a directory
     * @throws UncheckedIOException if the directory cannot be listed
     */
    private static File[] getFileNames() {
        File directory = FOLDER_OF_TEXT_FILES.toFile();
        if (!directory.isDirectory()) {
            throw new IllegalArgumentException("Bad directory");
        }
        /* Sub-directories cannot be read as text, so they must not become table columns */
        FileFilter regularFilesOnly = File::isFile;
        File[] files = directory.listFiles(regularFilesOnly);
        if (files == null) {
            throw new UncheckedIOException(
                    new IOException("Cannot list directory " + directory.getAbsolutePath()));
        }
        Arrays.sort(files);
        synchronized (wordCount) {
            for (File f : files) {
                /* Sized from the names themselves so files without any word still fit */
                maxFileLen = Math.max(maxFileLen, f.getName().length());
            }
        }
        return files;
    }

    /**
     * Prints the header row containing the file names.
     *
     * @param ps PrintStream to write header to
     * @param wordFormat format for the (here empty) word column
     * @param nameFormat format for one file-name cell
     */
    private static void printHeader(PrintStream ps, String wordFormat, String nameFormat) {
        ps.print(String.format(wordFormat, ""));
        for (File fileName : fileNames) {
            ps.print(String.format(nameFormat, fileName.getName()));
        }
        ps.println("total");
    }

    /**
     * Returns a left-justified format whose width fits both the longest file name and
     * the widest per-file count.
     *
     * @param flag conversion for the format (d, s, c, etc.)
     * @return String to be used for a format method call
     */
    private static String getFileNameFormatter(char flag) {
        int minSpacing = Math.max(maxFileLen, countDigits(maxOccurrence));
        /* Add 4 for a little padding */
        return String.format("%%-%d%c", minSpacing + 4, flag);
    }

    /**
     * Prints the whole table to the specified PrintStream and closes the stream unless
     * it is {@code System.out}.
     *
     * @param ps Stream to write table to
     */
    private static void printTable(PrintStream ps) {
        try {
            /* Formats are identical for every cell, so build them once */
            final String wordFormat = "%-" + (maxWordLen + 1) + "s";
            final String nameFormat = getFileNameFormatter('s');
            final String countFormat = getFileNameFormatter('d');

            printHeader(ps, wordFormat, nameFormat);
            /* wordCount is a TreeMap, so iteration is already in sorted word order */
            for (Map.Entry<String, Map<String, Integer>> row : wordCount.entrySet()) {
                ps.print(String.format(wordFormat, row.getKey()));
                int total = 0;
                for (File fileName : fileNames) {
                    int occurrence = row.getValue().getOrDefault(fileName.getName(), 0);
                    total += occurrence;
                    ps.print(String.format(countFormat, occurrence));
                }
                ps.println(String.format(countFormat, total));
            }
            /* PrintStream never throws on write failure; this is the only way to notice */
            if (ps.checkError()) {
                System.err.println("Error while writing the word count table");
            }
        } finally {
            if (ps != System.out) {
                ps.close();
            }
        }
    }

    /**
     * Counts words in given file and updates the map structure. The file is read with
     * the platform default charset. If reading fails, the file is skipped and nothing
     * from it is added.
     *
     * @param file File to read from
     */
    public static void countWords0(File file) {
        System.out.printf("%s is reading %s%n", Thread.currentThread().getName(), file.getAbsolutePath());
        long start = System.currentTimeMillis();
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            /* Tally locally first: no lock contention while reading, all-or-nothing on I/O errors */
            Map<String, Integer> tally = new HashMap<>();
            String line;
            while ((line = br.readLine()) != null) {
                tallyWords(line, tally);
            }
            mergeTally(tally, file.getName());
        } catch (IOException e) {
            System.err.println(e.getMessage());
            System.err.println("Skipping: " + file.getAbsolutePath());
        }
        long end = System.currentTimeMillis();
        System.out.printf("Loading %s took: %dms%n", file.getName(), end - start);
    }

    /**
     * Splits the given text into words and adds each one to the map structure.
     * Words are separated by . , ? ! or whitespace and are compared case-insensitively.
     *
     * @param line text to be parsed
     * @param fileName fileName for the map structure
     */
    public static void countWords1(String line, String fileName) {
        Map<String, Integer> tally = new HashMap<>();
        tallyWords(line, tally);
        mergeTally(tally, fileName);
    }

    /** Adds the lower-cased words of {@code text} to the caller-private {@code tally}. */
    private static void tallyWords(String text, Map<String, Integer> tally) {
        for (String token : WORD_DELIMITERS.split(text)) {
            String word = token.trim();
            if (!word.isEmpty()) {
                /* Locale.ROOT keeps the keys independent of the machine's default locale */
                tally.merge(word.toLowerCase(Locale.ROOT), 1, Integer::sum);
            }
        }
    }

    /** Publishes a per-file tally into the shared structure under a single lock acquisition. */
    private static void mergeTally(Map<String, Integer> tally, String fileName) {
        synchronized (wordCount) {
            for (Map.Entry<String, Integer> e : tally.entrySet()) {
                addOccurrences(e.getKey(), fileName, e.getValue());
            }
        }
    }

    /**
     * Thread-safe method to update the map structure as well as max values.
     *
     * @param key Key to be added or updated
     * @param fileName Filename to be added or updated
     */
    public static void incrementKeyCount(String key, String fileName) {
        synchronized (wordCount) {
            addOccurrences(key, fileName, 1);
        }
    }

    /**
     * Adds {@code amount} occurrences of {@code key} for {@code fileName} and refreshes
     * the max values. Caller must hold the {@code wordCount} lock.
     */
    private static void addOccurrences(String key, String fileName, int amount) {
        int updated = wordCount
                .computeIfAbsent(key, k -> new TreeMap<>())
                .merge(fileName, amount, Integer::sum);
        maxOccurrence = Math.max(maxOccurrence, updated);
        maxWordLen = Math.max(maxWordLen, key.length());
        maxFileLen = Math.max(maxFileLen, fileName.length());
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement