Advertisement
Guest User

Untitled

a guest
Nov 21st, 2019
118
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.62 KB | None | 0 0
  1. import java.io.*;
  2. import java.nio.file.Path;
  3. import java.nio.file.Paths;
  4. import java.util.*;
  5. import java.util.stream.IntStream;
  6.  
  7. public class WordCounter {
  8.  
  9. // The following are the ONLY variables we will modify for grading.
  10. // The rest of your code must run with no changes.
  11. public static final Path FOLDER_OF_TEXT_FILES = Paths.get("res"); // path to the folder where input text files are located
  12. public static final Path WORD_COUNT_TABLE_FILE = Paths.get("out1.txt"); // path to the output plain-text (.txt) file
  13. public static final int NUMBER_OF_THREADS = 1; // max. number of threads to spawn
  14.  
  15. /**
  16. * Key is for words. Value is a map of keys containing file names, value containing occurrence that in that filename.
  17. * Used as a lock
  18. */
  19. private final static Map<String, Map<String, Integer>> wordCount = new TreeMap<>();
  20.  
  21. /**
  22. * Largest word found
  23. */
  24. private static int maxWordLen = 0;
  25.  
  26. /**
  27. * Largest file name found
  28. */
  29. private static int maxFileLen = 0;
  30.  
  31. /**
  32. * Most amount of times a word was found
  33. */
  34. private static int maxOccurrence = 0;
  35.  
  36. /**
  37. * Files located in {@link WordCounter#FOLDER_OF_TEXT_FILES}
  38. */
  39. private static File[] fileNames;
  40.  
  41. /**
  42. * For debugging. If false, table is printed on stdout, else table is printed in file {@link WordCounter#WORD_COUNT_TABLE_FILE}
  43. */
  44. private static boolean toFile = true;
  45.  
  46. public static void main(String[] args) {
  47. System.out.println("Processors: " + Runtime.getRuntime().availableProcessors());
  48. WordCounter.fileNames = getFileNames();
  49. long start = System.currentTimeMillis();
  50. countWords();
  51. long end = System.currentTimeMillis();
  52. maxOccurrence = countDigits(maxOccurrence);
  53. System.out.println(wordCount);
  54. System.out.println();
  55. printTable(getPrintStream());
  56. System.out.printf("\nDone. Loading words took %dms%n", end - start);
  57. }
  58.  
  59. private static int countDigits(int num) {
  60. if (num == 0) return 0;
  61. else return 1 + countDigits(num / 10);
  62. }
  63.  
  64. /**
  65. * @return Determine what printstream we should use based off {@link WordCounter#toFile}
  66. */
  67. private static PrintStream getPrintStream() {
  68. if (!toFile) return System.out;
  69. PrintStream ps = null;
  70. try {
  71. ps = new PrintStream(WORD_COUNT_TABLE_FILE.toFile());
  72. } catch (FileNotFoundException e) {
  73. e.printStackTrace();
  74. /* Can't continue, end gracefully */
  75. System.exit(1);
  76. }
  77. return ps;
  78. }
  79.  
  80. /**
  81. * Start off the process of filling the map structure.
  82. * Creates threads as needed
  83. */
  84. private static void countWords() {
  85. int threadCount = 0;
  86. Thread[] threads = new Thread[fileNames.length];
  87. for (int i = 0; i < fileNames.length; ++i) {
  88. final File file = fileNames[i];
  89. Runnable runnable = () -> countWords0(file);
  90. threads[i] = new Thread(runnable);
  91. if (threadCount >= NUMBER_OF_THREADS - 1) {
  92. // System.out.printf("Too many threads running (%d). Running on main%n", threadCount);
  93. runnable.run();
  94. } else {
  95. // System.out.println("Running a new thread");
  96. threads[i].start();
  97. ++threadCount;
  98. /* If thread finishes, should threadCount increase? */
  99. }
  100. }
  101. /* Wait for all threads to finish */
  102. for (Thread t : threads) {
  103. try {
  104. t.join();
  105. } catch (InterruptedException e) {
  106. e.printStackTrace();
  107. }
  108. }
  109. }
  110.  
  111. /**
  112. *
  113. * @return children of {@link WordCounter#FOLDER_OF_TEXT_FILES}
  114. */
  115. private static File[] getFileNames() {
  116. File directory = FOLDER_OF_TEXT_FILES.toFile();
  117. if (!directory.isDirectory()) throw new IllegalArgumentException("Bad directory");
  118. return directory.listFiles();
  119. }
  120.  
  121. /**
  122. * Prints ' ' character to specified PrintStream
  123. * @param n Number of spaces
  124. * @param ps PrintStream to write spaces
  125. */
  126. private static void printSpaces(int n, PrintStream ps) {
  127. IntStream
  128. .range(0, n)
  129. .forEach(i -> ps.print(' '));
  130. }
  131.  
  132. /**
  133. * Prints header containing the file names
  134. * @param ps PrintStream to write header to
  135. */
  136. private static void printHeader(PrintStream ps) {
  137. printSpaces(maxWordLen + 1, ps);
  138. for (File fileName : fileNames) {
  139. ps.print(String.format(getFileNameFormatter('s'), fileName.getName()));
  140. }
  141. ps.println("total");
  142. }
  143.  
  144. /**
  145. * Returns format to insert properly space argument based off {@link WordCounter#maxFileLen}
  146. * @param flag flag for format (d, s, c, etc.)
  147. * @return String to be used for a format method call
  148. */
  149. private static String getFileNameFormatter(char flag) {
  150. int minSpacing = Math.max(maxFileLen, maxOccurrence);
  151. /* Add 4 for a little padding */
  152. return String.format("%%-%d%c", minSpacing + 4, flag);
  153. }
  154.  
  155. /**
  156. * Starts printing off table to specified PrintStream
  157. * @param ps Stream to write table to
  158. */
  159. private static void printTable(PrintStream ps) {
  160. printHeader(ps);
  161. List<String> keys = new ArrayList<>(wordCount.keySet());
  162. keys.sort(null);
  163. for (String key : keys) {
  164. ps.print(key);
  165. printSpaces(maxWordLen - key.length() + 1, ps);
  166. int total = 0;
  167. for (File fileName : fileNames) {
  168. int occurrence = wordCount.get(key).getOrDefault(fileName.getName(), 0);
  169. total += occurrence;
  170. ps.print(String.format(getFileNameFormatter('d'), occurrence));
  171. }
  172. ps.println(String.format(getFileNameFormatter('d'), total));
  173. }
  174. if (ps != System.out) ps.close();
  175. }
  176.  
  177. /**
  178. * Counts words in given file and updates the map structure
  179. * @param file File to read from
  180. */
  181. public static void countWords0(File file) {
  182. System.out.printf("%s is reading %s%n", Thread.currentThread().getName(), file.getAbsolutePath());
  183. long start = System.currentTimeMillis();
  184. // System.out.println("Counting words for " + file.getAbsolutePath());
  185. try (BufferedReader br = new BufferedReader(new FileReader(file))) {
  186. StringBuilder words = new StringBuilder();
  187. String line;
  188. while ((line = br.readLine()) != null) {
  189. words.append(line);
  190. words.append(' ');
  191. }
  192. countWords1(words.toString(), file.getName());
  193. } catch (IOException e) {
  194. System.err.println(e.getMessage());
  195. System.err.println("Skipping: " + file.getAbsolutePath());
  196. }
  197. long end = System.currentTimeMillis();
  198. System.out.printf("Loading %s took: %dms%n", file.getName(), end - start);
  199. }
  200.  
  201. /**
  202. * Given file content on one line, add each word to structure
  203. * @param line line to be parsed
  204. * @param fileName fileName for the map structure
  205. */
  206. public static void countWords1(String line, String fileName) {
  207. /* Split to avoid punctuation */
  208. String[] tokens = line.split("[.,?! ]"); /* Any others? */
  209. Arrays.stream(tokens)
  210. .map(String::trim)
  211. .filter(s -> !s.isEmpty())
  212. .map(String::toLowerCase)
  213. .forEach(s -> incrementKeyCount(s, fileName));
  214. }
  215.  
  216. /**
  217. * Thread-safe method to update the map structure as well as max values
  218. * @param key Key to be added or updated
  219. * @param fileName Filename to be added or updated
  220. */
  221. public static void incrementKeyCount(String key, String fileName) {
  222. synchronized (wordCount) {
  223. // System.out.println("Incrementing for key " + key);
  224. /* Init map for word */
  225. wordCount.computeIfAbsent(key, k -> new TreeMap<>());
  226. Map<String, Integer> fileWordCountMap = wordCount.get(key);
  227. /* Increment one for filename */
  228. fileWordCountMap.compute(fileName, (k, v) -> v == null ? 1 : v + 1);
  229. maxOccurrence = Math.max(maxOccurrence, fileWordCountMap.get(fileName));
  230. maxWordLen = Math.max(maxWordLen, key.length());
  231. maxFileLen = Math.max(maxFileLen, fileName.length());
  232. }
  233. }
  234.  
  235. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement