Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
 * Counts how often each word occurs in a UTF-8 text file and writes the
 * statistics to a second UTF-8 file, one {@code word count} pair per line.
 *
 * <p>A word is a maximal run, within a single line, of characters accepted by
 * {@link #isLetterOrDashOrApostrophe(char)}. Words are lower-cased before
 * counting. Output lines are ordered by ascending count; words with the same
 * count keep the order in which they first appeared in the input.
 */
public class WordStatCount {

    /**
     * Tells whether a character may be part of a word.
     *
     * @param sym the character to classify
     * @return {@code true} if {@code sym} is a letter, belongs to the Unicode
     *         dash-punctuation category, or is the apostrophe {@code '}
     */
    public static boolean isLetterOrDashOrApostrophe(char sym) {
        return Character.isLetter(sym)
                || Character.getType(sym) == Character.DASH_PUNCTUATION
                || sym == '\'';
    }

    /**
     * Program entry point.
     *
     * <p>I/O failures are reported by printing the exception message to
     * standard output. When fewer than two arguments are supplied a usage
     * line is printed instead and nothing is read or written.
     *
     * @param files {@code files[0]} is the input file path,
     *              {@code files[1]} is the output file path
     */
    public static void main(String[] files) {
        // Guard against missing arguments: indexing the array blindly would
        // escape the IOException handler below as an unchecked exception.
        if (files == null || files.length < 2) {
            System.out.println("Usage: java WordStatCount <input file> <output file>");
            return;
        }

        try {
            Map<String, Integer> words;
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(new FileInputStream(files[0]), StandardCharsets.UTF_8))) {
                words = countWords(in);
            }

            // The output file is opened only after the input was read
            // successfully, so a bad input path never truncates the output.
            try (BufferedWriter out = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(files[1]), StandardCharsets.UTF_8))) {
                writeSortedByCount(words, out);
            }
        } catch (IOException e) {
            System.out.println(e.getMessage());
        }
    }

    /** Reads {@code in} line by line and counts lower-cased words in first-seen order. */
    private static Map<String, Integer> countWords(BufferedReader in) throws IOException {
        // LinkedHashMap remembers first-insertion order, which later decides
        // the relative order of words that share the same count.
        Map<String, Integer> words = new LinkedHashMap<>();
        for (String line = in.readLine(); line != null; line = in.readLine()) {
            int pos = 0;
            while (pos < line.length()) {
                if (!isLetterOrDashOrApostrophe(line.charAt(pos))) {
                    ++pos;
                    continue;
                }
                int end = pos;
                while (end < line.length() && isLetterOrDashOrApostrophe(line.charAt(end))) {
                    ++end;
                }
                // Locale.ROOT keeps the keys independent of the machine's default locale.
                String word = line.substring(pos, end).toLowerCase(Locale.ROOT);
                words.merge(word, 1, Integer::sum);
                pos = end;
            }
        }
        return words;
    }

    /** Writes {@code word count} lines to {@code out}, ordered by ascending count. */
    private static void writeSortedByCount(Map<String, Integer> words, BufferedWriter out)
            throws IOException {
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(words.entrySet());
        // The sort is stable, so equal counts stay in first-seen order.
        entries.sort(Map.Entry.<String, Integer>comparingByValue());
        for (Map.Entry<String, Integer> entry : entries) {
            out.write(entry.getKey());
            out.write(" " + entry.getValue());
            out.newLine();
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment