Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package dom1;
- import java.io.InputStream;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.Comparator;
- import java.util.HashSet;
- import java.util.Iterator;
- import java.util.List;
- import java.util.Locale;
- import java.util.Map;
- import java.util.Scanner;
- import java.util.Set;
- import java.util.SortedSet;
- import java.util.StringTokenizer;
- import java.util.TreeMap;
- import java.util.TreeSet;
- import java.util.Map.Entry;
- public class TermFrequencyTest {
- public static void main(String[] args) {
- String[] stop = new String[] { "во", "и", "се", "за", "ќе", "да", "од",
- "ги", "е", "со", "не", "тоа", "кои", "до", "го", "или", "дека",
- "што", "на", "а", "но", "кој", "ја" };
- TermFrequency tf = new TermFrequency(System.in, stop);
- System.out.println(tf.countTotal());
- System.out.println(tf.countDistinct());
- System.out.println(tf.mostOften(10));
- }
- }
/**
 * Counts how often each word occurs in a text, ignoring a caller-supplied
 * list of stop words.
 *
 * <p>Each whitespace-separated token is normalized before it is counted:
 * every {@code '.'} and {@code ','} is removed, the token is lower-cased, and
 * a single trailing {@code '-'} is dropped. Tokens that are empty or equal to
 * a stop word <em>after</em> normalization are not counted.
 */
class TermFrequency {

    /** Orders entries by descending count, breaking ties by ascending word. */
    private static final Comparator<Entry<String, Integer>> BY_COUNT_THEN_WORD =
            new Comparator<Entry<String, Integer>>() {
                @Override
                public int compare(Entry<String, Integer> a, Entry<String, Integer> b) {
                    // compareTo instead of subtraction: cannot overflow.
                    int byCount = b.getValue().compareTo(a.getValue());
                    return byCount != 0 ? byCount : a.getKey().compareTo(b.getKey());
                }
            };

    /** Normalized word -> number of occurrences (stop words excluded). */
    TreeMap<String, Integer> allWords;

    /** Number of counted word occurrences (stop words excluded). */
    int total;

    /**
     * Reads {@code input} to its end and tallies every non-stop word.
     *
     * <p>The stream is not closed; it belongs to the caller (it may be
     * {@code System.in}).
     *
     * @param input text to analyse
     * @param stop  words to ignore, compared against the normalized token;
     *              {@code null} is treated as "no stop words"
     */
    public TermFrequency(InputStream input, String[] stop) {
        allWords = new TreeMap<String, Integer>();

        // Hash set gives constant-time stop-word lookups.
        Set<String> ignore = new HashSet<String>();
        if (stop != null) {
            ignore.addAll(Arrays.asList(stop));
        }

        // NOTE(review): the Scanner decodes with the platform default charset,
        // as before; confirm that matches the encoding of the input text.
        Scanner scan = new Scanner(input);
        while (scan.hasNextLine()) {
            // Tokenize line by line; tokens never span lines, so this matches
            // tokenizing the whole text without building one giant string.
            StringTokenizer tokens = new StringTokenizer(scan.nextLine());
            while (tokens.hasMoreTokens()) {
                String word = prepareWord(tokens.nextToken());
                // Filter only after normalization is complete, so "-" and
                // hyphen-suffixed stop words are skipped, not counted.
                if (word.isEmpty() || ignore.contains(word)) {
                    continue;
                }
                total++;
                Integer seen = allWords.get(word);
                allWords.put(word, seen == null ? 1 : seen + 1);
            }
        }
    }

    /**
     * Normalizes a raw token: removes all {@code '.'} and {@code ','}
     * characters, lower-cases it independently of the default locale, and
     * drops one trailing {@code '-'} if present.
     *
     * @param word raw token, not {@code null}
     * @return the normalized word; may be empty
     */
    private static String prepareWord(String word) {
        String prepared = word.replace(".", "").replace(",", "").toLowerCase(Locale.ROOT);
        if (prepared.endsWith("-")) {
            prepared = prepared.substring(0, prepared.length() - 1);
        }
        return prepared;
    }

    /**
     * @return the number of counted word occurrences, stop words excluded
     */
    public int countTotal() {
        return total;
    }

    /**
     * @return the number of different counted words, stop words excluded
     */
    public int countDistinct() {
        return allWords.size();
    }

    /**
     * Returns the most frequent words, most frequent first; words with the
     * same count are ordered by {@link String#compareTo(String)}.
     *
     * @param k maximum number of words to return
     * @return at most {@code k} words; empty when {@code k <= 0} or when no
     *         words were counted
     */
    public List<String> mostOften(int k) {
        List<String> mostUsed = new ArrayList<String>();
        if (k <= 0) {
            return mostUsed;
        }

        SortedSet<Entry<String, Integer>> ranked =
                new TreeSet<Entry<String, Integer>>(BY_COUNT_THEN_WORD);
        ranked.addAll(allWords.entrySet());

        for (Entry<String, Integer> entry : ranked) {
            mostUsed.add(entry.getKey());
            if (mostUsed.size() == k) {
                break;
            }
        }
        return mostUsed;
    }
}
/**
 * Orders map keys by the value the backing map associates with them, largest
 * value first; keys with equal values are ordered by
 * {@link String#compareTo(String)}.
 *
 * <p>Both compared keys must be present in the backing map with non-null
 * values, otherwise {@link #compare(String, String)} throws
 * {@link NullPointerException}.
 */
class ValuesComparator implements Comparator<String> {

    /** Map consulted for the value of each compared key. */
    Map<String, Integer> base;

    /**
     * @param base map supplying the value for every key that will be compared
     */
    public ValuesComparator(Map<String, Integer> base) {
        this.base = base;
    }

    /**
     * Compares two keys by descending mapped value, then by ascending key.
     *
     * @throws NullPointerException if either key has no value in the map
     */
    @Override
    public int compare(String x, String y) {
        // compareTo rather than subtraction: the difference of two ints can
        // overflow and flip the sign of the result.
        int byValue = base.get(y).compareTo(base.get(x));
        if (byValue != 0) {
            return byValue;
        }
        return x.compareTo(y);
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement