Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public static double[] computeDistribution(final String corpus) {
- String shortenedCorpus = corpus.replaceAll("[^a-zA-Z]", "");
- char[] corpusArray = shortenedCorpus.toCharArray();
- int corpusLength = shortenedCorpus.length();
- double[] resultSet = new double[ALPHA_BET];
- char check = ' ';
- String alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
- if (corpusLength < 1) {
- return resultSet;
- }
- for (int i = 0; i < corpusLength; i++) {
- int charIndex = alphabet.indexOf(corpusArray[i]);
- resultSet[charIndex] = resultSet[charIndex] + 1;
- }
- for (int i = 0; i < resultSet.length; i++) {
- resultSet[i] = resultSet[i] / corpusLength;
- }
- return resultSet;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement