Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System.Collections.Generic;
- using System.Globalization;
- namespace TextAnalysis
- {
/// <summary>
/// Builds a "most frequent continuation" table from the bigrams and trigrams of a text.
/// </summary>
static class FrequencyAnalysisTask
{
    /// <summary>
    /// For every word that starts a bigram, and every pair of adjacent words that starts a
    /// trigram, finds the word that most frequently follows it within a sentence.
    /// </summary>
    /// <param name="text">
    /// The text as a list of sentences, each sentence being a list of words.
    /// N-grams never span sentence boundaries. <c>null</c> sentences are skipped.
    /// Words are assumed to be non-null.
    /// </param>
    /// <returns>
    /// A dictionary whose keys are either a single word (<c>"w1"</c>) or two words joined by
    /// a single space (<c>"w1 w2"</c>), and whose values are the most frequent next word.
    /// When several continuations are equally frequent, the one that sorts first under
    /// ordinal comparison is chosen so the result is deterministic.
    /// An empty text yields an empty dictionary.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
    public static Dictionary<string, string> GetMostFrequentNextWords(List<List<string>> text)
    {
        if (text == null)
        {
            throw new System.ArgumentNullException(nameof(text));
        }

        // prefix ("w1" or "w1 w2") -> (next word -> number of occurrences)
        var continuationCounts = new Dictionary<string, Dictionary<string, int>>();

        // Iterate over the elements actually stored (Count), never over Capacity,
        // which is only the size of the list's internal buffer.
        foreach (var sentence in text)
        {
            if (sentence == null)
            {
                continue;
            }

            for (var i = 0; i < sentence.Count - 1; i++)
            {
                // Bigram: sentence[i] is followed by sentence[i + 1].
                CountContinuation(continuationCounts, sentence[i], sentence[i + 1]);

                // Trigram: only when a third word exists in this sentence.
                if (i < sentence.Count - 2)
                {
                    var pairKey = sentence[i] + ' ' + sentence[i + 1];
                    CountContinuation(continuationCounts, pairKey, sentence[i + 2]);
                }
            }
        }

        var result = new Dictionary<string, string>();
        foreach (var entry in continuationCounts)
        {
            result[entry.Key] = SelectMostFrequent(entry.Value);
        }

        return result;
    }

    /// <summary>
    /// Records one occurrence of <paramref name="nextWord"/> following <paramref name="prefix"/>.
    /// </summary>
    private static void CountContinuation(
        Dictionary<string, Dictionary<string, int>> counts,
        string prefix,
        string nextWord)
    {
        Dictionary<string, int> followers;
        if (!counts.TryGetValue(prefix, out followers))
        {
            followers = new Dictionary<string, int>();
            counts[prefix] = followers;
        }

        // TryGetValue leaves 'seen' at 0 when the word has not been counted yet.
        int seen;
        followers.TryGetValue(nextWord, out seen);
        followers[nextWord] = seen + 1;
    }

    /// <summary>
    /// Returns the key with the highest count; ties go to the ordinally smallest key.
    /// </summary>
    private static string SelectMostFrequent(Dictionary<string, int> counts)
    {
        string best = null;
        var bestCount = 0;
        foreach (var candidate in counts)
        {
            var isMoreFrequent = candidate.Value > bestCount;
            var winsTie = candidate.Value == bestCount
                          && string.CompareOrdinal(candidate.Key, best) < 0;
            if (best == null || isMoreFrequent || winsTie)
            {
                best = candidate.Key;
                bestCount = candidate.Value;
            }
        }

        return best;
    }
}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement