Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System.Collections.Generic;
- namespace TextAnalysis
- {
/// <summary>
/// Builds a "most frequent next word" table from the bigrams and trigrams of a text.
/// </summary>
static class FrequencyAnalysisTask
{
    /// <summary>
    /// Counts every n-gram of <paramref name="sentence"/> into <paramref name="gramDictionary"/>.
    /// Each n-gram is split into its first <c>n - 1</c> words (joined with a single space,
    /// used as the outer key) and its last word (used as the inner key); the inner value is
    /// the number of times that continuation has been seen.
    /// </summary>
    /// <param name="sentence">Words of one sentence, in order.</param>
    /// <param name="n">Number of words per n-gram; must be at least 2.</param>
    /// <param name="gramDictionary">Accumulator that is updated in place.</param>
    /// <exception cref="System.ArgumentOutOfRangeException">
    /// <paramref name="n"/> is less than 2.
    /// </exception>
    public static void GramCountDictionary(
        List<string> sentence,
        int n,
        Dictionary<string, Dictionary<string, int>> gramDictionary)
    {
        // Fail before touching the accumulator: an n-gram needs a prefix and a continuation.
        if (n < 2)
        {
            throw new System.ArgumentOutOfRangeException(
                nameof(n), n, "An n-gram must contain at least two words.");
        }

        var prefixLength = n - 1;
        for (var start = 0; start + n <= sentence.Count; start++)
        {
            // The single-word prefix is used as-is so bigram keys are exactly the word itself.
            var firstHalfGram = prefixLength == 1
                ? sentence[start]
                : string.Join(" ", sentence.GetRange(start, prefixLength));
            var secondHalfGram = sentence[start + prefixLength];

            Dictionary<string, int> continuations;
            if (!gramDictionary.TryGetValue(firstHalfGram, out continuations))
            {
                continuations = new Dictionary<string, int>();
                gramDictionary.Add(firstHalfGram, continuations);
            }

            // TryGetValue leaves seenCount at 0 for a continuation not seen before.
            int seenCount;
            continuations.TryGetValue(secondHalfGram, out seenCount);
            continuations[secondHalfGram] = seenCount + 1;
        }
    }

    /// <summary>
    /// For every prefix in <paramref name="gramDictionary"/>, adds to
    /// <paramref name="result"/> the continuation with the highest count. Ties are resolved
    /// in favour of the continuation that is smaller under ordinal string comparison.
    /// </summary>
    /// <param name="gramDictionary">Prefix -> (continuation -> count) table.</param>
    /// <param name="result">
    /// Prefix -> chosen continuation table, updated in place via <c>Add</c>, so a prefix
    /// that is already present causes <c>Add</c> to throw.
    /// </param>
    public static void AddMaxFrequentGramInResult(
        Dictionary<string, Dictionary<string, int>> gramDictionary,
        Dictionary<string, string> result)
    {
        foreach (var prefixEntry in gramDictionary)
        {
            result.Add(prefixEntry.Key, FindMostFrequentContinuation(prefixEntry.Value));
        }
    }

    /// <summary>
    /// Returns the key with the largest count, preferring the ordinally smaller key on a tie.
    /// Returns an empty string when <paramref name="continuations"/> has no entries.
    /// </summary>
    private static string FindMostFrequentContinuation(Dictionary<string, int> continuations)
    {
        var bestCount = -1;
        var bestWord = "";
        foreach (var candidate in continuations)
        {
            var isMoreFrequent = candidate.Value > bestCount;
            var winsTie = candidate.Value == bestCount
                && string.CompareOrdinal(candidate.Key, bestWord) < 0;
            if (isMoreFrequent || winsTie)
            {
                bestCount = candidate.Value;
                bestWord = candidate.Key;
            }
        }

        return bestWord;
    }

    /// <summary>
    /// Computes, for every single word and every space-joined pair of adjacent words that
    /// is followed by another word in some sentence of <paramref name="text"/>, the word
    /// that most frequently follows it.
    /// </summary>
    /// <param name="text">Sentences, each given as a list of words.</param>
    /// <returns>
    /// A dictionary whose keys are one-word and two-word prefixes and whose values are the
    /// chosen next words.
    /// </returns>
    public static Dictionary<string, string> GetMostFrequentNextWords(List<List<string>> text)
    {
        var twoGramDictionary = new Dictionary<string, Dictionary<string, int>>();
        var threeGramDictionary = new Dictionary<string, Dictionary<string, int>>();
        foreach (var sentence in text)
        {
            GramCountDictionary(sentence, 2, twoGramDictionary);
            GramCountDictionary(sentence, 3, threeGramDictionary);
        }

        var result = new Dictionary<string, string>();
        AddMaxFrequentGramInResult(twoGramDictionary, result);
        AddMaxFrequentGramInResult(threeGramDictionary, result);
        return result;
    }
}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement