Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- namespace TextAnalysis
- {
/// <summary>
/// Builds lookup tables that map the beginning of an N-gram (one word for bigrams,
/// two space-separated words for trigrams) to the word that most frequently follows it.
/// </summary>
static class FrequencyAnalysisTask
{
    /// <summary>
    /// Returns the combined bigram and trigram tables for <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Sentences, each given as a list of words.</param>
    /// <returns>
    /// A dictionary whose keys are single words (bigram beginnings) and pairs of words joined
    /// by one space (trigram beginnings), and whose values are the most frequent next word.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// Thrown by <c>ToDictionary</c> when a bigram key equals a trigram key, which requires
    /// a single word that itself contains a space.
    /// </exception>
    public static Dictionary<string, string> GetMostFrequentNextWords(List<List<string>> text)
    {
        return SetBigram(text)
            .Concat(SetTrigram(text))
            .ToDictionary(pair => pair.Key, pair => pair.Value);
    }

    /// <summary>
    /// Maps every word that has a successor in some sentence to its most frequent successor.
    /// </summary>
    /// <param name="text">Sentences, each given as a list of words.</param>
    /// <returns>Dictionary from first word to the most frequent second word.</returns>
    public static Dictionary<string, string> SetBigram(List<List<string>> text)
    {
        return MakeNgramDictionary(CountNgramEndings(text, 1));
    }

    /// <summary>
    /// Maps every pair of adjacent words (joined by a single space) that has a successor
    /// in some sentence to its most frequent successor.
    /// </summary>
    /// <param name="text">Sentences, each given as a list of words.</param>
    /// <returns>Dictionary from "first second" to the most frequent third word.</returns>
    public static Dictionary<string, string> SetTrigram(List<List<string>> text)
    {
        return MakeNgramDictionary(CountNgramEndings(text, 2));
    }

    /// <summary>
    /// Counts, for every run of <paramref name="prefixLength"/> adjacent words, how often
    /// each word follows that run. Sentences with too few words contribute nothing.
    /// </summary>
    /// <param name="text">Sentences, each given as a list of words.</param>
    /// <param name="prefixLength">Number of words forming the key (1 for bigrams, 2 for trigrams).</param>
    /// <returns>Nested dictionary: prefix -> (following word -> number of occurrences).</returns>
    private static Dictionary<string, Dictionary<string, int>> CountNgramEndings(
        List<List<string>> text, int prefixLength)
    {
        var frequency = new Dictionary<string, Dictionary<string, int>>();
        foreach (var sentence in text)
        {
            // The loop bound guarantees that sentence[i + prefixLength] exists.
            for (var i = 0; i + prefixLength < sentence.Count; i++)
            {
                // string.Join and the ?? below both turn a null word into an empty string,
                // so null words are handled the same way in every position.
                var prefix = string.Join(" ", sentence.GetRange(i, prefixLength));
                var ending = sentence[i + prefixLength] ?? string.Empty;

                Dictionary<string, int> endings;
                if (!frequency.TryGetValue(prefix, out endings))
                {
                    endings = new Dictionary<string, int>();
                    frequency[prefix] = endings;
                }

                // TryGetValue leaves count at 0 when the ending has not been seen yet.
                int count;
                endings.TryGetValue(ending, out count);
                endings[ending] = count + 1;
            }
        }

        return frequency;
    }

    /// <summary>
    /// Picks, for each N-gram beginning, the ending with the highest count; ties are
    /// resolved in favour of the ending that is smaller in ordinal string comparison.
    /// </summary>
    /// <param name="ngramFrequency">Prefix -> (following word -> number of occurrences).</param>
    /// <returns>Dictionary from prefix to the selected ending.</returns>
    private static Dictionary<string, string> MakeNgramDictionary(
        Dictionary<string, Dictionary<string, int>> ngramFrequency)
    {
        var ngramDictionary = new Dictionary<string, string>(ngramFrequency.Count);
        foreach (var ngram in ngramFrequency)
        {
            var bestEnding = string.Empty;
            var bestCount = 0;
            foreach (var candidate in ngram.Value)
            {
                var isMoreFrequent = candidate.Value > bestCount;
                var winsTie = candidate.Value == bestCount
                    && string.CompareOrdinal(candidate.Key, bestEnding) < 0;
                if (isMoreFrequent || winsTie)
                {
                    bestCount = candidate.Value;
                    bestEnding = candidate.Key;
                }
            }

            ngramDictionary.Add(ngram.Key, bestEnding);
        }

        return ngramDictionary;
    }
}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement