Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.Linq;
namespace TextAnalysis
{
    /// <summary>
    /// Builds a "most frequent continuation" table from the bigrams and trigrams of a text.
    /// </summary>
    static class FrequencyAnalysisTask
    {
        /// <summary>
        /// For every single word and every pair of adjacent words (joined with one space)
        /// that is followed by another word in the same sentence, finds the word that
        /// follows it most often.
        /// </summary>
        /// <param name="text">Sentences of the text, each sentence being a list of words.</param>
        /// <returns>
        /// A dictionary whose keys are either a single word or two adjacent words joined by
        /// a space, and whose values are the most frequent word following that key. When
        /// several followers are equally frequent, the smallest one by ordinal string
        /// comparison is chosen. Sentences with fewer than two words contribute nothing.
        /// </returns>
        public static Dictionary<string, string> GetMostFrequentNextWords(List<List<string>> text)
        {
            var continuations = new List<Tuple<string, string>>();
            foreach (var sentence in text)
            {
                AddContinuations(sentence, continuations);
            }

            // GroupBy yields groups in order of first occurrence of their key, so the
            // resulting dictionary is filled in the order the prefixes first appear.
            return continuations
                .GroupBy(pair => pair.Item1)
                .ToDictionary(
                    group => group.Key,
                    group => SelectMostFrequent(group.Select(pair => pair.Item2)));
        }

        /// <summary>
        /// Appends a (prefix, next word) pair for every bigram and trigram of one sentence.
        /// </summary>
        /// <param name="sentence">Words of a single sentence.</param>
        /// <param name="continuations">Accumulator that receives the generated pairs.</param>
        private static void AddContinuations(List<string> sentence, List<Tuple<string, string>> continuations)
        {
            // Index the list directly: no per-position Skip/Take scan or temporary array,
            // and sentences of length 0, 1, 2 and 3+ need no special cases.
            for (var i = 0; i + 1 < sentence.Count; i++)
            {
                var first = sentence[i];
                var second = sentence[i + 1];

                // Bigram: one word followed by the next one.
                continuations.Add(Tuple.Create(first, second));

                // Trigram: two words followed by the third, when a third word exists.
                if (i + 2 < sentence.Count)
                {
                    continuations.Add(Tuple.Create(first + " " + second, sentence[i + 2]));
                }
            }
        }

        /// <summary>
        /// Picks the word that occurs most often in a non-empty sequence; ties are resolved
        /// in favour of the smallest word by ordinal comparison.
        /// </summary>
        /// <param name="words">Followers observed after one prefix; must not be empty.</param>
        /// <returns>The winning follower.</returns>
        private static string SelectMostFrequent(IEnumerable<string> words)
        {
            return words
                .GroupBy(word => word)
                .Select(group => new { Word = group.Key, Count = group.Count() })
                .OrderByDescending(candidate => candidate.Count)
                .ThenBy(candidate => candidate.Word, StringComparer.Ordinal)
                .First()
                .Word;
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement