Advertisement
Guest User

textAnalysis

a guest
Oct 19th, 2019
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 2.11 KB | None | 0 0
  1. using System.Collections.Generic;
  2.  
  3. namespace TextAnalysis
  4. {
  5.     internal static class FrequencyAnalysisTask
  6.     {
  7.         public static Dictionary<string, string> GetMostFrequentNextWords(List<List<string>> text)
  8.         {
  9.             var result = new Dictionary<string, string>();
  10.             var frequencies = BuildFrequencies(text);
  11.  
  12.             foreach (var bigram in frequencies)
  13.                 result.Add(bigram.Key, GetMostFrequent(bigram.Value));
  14.  
  15.             return result;
  16.         }
  17.  
  18.         private static Dictionary<string, Dictionary<string, int>> BuildFrequencies(List<List<string>> text)
  19.         {
  20.             var frequencies = new Dictionary<string, Dictionary<string, int>>();
  21.  
  22.             foreach (var sentence in text)
  23.             {
  24.                 for (var i = 0; i < sentence.Count - 1; i++)
  25.                 {
  26.                     IncrementFrequencySafely(frequencies, sentence[i], sentence[i + 1]);
  27.                     if (i < sentence.Count - 2)
  28.                         IncrementFrequencySafely(frequencies, sentence[i] + " " + sentence[i + 1], sentence[i + 2]);
  29.                 }
  30.             }
  31.  
  32.             return frequencies;
  33.         }
  34.  
  35.         private static void IncrementFrequencySafely(Dictionary<string, Dictionary<string, int>> frequencies,
  36.             string beginning, string nextWord)
  37.         {
  38.             if (!frequencies.ContainsKey(beginning))
  39.                 frequencies[beginning] = new Dictionary<string, int>();
  40.  
  41.             if (!frequencies[beginning].ContainsKey(nextWord))
  42.                 frequencies[beginning][nextWord] = 0;
  43.             frequencies[beginning][nextWord]++;
  44.         }
  45.  
  46.         private static string GetMostFrequent(Dictionary<string, int> wordFrequencies)
  47.         {
  48.             var result = "";
  49.             var maxFrequency = -1;
  50.  
  51.             foreach (var word in wordFrequencies)
  52.                 if (word.Value >= maxFrequency && string.CompareOrdinal(word.Key, result) < 0)
  53.                 {
  54.                     result = word.Key;
  55.                     maxFrequency = word.Value;
  56.                 }
  57.  
  58.             return result;
  59.         }
  60.     }
  61. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement