SHARE
TWEET

Untitled

a guest Oct 14th, 2019 87 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. using System.Collections.Generic;
  2.  
  namespace TextAnalysis
  {
      /// <summary>
      /// Builds a "most frequent next word" table from tokenized sentences
      /// using bigram and trigram follower counts.
      /// </summary>
      static class FrequencyAnalysisTask
      {
          /// <summary>
          /// Counts every n-gram of <paramref name="sentence"/> and accumulates the counts
          /// into <paramref name="gramDictionary"/>.
          /// </summary>
          /// <param name="sentence">Words of one sentence, in order.</param>
          /// <param name="n">
          /// N-gram size. The first <c>n - 1</c> words, joined with a single space, form the
          /// outer key; the n-th word is the inner key. Must be at least 2.
          /// </param>
          /// <param name="gramDictionary">
          /// Accumulator that is mutated in place: prefix -> (following word -> occurrence count).
          /// </param>
          /// <exception cref="System.ArgumentOutOfRangeException">
          /// <paramref name="n"/> is less than 2.
          /// </exception>
          public static void GramCountDictionary(
              List<string> sentence,
              int n,
              Dictionary<string, Dictionary<string, int>> gramDictionary
              )
          {
              if (n < 2)
              {
                  // An n-gram needs at least one prefix word and one following word.
                  throw new System.ArgumentOutOfRangeException(
                      nameof(n), n, "N-gram size must be at least 2.");
              }

              var prefixLength = n - 1;
              var lastStart = sentence.Count - n;
              for (var i = 0; i <= lastStart; i++)
              {
                  var firstHalfGram = JoinWords(sentence, i, prefixLength);
                  var secondHalfGram = sentence[i + prefixLength];

                  Dictionary<string, int> followers;
                  if (!gramDictionary.TryGetValue(firstHalfGram, out followers))
                  {
                      followers = new Dictionary<string, int>();
                      gramDictionary.Add(firstHalfGram, followers);
                  }

                  // TryGetValue leaves count at 0 when the follower has not been seen yet.
                  int count;
                  followers.TryGetValue(secondHalfGram, out count);
                  followers[secondHalfGram] = count + 1;
              }
          }

          /// <summary>
          /// Joins <paramref name="count"/> consecutive words starting at
          /// <paramref name="start"/> with single spaces.
          /// </summary>
          private static string JoinWords(List<string> words, int start, int count)
          {
              if (count == 1)
              {
                  // Single-word prefix: return the word itself, no copy needed.
                  return words[start];
              }

              var builder = new System.Text.StringBuilder(words[start]);
              for (var offset = 1; offset < count; offset++)
              {
                  builder.Append(' ').Append(words[start + offset]);
              }
              return builder.ToString();
          }

          /// <summary>
          /// For every prefix in <paramref name="gramDictionary"/>, adds the following word
          /// with the highest count to <paramref name="result"/>.
          /// </summary>
          /// <param name="gramDictionary">Prefix -> (following word -> occurrence count).</param>
          /// <param name="result">
          /// Receives prefix -> chosen following word. Entries are inserted with
          /// <c>Add</c>, so a prefix that is already present causes an exception.
          /// </param>
          /// <remarks>
          /// Ties on count are resolved in favour of the word that is smaller under ordinal
          /// comparison. A prefix with an empty follower table is mapped to an empty string.
          /// </remarks>
          public static void AddMaxFrequentGramInResult(
              Dictionary<string, Dictionary<string, int>> gramDictionary,
              Dictionary<string, string> result
              )
          {
              foreach (var entry in gramDictionary)
              {
                  // null means "no candidate seen yet"; dictionary keys can never be null.
                  string bestFollower = null;
                  var bestCount = 0;
                  foreach (var candidate in entry.Value)
                  {
                      var isBetter = bestFollower == null
                          || candidate.Value > bestCount
                          || (candidate.Value == bestCount
                              && string.CompareOrdinal(candidate.Key, bestFollower) < 0);
                      if (isBetter)
                      {
                          bestCount = candidate.Value;
                          bestFollower = candidate.Key;
                      }
                  }
                  result.Add(entry.Key, bestFollower ?? "");
              }
          }

          /// <summary>
          /// Builds the next-word table for <paramref name="text"/>.
          /// </summary>
          /// <param name="text">Sentences, each given as an ordered list of words.</param>
          /// <returns>
          /// A dictionary whose keys are single words (from bigrams) and space-separated word
          /// pairs (from trigrams), each mapped to its most frequent following word.
          /// </returns>
          public static Dictionary<string, string> GetMostFrequentNextWords(List<List<string>> text)
          {
              var result = new Dictionary<string, string>();
              var twoGramDictionary = new Dictionary<string, Dictionary<string, int>>();
              var threeGramDictionary = new Dictionary<string, Dictionary<string, int>>();
              foreach (var sentence in text)
              {
                  GramCountDictionary(sentence, 2, twoGramDictionary);
                  GramCountDictionary(sentence, 3, threeGramDictionary);
              }
              AddMaxFrequentGramInResult(twoGramDictionary, result);
              AddMaxFrequentGramInResult(threeGramDictionary, result);
              return result;
          }
      }
  }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top