Advertisement
Guest User

Untitled

a guest
Oct 15th, 2019
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.93 KB | None | 0 0
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5.  
  6. namespace TextAnalysis
  7. {
  /// <summary>
  /// Builds a "most frequent continuation" table from tokenized sentences:
  /// for every single word and every pair of adjacent words it records the
  /// word that most often follows it.
  /// </summary>
  static class FrequencyAnalysisTask
  {
      /// <summary>
      /// Combines the bigram table (<see cref="SetBigram"/>) and the trigram
      /// table (<see cref="SetTrigram"/>) into one dictionary.
      /// </summary>
      /// <param name="text">Sentences, each given as a list of words.</param>
      /// <returns>
      /// A dictionary whose keys are either one word or two words joined by a
      /// single space, and whose values are the most frequent following word.
      /// </returns>
      /// <exception cref="ArgumentException">
      /// Thrown by <c>ToDictionary</c> if a one-word key equals a two-word key,
      /// which can only happen when a single word itself contains a space.
      /// </exception>
      public static Dictionary<string, string> GetMostFrequentNextWords(List<List<string>> text)
      {
          return SetBigram(text)
              .Concat(SetTrigram(text))
              .ToDictionary(pair => pair.Key, pair => pair.Value);
      }

      /// <summary>
      /// Maps every word to the word that most frequently follows it directly.
      /// </summary>
      /// <param name="text">Sentences, each given as a list of words.</param>
      /// <returns>Word to most frequent next word; ties go to the ordinally smaller word.</returns>
      public static Dictionary<string, string> SetBigram(List<List<string>> text)
      {
          return MakeNgramDictionary(CountContinuations(text, 1));
      }

      /// <summary>
      /// Maps every pair of adjacent words (joined by one space) to the word
      /// that most frequently follows the pair.
      /// </summary>
      /// <param name="text">Sentences, each given as a list of words.</param>
      /// <returns>"w1 w2" to most frequent next word; ties go to the ordinally smaller word.</returns>
      public static Dictionary<string, string> SetTrigram(List<List<string>> text)
      {
          return MakeNgramDictionary(CountContinuations(text, 2));
      }

      /// <summary>
      /// Counts, for every run of <paramref name="prefixLength"/> adjacent words,
      /// how often each word follows that run within the same sentence.
      /// </summary>
      /// <param name="text">Sentences, each given as a list of words.</param>
      /// <param name="prefixLength">Number of words forming the key (1 for bigrams, 2 for trigrams).</param>
      /// <returns>Prefix to (following word to occurrence count).</returns>
      private static Dictionary<string, Dictionary<string, int>> CountContinuations(
          List<List<string>> text, int prefixLength)
      {
          var frequency = new Dictionary<string, Dictionary<string, int>>();
          foreach (var sentence in text)
          {
              // The loop bound already skips sentences too short to hold
              // a prefix plus one following word.
              for (int start = 0; start + prefixLength < sentence.Count; start++)
              {
                  // Null words are counted as empty strings in every position,
                  // so a stray null behaves the same wherever it appears.
                  var prefix = sentence[start] ?? string.Empty;
                  for (int offset = 1; offset < prefixLength; offset++)
                  {
                      prefix += " " + sentence[start + offset];
                  }
                  var ending = sentence[start + prefixLength] ?? string.Empty;

                  Dictionary<string, int> endings;
                  if (!frequency.TryGetValue(prefix, out endings))
                  {
                      endings = new Dictionary<string, int>();
                      frequency[prefix] = endings;
                  }

                  // TryGetValue leaves 'seen' at 0 when the ending is new.
                  int seen;
                  endings.TryGetValue(ending, out seen);
                  endings[ending] = seen + 1;
              }
          }
          return frequency;
      }

      /// <summary>
      /// Reduces a frequency table to its single best continuation per key.
      /// </summary>
      /// <param name="ngramFrequency">Prefix to (following word to occurrence count).</param>
      /// <returns>
      /// Prefix to the following word with the highest count; among equal counts
      /// the word that sorts first under ordinal comparison is chosen.
      /// </returns>
      private static Dictionary<string, string> MakeNgramDictionary(
          Dictionary<string, Dictionary<string, int>> ngramFrequency)
      {
          var ngramDictionary = new Dictionary<string, string>(ngramFrequency.Count);
          foreach (var ngram in ngramFrequency)
          {
              // An empty inner table yields an empty-string continuation.
              var bestEnding = string.Empty;
              var bestCount = 0;
              foreach (var candidate in ngram.Value)
              {
                  var isMoreFrequent = candidate.Value > bestCount;
                  var winsTie = candidate.Value == bestCount
                      && string.CompareOrdinal(candidate.Key, bestEnding) < 0;
                  if (isMoreFrequent || winsTie)
                  {
                      bestCount = candidate.Value;
                      bestEnding = candidate.Key;
                  }
              }
              ngramDictionary.Add(ngram.Key, bestEnding);
          }
          return ngramDictionary;
      }
  }
  84. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement