Advertisement
Guest User

Untitled

a guest
Jan 16th, 2022
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.52 KB | None | 0 0
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Web;
  5.  
  6. namespace tp
  7. {
  /// <summary>
  /// Ranks a list of candidate words by how similar each one looks to a reference word.
  /// All work happens in the constructor; results are handed back through the out parameters.
  /// </summary>
  public class FindSimilaryLookingWords
  {
      /// <summary>
      /// Scores every entry of <paramref name="words"/> against <paramref name="word"/> with
      /// Compute2WordsSimilarity and returns the entries ordered from most to least similar.
      /// </summary>
      /// <param name="word">Reference word every candidate is compared with.</param>
      /// <param name="words">Candidate words. A null list is treated as an empty list.</param>
      /// <param name="wordsSortedBySimilarity">Receives the candidates, highest score first.
      /// Candidates with equal scores keep their input order.</param>
      /// <param name="similarities">Receives the scores, parallel to
      /// <paramref name="wordsSortedBySimilarity"/> (same index = same candidate).</param>
      public FindSimilaryLookingWords(string word, List<string> words,
          out List<string> wordsSortedBySimilarity, out List<double> similarities)
      {
          wordsSortedBySimilarity = new List<string>();
          similarities = new List<double>();

          // Nothing to rank: leave both result lists empty instead of failing in the loop below.
          if (words == null)
          {
              return;
          }

          foreach (string comparedWord in words)
          {
              // The scorer does its work in the constructor; only the out value is needed.
              double similarity;
              new Compute2WordsSimilarity(word, comparedWord, out similarity);

              // Find the first entry with a strictly lower score and insert in front of it.
              // Using a strict comparison keeps equal scores in input order (stable ranking).
              int index = 0;
              while (index < similarities.Count && !(similarity > similarities[index]))
              {
                  index++;
              }

              wordsSortedBySimilarity.Insert(index, comparedWord);
              similarities.Insert(index, similarity);
          }
      }
  }
  33.  
  /// <summary>
  /// Scores how alike two words look. The score starts at a maximum and loses points for
  /// differing first/last letters, differing lengths, and for letter groups that were moved,
  /// removed or added when going from word1 to word2. All work happens in the constructor.
  /// </summary>
  public class Compute2WordsSimilarity
  {
      /// <summary>
      /// Computes the similarity of <paramref name="word1"/> and <paramref name="word2"/>.
      /// The comparison ignores letter case.
      /// </summary>
      /// <param name="word1">Reference word. Most penalties are scaled by its length.</param>
      /// <param name="word2">Word compared against <paramref name="word1"/>.</param>
      /// <param name="similarity">Receives the score: 100 for equal words, lower for less
      /// similar ones. The value is not clamped, so very different words can score below 0.
      /// It is 0 when either word is null or when only <paramref name="word1"/> is empty,
      /// and 100 when both words are empty.</param>
      public Compute2WordsSimilarity(string word1, string word2,
          out double similarity)
      {
          if (word1 == null || word2 == null)
          {
              similarity = 0;
              return;
          }

          // Every penalty below divides by word1.Length; an empty word1 would yield NaN or
          // -Infinity. Two empty words are equal, an empty word versus a non-empty one is not.
          if (word1.Length == 0)
          {
              similarity = word2.Length == 0 ? 100 : 0;
              return;
          }

          // Lower-case once so the first/last letter checks agree with the case-insensitive
          // block matching done in pairShuffledSets.
          string word1Lc = word1.ToLowerInvariant();
          string word2Lc = word2.ToLowerInvariant();

          double maxSimilarity = 0; // sum of all weights = best reachable raw score
          double penalty = 0;       // points lost so far
          double importance;        // weight of the criterion currently evaluated

          bool sameFirst = getFirstChar(word1Lc) == getFirstChar(word2Lc);
          bool sameLast = getLastChar(word1Lc) == getLastChar(word2Lc);

          // neither the first nor the last letter matches: lose 30 points
          importance = 30;
          maxSimilarity += importance;
          if (!sameFirst && !sameLast)
          {
              penalty += importance;
          }

          // first letter differs but last letter matches: lose 15 points
          importance = 15;
          maxSimilarity += importance;
          if (!sameFirst && sameLast)
          {
              penalty += importance;
          }

          // first letter matches but last letter differs: lose 10 points
          importance = 10;
          maxSimilarity += importance;
          if (sameFirst && !sameLast)
          {
              penalty += importance;
          }

          // word length difference: lose up to 5 points
          importance = 5;
          maxSimilarity += importance;
          penalty += importance * (1 - Math.Min(word1.Length, word2.Length) / (double)Math.Max(word1.Length, word2.Length));

          // pair up the letter groups both words share and collect what is left over
          List<int> setsMatchedIndexesWord1; // start indexes in word1
          List<int> setsMatchedIndexesWord2; // start indexes in word2
          List<int> setsLengths;             // length of each matched group
          List<int> setsMissingIndexes;      // start indexes in word1 of unmatched runs
          List<int> setsMissingLengths;
          List<int> setsAddedIndexes;        // start indexes in word2 of unmatched runs
          List<int> setsAddedLengths;
          pairShuffledSets(word1Lc, word2Lc, out setsMatchedIndexesWord1, out setsMatchedIndexesWord2,
              out setsLengths, out setsMissingIndexes,
              out setsMissingLengths, out setsAddedIndexes, out setsAddedLengths);

          // a matched group counts as shuffled when it sits at a different index in each word
          double countShuffled = 0;
          double sumShuffled = 0;
          double sumDistanceShuffle = 0;
          for (int i = 0; i < setsMatchedIndexesWord1.Count; i++)
          {
              int setIndexWord1 = setsMatchedIndexesWord1[i];
              int setIndexWord2 = setsMatchedIndexesWord2[i];
              if (setIndexWord1 != setIndexWord2)
              {
                  sumDistanceShuffle += Math.Abs(setIndexWord1 - setIndexWord2);
                  sumShuffled += setsLengths[i];
                  countShuffled++;
              }
          }

          // number of letters that were shuffled (weight 10, relative to word1 length)
          importance = 10;
          maxSimilarity += importance;
          penalty += importance * sumShuffled / (double)word1.Length;

          // summed distance the shuffled groups moved (weight 10, relative to word1 length)
          importance = 10;
          maxSimilarity += importance;
          penalty += importance * sumDistanceShuffle / (double)word1.Length;

          // number of word1 letters with no counterpart in word2 (weight 100)
          importance = 100;
          maxSimilarity += importance;
          int sumRemoved = setsMissingLengths.Sum();
          penalty += importance * sumRemoved / (double)word1.Length;

          // number of word2 letters with no counterpart in word1 (weight 10)
          importance = 10;
          maxSimilarity += importance;
          int sumAdded = setsAddedLengths.Sum();
          penalty += importance * sumAdded / (double)word1.Length;

          // number of shuffled groups (weight 5)
          importance = 5;
          maxSimilarity += importance;
          penalty += importance * countShuffled / (double)word1.Length;

          // number of separate removed runs (weight 5)
          importance = 5;
          maxSimilarity += importance;
          penalty += importance * setsMissingIndexes.Count / (double)word1.Length;

          // number of separate added runs (weight 5)
          importance = 5;
          maxSimilarity += importance;
          penalty += importance * setsAddedIndexes.Count / (double)word1.Length;

          // express the remaining points as a percentage of the maximum
          similarity = 100 * (maxSimilarity - penalty) / maxSimilarity;
      }

      /// <summary>
      /// Greedily pairs substrings of word1 with equal substrings of word2, longest first,
      /// using every character of either word at most once. Whatever stays unpaired is
      /// reported as "missing" (runs of word1) or "added" (runs of word2).
      /// </summary>
      private void pairShuffledSets(string word1, string word2,
          out List<int> setsMatchedIndexesWord1, // start indexes in word1
          out List<int> setsMatchedIndexesWord2, // start indexes in word2
          out List<int> setsLengths,
          out List<int> setsMissingIndexes, // start indexes in word1
          out List<int> setsMissingLengths,
          out List<int> setsAddedIndexes, // start indexes in word2
          out List<int> setsAddedLengths
          )
      {
          setsMatchedIndexesWord1 = new List<int>();
          setsMatchedIndexesWord2 = new List<int>();
          setsLengths = new List<int>();
          setsMissingIndexes = new List<int>();
          setsMissingLengths = new List<int>();
          setsAddedIndexes = new List<int>();
          setsAddedLengths = new List<int>();

          // one flag per character: true once the character belongs to a matched group
          var takenBits1 = new bool[word1.Length];
          var takenBits2 = new bool[word2.Length];
          bool areAnyBitsFree = word1.Length > 0 && word2.Length > 0;

          // A block longer than word2 can never be found in it, so start at the shorter length.
          int largestBlock = Math.Min(word1.Length, word2.Length);
          for (int blockSize = largestBlock; blockSize >= 1 && areAnyBitsFree; blockSize--)
          {
              for (int x1 = 0; x1 <= word1.Length - blockSize && areAnyBitsFree; x1++)
              {
                  // A word1 character that is already paired must not be paired a second time.
                  if (isAnyTaken(takenBits1, x1, blockSize))
                  {
                      continue;
                  }

                  string block = word1.Substring(x1, blockSize);
                  int x2 = findFreeOccurrence(word2, block, takenBits2);
                  if (x2 == -1)
                  {
                      continue;
                  }

                  markTaken(takenBits1, x1, blockSize);
                  markTaken(takenBits2, x2, blockSize);
                  setsMatchedIndexesWord1.Add(x1);
                  setsMatchedIndexesWord2.Add(x2);
                  setsLengths.Add(blockSize);

                  // Stop as soon as one of the words is fully consumed.
                  areAnyBitsFree = Array.IndexOf(takenBits1, false) != -1
                      && Array.IndexOf(takenBits2, false) != -1;
              }
          }

          collectFreeRuns(takenBits1, setsMissingIndexes, setsMissingLengths);
          collectFreeRuns(takenBits2, setsAddedIndexes, setsAddedLengths);
      }

      // Returns the first index at which block occurs in word without touching an already
      // taken character, or -1 when there is no such occurrence.
      private static int findFreeOccurrence(string word, string block, bool[] takenBits)
      {
          int startIndex = 0;
          while (true)
          {
              // Ordinal matching guarantees the match spans exactly block.Length characters;
              // a culture-sensitive search may skip ignorable characters and break the indexing.
              int found = word.IndexOf(block, startIndex, StringComparison.OrdinalIgnoreCase);
              if (found == -1)
              {
                  return -1;
              }
              if (!isAnyTaken(takenBits, found, block.Length))
              {
                  return found;
              }
              startIndex = found + 1;
          }
      }

      // True when at least one flag in [start, start + length) is set.
      private static bool isAnyTaken(bool[] takenBits, int start, int length)
      {
          for (int i = start; i < start + length; i++)
          {
              if (takenBits[i])
              {
                  return true;
              }
          }
          return false;
      }

      // Sets every flag in [start, start + length).
      private static void markTaken(bool[] takenBits, int start, int length)
      {
          for (int i = start; i < start + length; i++)
          {
              takenBits[i] = true;
          }
      }

      // Appends the start index and length of every maximal run of unset flags.
      private static void collectFreeRuns(bool[] takenBits, List<int> runIndexes, List<int> runLengths)
      {
          int i = 0;
          while (i < takenBits.Length)
          {
              if (takenBits[i])
              {
                  i++;
                  continue;
              }

              int runStart = i;
              while (i < takenBits.Length && !takenBits[i])
              {
                  i++;
              }
              runIndexes.Add(runStart);
              runLengths.Add(i - runStart);
          }
      }

      // First character of s, or '\0' for a null or empty string.
      private char getFirstChar(string s)
      {
          if (s == null || s.Length == 0)
          {
              return '\0';
          }
          return s[0];
      }

      // Last character of s, or '\0' for a null or empty string.
      private char getLastChar(string s)
      {
          if (s == null || s.Length == 0)
          {
              return '\0';
          }
          return s[s.Length - 1];
      }
  }
  274. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement