Guest User

Untitled

a guest
Jan 16th, 2019
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.18 KB | None | 0 0
  1. namespace Code
  2. {
  3. using System;
  4. using System.Collections;
  5. using System.Collections.Generic;
  6. using System.Linq;
  7.  
  8. /// <summary>
  9. /// A generic implementation of the Knuth-Morris-Pratt algorithm that searches,
  10. /// in a memory efficient way, over a given <see cref="IEnumerator"/>.
  11. /// </summary>
  12. public static class KMP
  13. {
  14. /// <summary>
  15. /// Determines whether the Enumerator contains the specified pattern.
  16. /// </summary>
  17. /// <typeparam name="T">The type of an item.</typeparam>
  18. /// <param name="source">
  19. /// The source, the <see cref="IEnumerator"/> must yield
  20. /// objects of <typeparamref name="T"/>.
  21. /// </param>
  22. /// <param name="pattern">The pattern.</param>
  23. /// <param name="equalityComparer">The equality comparer.</param>
  24. /// <returns>
  25. /// <c>true</c> if the source contains the specified pattern;
  26. /// otherwise, <c>false</c>.
  27. /// </returns>
  28. /// <exception cref="ArgumentNullException">pattern</exception>
  29. public static bool Contains<T>(
  30. this IEnumerator source,
  31. IEnumerable<T> pattern,
  32. IEqualityComparer<T> equalityComparer = null)
  33. {
  34. if (pattern == null)
  35. {
  36. throw new ArgumentNullException(nameof(pattern));
  37. }
  38.  
  39. equalityComparer = equalityComparer ?? EqualityComparer<T>.Default;
  40.  
  41. return SearchImplementation(pattern, source, equalityComparer).Any();
  42. }
  43.  
  44. /// <summary>
  45. /// Identifies indices of a pattern string in source.
  46. /// </summary>
  47. /// <typeparam name="T">The type of an item.</typeparam>
  48. /// <param name="patternString">The pattern string.</param>
  49. /// <param name="source">
  50. /// The source, the <see cref="IEnumerator"/> must yield
  51. /// objects of <typeparamref name="T"/>.
  52. /// </param>
  53. /// <param name="equalityComparer">The equality comparer.</param>
  54. /// <returns>
  55. /// A sequence of indices where the pattern can be found
  56. /// in the source.
  57. /// </returns>
  58. /// <exception cref="ArgumentOutOfRangeException">
  59. /// patternSequence - The pattern must contain 1 or more elements.
  60. /// </exception>
  61. private static IEnumerable<long> SearchImplementation<T>(
  62. IEnumerable<T> patternString,
  63. IEnumerator source,
  64. IEqualityComparer<T> equalityComparer)
  65. {
  66. // Pre-process the pattern
  67. var preResult = GetSlide(patternString, equalityComparer);
  68. var pattern = preResult.Pattern;
  69. var slide = preResult.Slide;
  70. var patternLength = pattern.Count;
  71.  
  72. if (pattern.Count == 0)
  73. {
  74. throw new ArgumentOutOfRangeException(
  75. nameof(patternString),
  76. "The pattern must contain 1 or more elements.");
  77. }
  78.  
  79. var buffer = new Dictionary<long, T>(patternLength);
  80. var more = true;
  81.  
  82. long i = 0; // index for source
  83. int j = 0; // index for pattern
  84.  
  85. while (more)
  86. {
  87. more = FillBuffer(
  88. buffer,
  89. source,
  90. i,
  91. patternLength,
  92. out T t);
  93.  
  94. if (equalityComparer.Equals(pattern[j], t))
  95. {
  96. j++;
  97. i++;
  98. }
  99.  
  100. more = FillBuffer(
  101. buffer,
  102. source,
  103. i,
  104. patternLength,
  105. out t);
  106.  
  107. if (j == patternLength)
  108. {
  109. yield return i - j;
  110. j = slide[j - 1];
  111. }
  112. else if (more && !equalityComparer.Equals(pattern[j], t))
  113. {
  114. if (j != 0)
  115. {
  116. j = slide[j - 1];
  117. }
  118. else
  119. {
  120. i = i + 1;
  121. }
  122. }
  123. }
  124. }
  125.  
  126. /// <summary>
  127. /// Fills the buffer.
  128. /// </summary>
  129. /// <remarks>
  130. /// The buffer is used so that it is not necessary to hold the
  131. /// entire source in memory.
  132. /// </remarks>
  133. /// <typeparam name="T">The type of an item.</typeparam>
  134. /// <param name="buffer">The buffer.</param>
  135. /// <param name="s">The source enumerator.</param>
  136. /// <param name="i">The current index.</param>
  137. /// <param name="patternLength">Length of the pattern.</param>
  138. /// <param name="value">The value retrieved from the source.</param>
  139. /// <returns>
  140. /// <c>true</c> if there is potentially more data to process;
  141. /// otherwise <c>false</c>.
  142. /// </returns>
  143. private static bool FillBuffer<T>(
  144. IDictionary<long, T> buffer,
  145. IEnumerator s,
  146. long i,
  147. int patternLength,
  148. out T value)
  149. {
  150. bool more = true;
  151. if (!buffer.TryGetValue(i, out value))
  152. {
  153. more = s.MoveNext();
  154. if (more)
  155. {
  156. value = (T)s.Current;
  157. buffer.Remove(i - patternLength);
  158. buffer.Add(i, value);
  159. }
  160. }
  161.  
  162. return more;
  163. }
  164.  
  165. /// <summary>
  166. /// Gets the offset array which acts as a slide rule for the KMP algorithm.
  167. /// </summary>
  168. /// <typeparam name="T">The type of an item.</typeparam>
  169. /// <param name="pattern">The pattern.</param>
  170. /// <param name="equalityComparer">The equality comparer.</param>
  171. /// <returns>A tuple of the offsets and the enumerated pattern.</returns>
  172. private static (IReadOnlyList<int> Slide, IReadOnlyList<T> Pattern) GetSlide<T>(
  173. IEnumerable<T> pattern,
  174. IEqualityComparer<T> equalityComparer)
  175. {
  176. var patternList = pattern.ToList();
  177. var slide = new int[patternList.Count];
  178.  
  179. int length = 0;
  180. int i = 1;
  181.  
  182. while (i < patternList.Count)
  183. {
  184. if (equalityComparer.Equals(patternList[i], patternList[length]))
  185. {
  186. length++;
  187. slide[i] = length;
  188. i++;
  189. }
  190. else
  191. {
  192. if (length != 0)
  193. {
  194. length = slide[length - 1];
  195. }
  196. else
  197. {
  198. slide[i] = length;
  199. i++;
  200. }
  201. }
  202. }
  203.  
  204. return (slide, patternList);
  205. }
  206. }
  207. }
  208.  
  209. namespace Code
  210. {
  211. using System;
  212. using System.Collections.Generic;
  213. using System.Globalization;
  214. using System.Linq;
  215.  
  216. class Program
  217. {
  218. static void Main(string[] args)
  219. {
  220.  
  221. var testData = new List<(string Source, string Pattern)>
  222. {
  223. (string.Empty, "x"),
  224. ("y", "x"),
  225. ("x", "x"),
  226. ("yx", "x"),
  227. ("xy", "x"),
  228. ("aababccba", "abc"),
  229. ("1x2x3x4", "x"),
  230. ("x1x2x3x4x", "x"),
  231. ("1aababcabcd2aababcabcd3aababcabcd4", "aababcabcd"),
  232. ("ssstring", "sstring")
  233. };
  234.  
  235. foreach(var d in testData)
  236. {
  237. var contains = Ext.Contains(d.Source, d.Pattern);
  238. Console.WriteLine(
  239. $"Source:"{d.Source}", Pattern:"{d.Pattern}", Contains:{contains}");
  240. }
  241.  
  242. Console.ReadKey();
  243. }
  244. }
  245.  
  246. public static class Ext
  247. {
  248. public static bool Contains(
  249. this string source,
  250. string value,
  251. CultureInfo culture = null,
  252. StringComparer comparer = null)
  253. {
  254. comparer = comparer ?? StringComparer.Ordinal;
  255.  
  256. var sourceEnumerator = StringInfo.GetTextElementEnumerator(source);
  257. var sequenceEnumerator = StringInfo.GetTextElementEnumerator(value);
  258.  
  259. var pattern = new List<string>();
  260. while (sequenceEnumerator.MoveNext())
  261. {
  262. pattern.Add((string)sequenceEnumerator.Current);
  263. }
  264.  
  265. return sourceEnumerator.Contains(pattern, comparer);
  266. }
  267. }
  268. }
  269.  
  270. while (more)
  271. {
  272. more = FillBuffer(buffer, source, i, patternLength, out T t);
  273.  
  274. if (equalityComparer.Equals(pattern[j], t))
  275. {
  276. j++;
  277. i++;
  278. }
  279.  
  280. more = FillBuffer(buffer, source, i, patternLength, out T t);
  281.  
  282. ...
  283. }
  284.  
  285. while (more)
  286. {
  287. more = FillBuffer(buffer, source, i, patternLength, out T t);
  288.  
  289. if (equalityComparer.Equals(pattern[j], t))
  290. {
  291. j++;
  292. i++;
  293.  
  294. // now inside the if statement
  295. more = FillBuffer(buffer, source, i, patternLength, out T t);
  296. }
  297.  
  298. ...
  299. }
Add Comment
Please, Sign In to add comment