Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- namespace Code
- {
- using System;
- using System.Collections;
- using System.Collections.Generic;
- using System.Linq;
- /// <summary>
- /// A generic implementation of the Knuth-Morris-Pratt algorithm that searches,
- /// in a memory efficient way, over a given <see cref="IEnumerator"/>.
- /// </summary>
- public static class KMP
- {
- /// <summary>
- /// Determines whether the Enumerator contains the specified pattern.
- /// </summary>
- /// <typeparam name="T">The type of an item.</typeparam>
- /// <param name="source">
- /// The source, the <see cref="IEnumerator"/> must yield
- /// objects of <typeparamref name="T"/>.
- /// </param>
- /// <param name="pattern">The pattern.</param>
- /// <param name="equalityComparer">The equality comparer.</param>
- /// <returns>
- /// <c>true</c> if the source contains the specified pattern;
- /// otherwise, <c>false</c>.
- /// </returns>
- /// <exception cref="ArgumentNullException">pattern</exception>
- public static bool Contains<T>(
- this IEnumerator source,
- IEnumerable<T> pattern,
- IEqualityComparer<T> equalityComparer = null)
- {
- if (pattern == null)
- {
- throw new ArgumentNullException(nameof(pattern));
- }
- equalityComparer = equalityComparer ?? EqualityComparer<T>.Default;
- return SearchImplementation(pattern, source, equalityComparer).Any();
- }
- /// <summary>
- /// Identifies indices of a pattern string in source.
- /// </summary>
- /// <typeparam name="T">The type of an item.</typeparam>
- /// <param name="patternString">The pattern string.</param>
- /// <param name="source">
- /// The source, the <see cref="IEnumerator"/> must yield
- /// objects of <typeparamref name="T"/>.
- /// </param>
- /// <param name="equalityComparer">The equality comparer.</param>
- /// <returns>
- /// A sequence of indices where the pattern can be found
- /// in the source.
- /// </returns>
- /// <exception cref="ArgumentOutOfRangeException">
- /// patternSequence - The pattern must contain 1 or more elements.
- /// </exception>
- private static IEnumerable<long> SearchImplementation<T>(
- IEnumerable<T> patternString,
- IEnumerator source,
- IEqualityComparer<T> equalityComparer)
- {
- // Pre-process the pattern
- var preResult = GetSlide(patternString, equalityComparer);
- var pattern = preResult.Pattern;
- var slide = preResult.Slide;
- var patternLength = pattern.Count;
- if (pattern.Count == 0)
- {
- throw new ArgumentOutOfRangeException(
- nameof(patternString),
- "The pattern must contain 1 or more elements.");
- }
- var buffer = new Dictionary<long, T>(patternLength);
- var more = true;
- long i = 0; // index for source
- int j = 0; // index for pattern
- while (more)
- {
- more = FillBuffer(
- buffer,
- source,
- i,
- patternLength,
- out T t);
- if (equalityComparer.Equals(pattern[j], t))
- {
- j++;
- i++;
- }
- more = FillBuffer(
- buffer,
- source,
- i,
- patternLength,
- out t);
- if (j == patternLength)
- {
- yield return i - j;
- j = slide[j - 1];
- }
- else if (more && !equalityComparer.Equals(pattern[j], t))
- {
- if (j != 0)
- {
- j = slide[j - 1];
- }
- else
- {
- i = i + 1;
- }
- }
- }
- }
- /// <summary>
- /// Fills the buffer.
- /// </summary>
- /// <remarks>
- /// The buffer is used so that it is not necessary to hold the
- /// entire source in memory.
- /// </remarks>
- /// <typeparam name="T">The type of an item.</typeparam>
- /// <param name="buffer">The buffer.</param>
- /// <param name="s">The source enumerator.</param>
- /// <param name="i">The current index.</param>
- /// <param name="patternLength">Length of the pattern.</param>
- /// <param name="value">The value retrieved from the source.</param>
- /// <returns>
- /// <c>true</c> if there is potentially more data to process;
- /// otherwise <c>false</c>.
- /// </returns>
- private static bool FillBuffer<T>(
- IDictionary<long, T> buffer,
- IEnumerator s,
- long i,
- int patternLength,
- out T value)
- {
- bool more = true;
- if (!buffer.TryGetValue(i, out value))
- {
- more = s.MoveNext();
- if (more)
- {
- value = (T)s.Current;
- buffer.Remove(i - patternLength);
- buffer.Add(i, value);
- }
- }
- return more;
- }
- /// <summary>
- /// Gets the offset array which acts as a slide rule for the KMP algorithm.
- /// </summary>
- /// <typeparam name="T">The type of an item.</typeparam>
- /// <param name="pattern">The pattern.</param>
- /// <param name="equalityComparer">The equality comparer.</param>
- /// <returns>A tuple of the offsets and the enumerated pattern.</returns>
- private static (IReadOnlyList<int> Slide, IReadOnlyList<T> Pattern) GetSlide<T>(
- IEnumerable<T> pattern,
- IEqualityComparer<T> equalityComparer)
- {
- var patternList = pattern.ToList();
- var slide = new int[patternList.Count];
- int length = 0;
- int i = 1;
- while (i < patternList.Count)
- {
- if (equalityComparer.Equals(patternList[i], patternList[length]))
- {
- length++;
- slide[i] = length;
- i++;
- }
- else
- {
- if (length != 0)
- {
- length = slide[length - 1];
- }
- else
- {
- slide[i] = length;
- i++;
- }
- }
- }
- return (slide, patternList);
- }
- }
- }
- namespace Code
- {
- using System;
- using System.Collections.Generic;
- using System.Globalization;
- using System.Linq;
- class Program
- {
- static void Main(string[] args)
- {
- var testData = new List<(string Source, string Pattern)>
- {
- (string.Empty, "x"),
- ("y", "x"),
- ("x", "x"),
- ("yx", "x"),
- ("xy", "x"),
- ("aababccba", "abc"),
- ("1x2x3x4", "x"),
- ("x1x2x3x4x", "x"),
- ("1aababcabcd2aababcabcd3aababcabcd4", "aababcabcd"),
- ("ssstring", "sstring")
- };
- foreach(var d in testData)
- {
- var contains = Ext.Contains(d.Source, d.Pattern);
- Console.WriteLine(
- $"Source:"{d.Source}", Pattern:"{d.Pattern}", Contains:{contains}");
- }
- Console.ReadKey();
- }
- }
- public static class Ext
- {
- public static bool Contains(
- this string source,
- string value,
- CultureInfo culture = null,
- StringComparer comparer = null)
- {
- comparer = comparer ?? StringComparer.Ordinal;
- var sourceEnumerator = StringInfo.GetTextElementEnumerator(source);
- var sequenceEnumerator = StringInfo.GetTextElementEnumerator(value);
- var pattern = new List<string>();
- while (sequenceEnumerator.MoveNext())
- {
- pattern.Add((string)sequenceEnumerator.Current);
- }
- return sourceEnumerator.Contains(pattern, comparer);
- }
- }
- }
- while (more)
- {
- more = FillBuffer(buffer, source, i, patternLength, out T t);
- if (equalityComparer.Equals(pattern[j], t))
- {
- j++;
- i++;
- }
- more = FillBuffer(buffer, source, i, patternLength, out T t);
- ...
- }
- while (more)
- {
- more = FillBuffer(buffer, source, i, patternLength, out T t);
- if (equalityComparer.Equals(pattern[j], t))
- {
- j++;
- i++;
- // now inside the if statement
- more = FillBuffer(buffer, source, i, patternLength, out T t);
- }
- ...
- }
Add Comment
Please, Sign In to add comment