Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.Diagnostics;
- using System.Linq;
- using System.Text.RegularExpressions;
- namespace SplitOnLength
- {
/// <summary>
/// Controls how <c>StringSplitter.SplitOnLength</c> treats a word that would
/// straddle the end of a chunk.
/// </summary>
enum WordPolicy
{
    /// <summary>Ignore word boundaries; chunks are cut at exactly the requested length.</summary>
    None,

    /// <summary>Break at the last space inside the chunk; throw when the chunk contains no space.</summary>
    ThrowIfTooLong,

    /// <summary>Break at the last space inside the chunk; cut at the requested length when it contains no space.</summary>
    CutIfTooLong
}

/// <summary>
/// Several alternative implementations of "split a string into pieces of roughly
/// a given length, preferably at word boundaries", exposed as extension methods
/// so they can be compared against each other.
/// </summary>
static class StringSplitter
{
    // Shared separator array so String.Split does not allocate one per call.
    private static readonly char[] WordSeparators = { ' ' };

    /// <summary>
    /// Single-pass splitter that remembers the most recent separator and cuts there
    /// once the current chunk has reached <paramref name="length"/> characters.
    /// </summary>
    /// <param name="input">Text to split; <c>null</c> produces an empty sequence.</param>
    /// <param name="length">Number of characters after which a chunk is cut at the last separator seen.</param>
    /// <returns>
    /// Trimmed, non-empty chunks. A word longer than <paramref name="length"/> is never
    /// cut, so such a chunk can exceed the limit.
    /// </returns>
    public static IEnumerable<string> SimonsApproach(this string input, int length)
    {
        if (input == null)
            yield break;

        int chunkStart = 0;
        int lastSeparator = -1;
        string chunk;

        for (int i = 0; i < input.Length; i++)
        {
            if (char.IsSeparator(input[i]))
            {
                lastSeparator = i;
                continue;
            }

            // Keep scanning while the chunk is still short enough, or while no
            // separator has been seen at all (oversized first word).
            if (i - chunkStart < length || lastSeparator < 0)
                continue;

            chunk = input.Substring(chunkStart, lastSeparator - chunkStart).Trim();
            if (chunk.Length > 0)
                yield return chunk;

            // The next chunk starts at the separator; Trim() removes it later.
            chunkStart = lastSeparator;
        }

        chunk = input.Substring(chunkStart).Trim();
        if (chunk.Length > 0)
            yield return chunk;
    }

    /// <summary>
    /// Regex-based splitter: each piece is the longest prefix of the remaining text made
    /// of at most <paramref name="length"/> characters followed by one non-word character.
    /// When no such prefix exists, the next <paramref name="length"/> characters (or
    /// whatever is left, if shorter) are taken instead.
    /// </summary>
    /// <param name="s">Text to split.</param>
    /// <param name="length">Maximum number of characters before the terminating non-word character; must be positive.</param>
    /// <returns>
    /// The pieces in order. The terminating non-word character is part of the piece,
    /// so a piece can be up to <paramref name="length"/> + 1 characters long.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null (raised when enumeration starts).</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is not positive (raised when enumeration starts).</exception>
    public static IEnumerable<string> JaysOriginalApproach(this string s, int length)
    {
        if (s == null)
            throw new ArgumentNullException("s");
        if (length <= 0)
            throw new ArgumentOutOfRangeException("length", length, "The split length must be greater than zero.");

        // \G anchors the match at the position passed to Match, which is what '^'
        // did on each substring in the recursive formulation; scanning one string
        // iteratively avoids re-copying the tail and nesting one iterator per piece.
        var piecePattern = new Regex(@"\G.{0," + length + @"}\W");
        int position = 0;

        while (position < s.Length)
        {
            Match match = piecePattern.Match(s, position);

            // Fall back to a hard cut, clamped so a short tail without any
            // non-word character no longer throws.
            string piece = match.Success
                ? match.Value
                : s.Substring(position, Math.Min(length, s.Length - position));

            yield return piece;
            position += piece.Length;
        }
    }

    /// <summary>
    /// Splits with a single <see cref="Regex.Split(string, string)"/> call whose pattern
    /// captures up to <paramref name="length"/> characters followed by a space.
    /// </summary>
    /// <param name="s">Text to split.</param>
    /// <param name="length">Maximum number of characters captured in front of each separating space.</param>
    /// <returns>
    /// The captured groups and any text the pattern did not match, in order, with empty
    /// entries removed. Unmatched text is returned as-is and is not limited in length.
    /// </returns>
    public static IEnumerable<string> JaysApproach(this string s, int length)
    {
        // Because the pattern has a capture group, Regex.Split includes the captured
        // text in its result; the separating space itself is dropped.
        string pattern = @"(.{0," + length + @"}) ";
        return Regex.Split(s, pattern)
                    .Where(piece => piece != string.Empty);
    }

    /// <summary>
    /// Greedy word wrap that builds the complete result list before returning it.
    /// </summary>
    /// <param name="str">Text to split on spaces.</param>
    /// <param name="allowedLength">Maximum length of a chunk made of more than one word.</param>
    /// <returns>
    /// Chunks of space-joined words without leading or trailing spaces and without empty
    /// entries. A single word longer than <paramref name="allowedLength"/> becomes its own chunk.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    public static IEnumerable<string> SaeedsOriginalApproach(this string str, int allowedLength)
    {
        if (str == null)
            throw new ArgumentNullException("str");

        var chunks = new List<string>();
        string current = "";

        foreach (var word in str.Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries))
        {
            if (current.Length == 0)
            {
                current = word;
            }
            else if (current.Length + 1 + word.Length <= allowedLength)
            {
                current += ' ' + word;
            }
            else
            {
                chunks.Add(current);
                current = word;
            }
        }

        if (current.Length > 0)
            chunks.Add(current);

        return chunks;
    }

    /// <summary>
    /// Greedy word wrap that streams each chunk as soon as it is complete.
    /// </summary>
    /// <param name="str">Text to split on spaces.</param>
    /// <param name="allowedLength">Maximum length of a chunk made of more than one word.</param>
    /// <returns>
    /// Chunks of space-joined words without leading or trailing spaces and without empty
    /// entries. A single word longer than <paramref name="allowedLength"/> becomes its own chunk.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null (raised when enumeration starts).</exception>
    public static IEnumerable<string> SaeedsApproach(this string str, int allowedLength)
    {
        if (str == null)
            throw new ArgumentNullException("str");

        string current = "";

        foreach (var word in str.Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries))
        {
            if (current.Length == 0)
            {
                current = word;
            }
            else if (current.Length + 1 + word.Length <= allowedLength)
            {
                current += ' ' + word;
            }
            else
            {
                // The word that did not fit starts the next chunk instead of being dropped.
                yield return current;
                current = word;
            }
        }

        // Emit whatever is still pending once the words run out.
        if (current.Length > 0)
            yield return current;
    }

    /// <summary>
    /// Walks the input in windows of <paramref name="length"/> characters and yields each
    /// window, optionally shortened so that it does not end in the middle of a word.
    /// </summary>
    /// <param name="input">Text to split.</param>
    /// <param name="length">Window size in characters; must be positive.</param>
    /// <param name="wordPolicy">How to treat a window that would end inside a word.</param>
    /// <returns>Consecutive substrings of <paramref name="input"/>; whitespace is preserved, so concatenating them gives back the input.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null (raised when enumeration starts).</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="length"/> is not positive, or <paramref name="wordPolicy"/> is
    /// <see cref="WordPolicy.ThrowIfTooLong"/> and a window contains no space to break at.
    /// </exception>
    public static IEnumerable<string> SplitOnLength(this string input, int length, WordPolicy wordPolicy)
    {
        if (input == null)
            throw new ArgumentNullException("input");
        if (length <= 0)
            throw new ArgumentOutOfRangeException("length", length, "The split length must be greater than zero.");

        int index = 0;
        while (index < input.Length)
        {
            // Written as a subtraction so a very large length cannot overflow.
            if (length >= input.Length - index)
            {
                yield return input.Substring(index);
                yield break;
            }

            int stepsBackward = 0;
            if (wordPolicy != WordPolicy.None)
            {
                yield return GetBiggestAllowableSubstring(input, index, length, wordPolicy, out stepsBackward);
            }
            else
            {
                yield return input.Substring(index, length);
            }

            // Characters given back to avoid cutting a word are re-read by the next window.
            index += (length - stepsBackward);
        }
    }

    /// <summary>
    /// Returns the window of <paramref name="length"/> characters starting at
    /// <paramref name="index"/>, shortened to end at its last space when the character
    /// right after the window is not whitespace.
    /// </summary>
    /// <param name="input">Text being split; must contain at least one character after the window.</param>
    /// <param name="index">Start of the window.</param>
    /// <param name="length">Size of the window.</param>
    /// <param name="wordPolicy">Decides what happens when the window contains no space.</param>
    /// <param name="stepsBackward">Number of characters by which the window was shortened.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The window contains no space and <paramref name="wordPolicy"/> is <see cref="WordPolicy.ThrowIfTooLong"/>.
    /// </exception>
    static string GetBiggestAllowableSubstring(string input, int index, int length, WordPolicy wordPolicy, out int stepsBackward)
    {
        stepsBackward = 0;
        int lastIndex = index + length - 1;

        if (!char.IsWhiteSpace(input[lastIndex + 1]))
        {
            // The window would end mid-word: search backwards, within the window only, for a space.
            int lastSpace = input.LastIndexOf(' ', lastIndex, length);
            if (lastSpace >= 0)
            {
                stepsBackward = lastIndex - lastSpace;
                lastIndex = lastSpace;
            }
            else if (wordPolicy == WordPolicy.ThrowIfTooLong)
            {
                // Two-argument overload: the single-string constructor would treat the text as a parameter name.
                throw new ArgumentOutOfRangeException("input", "The input string contains at least one word greater in length than the specified length.");
            }
            // Otherwise there is no space to break at, so the full window is kept (the word is cut).
        }

        return input.Substring(index, lastIndex - index + 1);
    }
}
/// <summary>
/// Console driver: prints the output of every splitter for one sample sentence, then
/// repeatedly asks for a split size and reports how long each splitter takes on a
/// long and a short input.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the demonstration pass followed by the interactive timing loop.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        string s = "This is a sample block of text that I would pass through the string splitter.";
        PrintResultsOfSplitMethod("Dan", s, 10, (str, len) => str.SplitOnLength(len, WordPolicy.CutIfTooLong));
        PrintResultsOfSplitMethod("Jay", s, 10, StringSplitter.JaysApproach);
        PrintResultsOfSplitMethod("Jay (original)", s, 10, StringSplitter.JaysOriginalApproach);
        PrintResultsOfSplitMethod("Saeed", s, 10, StringSplitter.SaeedsApproach);
        PrintResultsOfSplitMethod("Saeed (original)", s, 10, StringSplitter.SaeedsOriginalApproach);
        PrintResultsOfSplitMethod("Simon", s, 10, StringSplitter.SimonsApproach);
        Console.WriteLine("Finished. Press Enter to continue.");
        Console.ReadLine();

        // This is the entire text of Kafka's 'In the Penal Colony',
        // as a sample input of significant length.
        string longInput = SplitOnLength.Properties.Resources.InThePenalColony;

        // This is an excerpt of the text of the OP's question,
        // as a sample input of small-medium length.
        string shortInput = SplitOnLength.Properties.Resources.QuestionText;

        bool keepGoing;
        do
        {
            try
            {
                Console.Write("Enter a split size: ");
                int length = int.Parse(Console.ReadLine());
                PrintTimings("Results from longer input:", longInput, length);
                PrintTimings("Results from shorter input:", shortInput, length);
            }
            catch (Exception ex)
            {
                // Best effort: report bad input or a splitter failure and let the user try again.
                Console.WriteLine(ex.Message);
            }
            finally
            {
                Console.WriteLine();
                Console.Write("Go again? ");

                // ReadLine returns null at end of input; treat that as "no" instead of crashing.
                string answer = Console.ReadLine();
                keepGoing = answer != null && answer.StartsWith("Y", StringComparison.OrdinalIgnoreCase);
                Console.WriteLine();
            }
        }
        while (keepGoing);
    }

    /// <summary>
    /// Writes a blank line, the heading, and one "name: elapsed ms" line per splitter,
    /// fastest first.
    /// </summary>
    /// <param name="heading">Line printed above the timings.</param>
    /// <param name="input">Text handed to every splitter.</param>
    /// <param name="length">Split size handed to every splitter.</param>
    static void PrintTimings(string heading, string input, int length)
    {
        Console.WriteLine();
        Console.WriteLine(heading);
        foreach (var result in GetResultsForInputAndLength(input, length).OrderBy(x => x.Value))
        {
            Console.WriteLine("{0}: {1} ms", result.Key, result.Value.TotalMilliseconds);
        }
    }

    /// <summary>
    /// Runs one splitter and writes each piece it produces on its own line, followed by a blank line.
    /// </summary>
    /// <param name="name">Label printed in the heading.</param>
    /// <param name="input">Text to split.</param>
    /// <param name="length">Split size.</param>
    /// <param name="splitter">Splitting function under test.</param>
    static void PrintResultsOfSplitMethod(string name, string input, int length, Func<string, int, IEnumerable<string>> splitter)
    {
        Console.WriteLine("Testing {0}'s method:", name);
        foreach (string piece in splitter(input, length))
        {
            Console.WriteLine(piece);
        }
        Console.WriteLine();
    }

    /// <summary>
    /// Times each benchmarked splitter once on the given input.
    /// </summary>
    /// <param name="input">Text handed to every splitter.</param>
    /// <param name="length">Split size handed to every splitter.</param>
    /// <returns>Elapsed time keyed by the splitter's label.</returns>
    static IDictionary<string, TimeSpan> GetResultsForInputAndLength(string input, int length)
    {
        var results = new Dictionary<string, TimeSpan>
        {
            { "Dan", TestSplitMethod(input, length, (str, len) => str.SplitOnLength(len, WordPolicy.CutIfTooLong)) },
            // "Saeed" times the streaming version; it previously re-timed the original one.
            { "Saeed", TestSplitMethod(input, length, StringSplitter.SaeedsApproach) },
            { "Saeed (original)", TestSplitMethod(input, length, StringSplitter.SaeedsOriginalApproach) },
            { "Jay", TestSplitMethod(input, length, StringSplitter.JaysApproach) },
            { "Simon", TestSplitMethod(input, length, StringSplitter.SimonsApproach) }
        };
        return results;
    }

    /// <summary>
    /// Measures how long it takes to call the splitter and enumerate its whole result.
    /// </summary>
    /// <param name="input">Text to split.</param>
    /// <param name="length">Split size.</param>
    /// <param name="splitter">Splitting function to time.</param>
    /// <returns>Time elapsed between starting the call and finishing enumeration.</returns>
    static TimeSpan TestSplitMethod(string input, int length, Func<string, int, IEnumerable<string>> splitter)
    {
        // Ensure results aren't thrown off by garbage collections affecting
        // certain runs more or less harshly than others.
        GC.Collect();

        Stopwatch stopwatch = Stopwatch.StartNew();

        // Enumerate fully so lazily evaluated splitters actually do their work.
        foreach (string piece in splitter(input, length))
        { }

        stopwatch.Stop();
        return stopwatch.Elapsed;
    }
}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement