Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/// <summary>
/// Splits text into chunks of bounded length, breaking on spaces so that words
/// are kept whole whenever they fit.
/// </summary>
public class StringChunkingWithWordDetection
{
    /// <summary>
    /// Splits <paramref name="str"/> into chunks that are each at most
    /// <paramref name="maxChunkSize"/> characters long.
    /// </summary>
    /// <remarks>
    /// Words are the runs of characters between space (' ') characters; consecutive
    /// spaces are collapsed. Inside a chunk, words are joined by a single space and a
    /// chunk never starts or ends with a separator, so joining the chunks with a space
    /// reproduces the space-normalised input. A word longer than
    /// <paramref name="maxChunkSize"/> is cut into pieces of exactly that size, and its
    /// final (shorter) piece may share a chunk with the words that follow it.
    /// Empty chunks are never produced.
    /// </remarks>
    /// <param name="str">The text to split. Must not be null.</param>
    /// <param name="maxChunkSize">Maximum length of any returned chunk. Must be positive.</param>
    /// <returns>The chunks in input order; empty when the input contains no words.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="str"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException">
    /// <paramref name="maxChunkSize"/> is zero or negative.
    /// </exception>
    public IEnumerable<string> Process(string str, int maxChunkSize)
    {
        // Validate here rather than in the iterator so bad arguments fail at the
        // call site instead of on first enumeration.
        if (str == null)
        {
            throw new System.ArgumentNullException(nameof(str));
        }

        if (maxChunkSize <= 0)
        {
            throw new System.ArgumentOutOfRangeException(
                nameof(maxChunkSize), maxChunkSize, "Chunk size must be greater than zero.");
        }

        var words = new List<string>();
        foreach (var word in str.Split(' '))
        {
            // Split yields empty entries for leading, trailing and repeated spaces.
            if (word.Length > 0)
            {
                words.Add(word);
            }
        }

        return ChunksUpto(words, maxChunkSize);
    }

    /// <summary>
    /// Greedily packs <paramref name="words"/> into chunks no longer than
    /// <paramref name="maxChunkSize"/>, splitting only words that cannot fit in a
    /// chunk on their own.
    /// </summary>
    private IEnumerable<string> ChunksUpto(List<string> words, int maxChunkSize)
    {
        var chunk = new System.Text.StringBuilder();

        foreach (var word in words)
        {
            // Space left for the next word; a non-empty chunk also needs one
            // character for the joining space. Written as a subtraction so the
            // arithmetic cannot overflow.
            var room = chunk.Length == 0
                ? maxChunkSize
                : maxChunkSize - chunk.Length - 1;

            if (word.Length <= room)
            {
                if (chunk.Length > 0)
                {
                    chunk.Append(' ');
                }

                chunk.Append(word);
                continue;
            }

            // The word does not fit: emit what has been gathered so far before
            // starting over, so no pending text is lost.
            if (chunk.Length > 0)
            {
                yield return chunk.ToString();
                chunk.Clear();
            }

            // Cut an oversized word into as many full-size pieces as needed.
            var offset = 0;
            while (word.Length - offset > maxChunkSize)
            {
                yield return word.Substring(offset, maxChunkSize);
                offset += maxChunkSize;
            }

            // The remainder (or the whole word, if it was not oversized) starts
            // the next chunk.
            chunk.Append(word, offset, word.Length - offset);
        }

        if (chunk.Length > 0)
        {
            yield return chunk.ToString();
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement