Advertisement
Guest User

Untitled

a guest
Jan 23rd, 2017
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.82 KB | None | 0 0
  /// <summary>
  /// Splits text into chunks of at most a given number of characters,
  /// breaking on word boundaries (single spaces) wherever possible.
  /// </summary>
  public class StringChunkingWithWordDetection
  {
      /// <summary>
      /// Splits <paramref name="str"/> into space-separated words and packs them
      /// into chunks no longer than <paramref name="maxChunkSize"/> characters.
      /// Runs of spaces are collapsed; words inside a chunk are joined by a single
      /// space and chunks never start or end with a separator. A word longer than
      /// <paramref name="maxChunkSize"/> is hard-split into pieces of exactly
      /// <paramref name="maxChunkSize"/> characters plus a shorter tail.
      /// </summary>
      /// <param name="str">The text to split. Must not be null.</param>
      /// <param name="maxChunkSize">Maximum length of each chunk. Must be positive.</param>
      /// <returns>
      /// A lazily evaluated sequence of non-empty chunks; the sequence is empty
      /// when <paramref name="str"/> contains no words.
      /// </returns>
      /// <exception cref="System.ArgumentNullException"><paramref name="str"/> is null.</exception>
      /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="maxChunkSize"/> is zero or negative.</exception>
      public IEnumerable<string> Process(string str, int maxChunkSize)
      {
          // Validate here rather than in the iterator so that bad arguments fail
          // at the call site instead of on first enumeration.
          if (str == null)
          {
              throw new System.ArgumentNullException("str");
          }

          if (maxChunkSize <= 0)
          {
              throw new System.ArgumentOutOfRangeException(
                  "maxChunkSize", maxChunkSize, "Chunk size must be greater than zero.");
          }

          // Consecutive spaces produce empty entries; drop them.
          var words = str.Split(' ').Where(w => w.Length > 0).ToList();

          return ChunksUpto(words, maxChunkSize);
      }

      /// <summary>
      /// Greedily packs <paramref name="words"/> into chunks of at most
      /// <paramref name="maxChunkSize"/> characters.
      /// </summary>
      private IEnumerable<string> ChunksUpto(List<string> words, int maxChunkSize)
      {
          var chunk = "";

          foreach (var word in words)
          {
              var piece = word;

              if (word.Length > maxChunkSize)
              {
                  // Emit whatever was accumulated before the over-long word so
                  // that it is not lost and word order is preserved.
                  if (chunk.Length > 0)
                  {
                      yield return chunk;
                      chunk = "";
                  }

                  // Hard-split the word into as many full-size pieces as needed;
                  // the tail (1..maxChunkSize characters) continues as a normal word.
                  var offset = 0;
                  while (word.Length - offset > maxChunkSize)
                  {
                      yield return word.Substring(offset, maxChunkSize);
                      offset += maxChunkSize;
                  }

                  piece = word.Substring(offset);
              }

              if (chunk.Length == 0)
              {
                  chunk = piece;
              }
              else if (chunk.Length + 1 + piece.Length <= maxChunkSize)
              {
                  // The separator is only added between two words that share a
                  // chunk, so it can never push a chunk past the limit.
                  chunk += " " + piece;
              }
              else
              {
                  yield return chunk;
                  chunk = piece;
              }
          }

          // Never emit an empty trailing chunk.
          if (chunk.Length > 0)
          {
              yield return chunk;
          }
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement